keras-nightly 3.12.0.dev2025100503__py3-none-any.whl → 3.14.0.dev2026011604__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras/__init__.py +1 -0
- keras/_tf_keras/keras/__init__.py +1 -0
- keras/_tf_keras/keras/callbacks/__init__.py +3 -0
- keras/_tf_keras/keras/distillation/__init__.py +16 -0
- keras/_tf_keras/keras/distribution/__init__.py +3 -0
- keras/_tf_keras/keras/dtype_policies/__init__.py +3 -0
- keras/_tf_keras/keras/layers/__init__.py +21 -0
- keras/_tf_keras/keras/ops/__init__.py +13 -0
- keras/_tf_keras/keras/ops/image/__init__.py +1 -0
- keras/_tf_keras/keras/ops/linalg/__init__.py +1 -0
- keras/_tf_keras/keras/ops/nn/__init__.py +3 -0
- keras/_tf_keras/keras/ops/numpy/__init__.py +9 -0
- keras/_tf_keras/keras/quantizers/__init__.py +13 -0
- keras/callbacks/__init__.py +3 -0
- keras/distillation/__init__.py +16 -0
- keras/distribution/__init__.py +3 -0
- keras/dtype_policies/__init__.py +3 -0
- keras/layers/__init__.py +21 -0
- keras/ops/__init__.py +13 -0
- keras/ops/image/__init__.py +1 -0
- keras/ops/linalg/__init__.py +1 -0
- keras/ops/nn/__init__.py +3 -0
- keras/ops/numpy/__init__.py +9 -0
- keras/quantizers/__init__.py +13 -0
- keras/src/applications/imagenet_utils.py +4 -1
- keras/src/backend/common/backend_utils.py +30 -6
- keras/src/backend/common/name_scope.py +2 -1
- keras/src/backend/common/variables.py +30 -15
- keras/src/backend/jax/core.py +92 -3
- keras/src/backend/jax/distribution_lib.py +16 -2
- keras/src/backend/jax/linalg.py +4 -0
- keras/src/backend/jax/nn.py +509 -29
- keras/src/backend/jax/numpy.py +59 -8
- keras/src/backend/jax/trainer.py +14 -2
- keras/src/backend/numpy/linalg.py +4 -0
- keras/src/backend/numpy/nn.py +311 -1
- keras/src/backend/numpy/numpy.py +65 -2
- keras/src/backend/openvino/__init__.py +1 -0
- keras/src/backend/openvino/core.py +2 -23
- keras/src/backend/openvino/linalg.py +4 -0
- keras/src/backend/openvino/nn.py +271 -20
- keras/src/backend/openvino/numpy.py +943 -189
- keras/src/backend/tensorflow/layer.py +43 -9
- keras/src/backend/tensorflow/linalg.py +24 -0
- keras/src/backend/tensorflow/nn.py +545 -1
- keras/src/backend/tensorflow/numpy.py +250 -50
- keras/src/backend/torch/core.py +3 -1
- keras/src/backend/torch/linalg.py +4 -0
- keras/src/backend/torch/nn.py +125 -0
- keras/src/backend/torch/numpy.py +80 -2
- keras/src/callbacks/__init__.py +1 -0
- keras/src/callbacks/model_checkpoint.py +5 -0
- keras/src/callbacks/orbax_checkpoint.py +332 -0
- keras/src/callbacks/terminate_on_nan.py +54 -5
- keras/src/datasets/cifar10.py +5 -0
- keras/src/distillation/__init__.py +1 -0
- keras/src/distillation/distillation_loss.py +390 -0
- keras/src/distillation/distiller.py +598 -0
- keras/src/distribution/distribution_lib.py +14 -0
- keras/src/dtype_policies/__init__.py +2 -0
- keras/src/dtype_policies/dtype_policy.py +90 -1
- keras/src/export/__init__.py +2 -0
- keras/src/export/export_utils.py +39 -2
- keras/src/export/litert.py +248 -0
- keras/src/export/openvino.py +1 -1
- keras/src/export/tf2onnx_lib.py +3 -0
- keras/src/layers/__init__.py +13 -0
- keras/src/layers/activations/softmax.py +9 -4
- keras/src/layers/attention/multi_head_attention.py +4 -1
- keras/src/layers/core/dense.py +241 -111
- keras/src/layers/core/einsum_dense.py +316 -131
- keras/src/layers/core/embedding.py +84 -94
- keras/src/layers/core/input_layer.py +1 -0
- keras/src/layers/core/reversible_embedding.py +399 -0
- keras/src/layers/input_spec.py +17 -17
- keras/src/layers/layer.py +45 -15
- keras/src/layers/merging/dot.py +4 -1
- keras/src/layers/pooling/adaptive_average_pooling1d.py +65 -0
- keras/src/layers/pooling/adaptive_average_pooling2d.py +62 -0
- keras/src/layers/pooling/adaptive_average_pooling3d.py +63 -0
- keras/src/layers/pooling/adaptive_max_pooling1d.py +65 -0
- keras/src/layers/pooling/adaptive_max_pooling2d.py +62 -0
- keras/src/layers/pooling/adaptive_max_pooling3d.py +63 -0
- keras/src/layers/pooling/base_adaptive_pooling.py +63 -0
- keras/src/layers/preprocessing/discretization.py +6 -5
- keras/src/layers/preprocessing/feature_space.py +8 -4
- keras/src/layers/preprocessing/image_preprocessing/aug_mix.py +2 -2
- keras/src/layers/preprocessing/image_preprocessing/random_contrast.py +3 -3
- keras/src/layers/preprocessing/image_preprocessing/resizing.py +10 -0
- keras/src/layers/preprocessing/index_lookup.py +19 -1
- keras/src/layers/preprocessing/normalization.py +14 -1
- keras/src/layers/regularization/dropout.py +43 -1
- keras/src/layers/rnn/rnn.py +19 -0
- keras/src/losses/loss.py +1 -1
- keras/src/losses/losses.py +24 -0
- keras/src/metrics/confusion_metrics.py +7 -6
- keras/src/models/cloning.py +4 -0
- keras/src/models/functional.py +11 -3
- keras/src/models/model.py +172 -34
- keras/src/ops/image.py +257 -20
- keras/src/ops/linalg.py +93 -0
- keras/src/ops/nn.py +258 -0
- keras/src/ops/numpy.py +569 -36
- keras/src/optimizers/muon.py +65 -31
- keras/src/optimizers/schedules/learning_rate_schedule.py +4 -3
- keras/src/quantizers/__init__.py +14 -1
- keras/src/quantizers/awq.py +361 -0
- keras/src/quantizers/awq_config.py +140 -0
- keras/src/quantizers/awq_core.py +217 -0
- keras/src/quantizers/gptq.py +2 -8
- keras/src/quantizers/gptq_config.py +36 -1
- keras/src/quantizers/gptq_core.py +65 -79
- keras/src/quantizers/quantization_config.py +246 -0
- keras/src/quantizers/quantizers.py +127 -61
- keras/src/quantizers/utils.py +23 -0
- keras/src/random/seed_generator.py +6 -4
- keras/src/saving/file_editor.py +81 -6
- keras/src/saving/orbax_util.py +26 -0
- keras/src/saving/saving_api.py +37 -14
- keras/src/saving/saving_lib.py +1 -1
- keras/src/testing/__init__.py +1 -0
- keras/src/testing/test_case.py +45 -5
- keras/src/utils/backend_utils.py +31 -4
- keras/src/utils/dataset_utils.py +234 -35
- keras/src/utils/file_utils.py +49 -11
- keras/src/utils/image_utils.py +14 -2
- keras/src/utils/jax_layer.py +244 -55
- keras/src/utils/module_utils.py +29 -0
- keras/src/utils/progbar.py +10 -2
- keras/src/utils/rng_utils.py +9 -1
- keras/src/utils/tracking.py +5 -5
- keras/src/version.py +1 -1
- {keras_nightly-3.12.0.dev2025100503.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/METADATA +16 -6
- {keras_nightly-3.12.0.dev2025100503.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/RECORD +136 -115
- {keras_nightly-3.12.0.dev2025100503.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/WHEEL +0 -0
- {keras_nightly-3.12.0.dev2025100503.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/top_level.txt +0 -0
keras/src/layers/core/reversible_embedding.py
ADDED

@@ -0,0 +1,399 @@
+import copy
+
+from keras.src import dtype_policies
+from keras.src import layers
+from keras.src import ops
+from keras.src import quantizers
+from keras.src.api_export import keras_export
+from keras.src.backend import KerasTensor
+from keras.src.backend import set_keras_mask
+from keras.src.quantizers.quantization_config import QuantizationConfig
+
+
+@keras_export("keras.layers.ReversibleEmbedding")
+class ReversibleEmbedding(layers.Embedding):
+    """An embedding layer which can project backwards to the input dim.
+
+    This layer is an extension of `keras.layers.Embedding` for language models.
+    This layer can be called "in reverse" with `reverse=True`, in which case the
+    layer will linearly project from `output_dim` back to `input_dim`.
+
+    By default, the reverse projection will use the transpose of the
+    `embeddings` weights to project to `input_dim` (weights are "tied"). If
+    `tie_weights=False`, the model will use a separate, trainable variable for
+    reverse projection.
+
+    This layer has no bias terms.
+
+    Args:
+        input_dim: Integer. Size of the vocabulary,
+            i.e. maximum integer index + 1.
+        output_dim: Integer. Dimension of the dense embedding.
+        tie_weights: Boolean, whether or not the matrix for embedding and
+            the matrix for the `reverse` projection should share the same
+            weights.
+        embeddings_initializer: Initializer for the `embeddings`
+            matrix (see `keras.initializers`).
+        embeddings_regularizer: Regularizer function applied to
+            the `embeddings` matrix (see `keras.regularizers`).
+        embeddings_constraint: Constraint function applied to
+            the `embeddings` matrix (see `keras.constraints`).
+        mask_zero: Boolean, whether or not the input value 0 is a special
+            "padding" value that should be masked out.
+        reverse_dtype: The dtype for the reverse projection computation.
+            Defaults to the `compute_dtype` of the layer.
+        logit_soft_cap: If `logit_soft_cap` is set and `reverse=True`, the
+            output logits will be scaled by
+            `tanh(logits / logit_soft_cap) * logit_soft_cap`. This narrows the
+            range of output logits and can improve training.
+        **kwargs: other keyword arguments passed to `keras.layers.Embedding`,
+            including `name`, `trainable`, `dtype` etc.
+
+    Call arguments:
+        inputs: The tensor inputs to the layer.
+        reverse: Boolean. If `True` the layer will perform a linear projection
+            from `output_dim` to `input_dim`, instead of a normal embedding
+            call. Default to `False`.
+
+    Example:
+    ```python
+    batch_size = 16
+    vocab_size = 100
+    hidden_dim = 32
+    seq_length = 50
+
+    # Generate random inputs.
+    token_ids = np.random.randint(vocab_size, size=(batch_size, seq_length))
+
+    embedding = keras.layers.ReversibleEmbedding(vocab_size, hidden_dim)
+    # Embed tokens to shape `(batch_size, seq_length, hidden_dim)`.
+    hidden_states = embedding(token_ids)
+    # Project hidden states to shape `(batch_size, seq_length, vocab_size)`.
+    logits = embedding(hidden_states, reverse=True)
+    ```
+
+    References:
+    - [Vaswani et al., 2017](https://arxiv.org/abs/1706.03762)
+    - [Press and Wolf, 2016](https://arxiv.org/abs/1608.05859)
+    """
+
+    def __init__(
+        self,
+        input_dim,
+        output_dim,
+        tie_weights=True,
+        embeddings_initializer="uniform",
+        embeddings_regularizer=None,
+        embeddings_constraint=None,
+        mask_zero=False,
+        reverse_dtype=None,
+        logit_soft_cap=None,
+        **kwargs,
+    ):
+        super().__init__(
+            input_dim,
+            output_dim,
+            embeddings_initializer=embeddings_initializer,
+            embeddings_regularizer=embeddings_regularizer,
+            embeddings_constraint=embeddings_constraint,
+            mask_zero=mask_zero,
+            **kwargs,
+        )
+        self.tie_weights = tie_weights
+        self.reverse_dtype = reverse_dtype
+        self.logit_soft_cap = logit_soft_cap
+
+    def build(self, inputs_shape=None):
+        super().build(inputs_shape)
+        if not self.tie_weights and self.quantization_mode not in (
+            "int8",
+            "int4",
+        ):
+            self.reverse_embeddings = self.add_weight(
+                shape=(self.output_dim, self.input_dim),
+                initializer=self.embeddings_initializer,
+                name="reverse_embeddings",
+                trainable=True,
+            )
+
+    def call(self, inputs, reverse=False):
+        if not reverse:
+            result = super().call(inputs)
+            mask = super().compute_mask(inputs)
+            if mask is not None:
+                set_keras_mask(result, mask)
+            return result
+        else:
+            if self.tie_weights:
+                kernel = ops.transpose(ops.convert_to_tensor(self.embeddings))
+            else:
+                kernel = self.reverse_embeddings
+            if self.reverse_dtype is not None:
+                inputs = ops.cast(inputs, self.reverse_dtype)
+                kernel = ops.cast(kernel, self.reverse_dtype)
+            logits = ops.matmul(inputs, kernel)
+            # Optionally soft-cap logits.
+            if self.logit_soft_cap is not None:
+                soft_cap = self.logit_soft_cap
+                logits = ops.multiply(
+                    ops.tanh(ops.divide(logits, soft_cap)), soft_cap
+                )
+            return logits
+
+    def compute_mask(self, inputs, mask=None):
+        # Disable masking from super class, masking is done directly in call.
+        return None
+
+    def compute_output_shape(self, input_shape, reverse=False):
+        output_shape = list(input_shape)
+        if reverse:
+            output_shape[-1] = self.input_dim
+        else:
+            output_shape += [self.output_dim]
+        return output_shape
+
+    def compute_output_spec(self, inputs, reverse=False):
+        output_shape = list(inputs.shape)
+        if reverse:
+            output_shape[-1] = self.input_dim
+        else:
+            output_shape += [self.output_dim]
+        return KerasTensor(output_shape, dtype=self.compute_dtype)
+
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                "tie_weights": self.tie_weights,
+                "reverse_dtype": self.reverse_dtype,
+                "logit_soft_cap": self.logit_soft_cap,
+            }
+        )
+        return config
+
+    @property
+    def variable_serialization_spec(self):
+        # Avoid modifying the parent's spec.
+        _spec = copy.deepcopy(super().variable_serialization_spec)
+        if not self.tie_weights:
+            for mode, variable_spec in _spec.items():
+                variable_spec.append("reverse_embeddings")
+                if mode in ("int4", "int8"):
+                    variable_spec.append("reverse_embeddings_scale")
+        return _spec
+
+    def quantized_build(self, embeddings_shape, mode, config=None):
+        if mode == "int8":
+            self._int8_build(embeddings_shape, config)
+        elif mode == "int4":
+            self._int4_build(embeddings_shape, config)
+        else:
+            raise self._quantization_mode_error(mode)
+        self._is_quantized = True
+
+    def _int8_build(self, embeddings_shape, config=None):
+        if embeddings_shape is None:
+            embeddings_shape = (self.input_dim, self.output_dim)
+        super()._int8_build(embeddings_shape=embeddings_shape)
+
+        self.inputs_quantizer = (
+            QuantizationConfig.activation_quantizer_or_default(
+                config, quantizers.AbsMaxQuantizer(axis=-1)
+            )
+        )
+        if not self.tie_weights:
+            self.reverse_embeddings = self.add_weight(
+                name="reverse_embeddings",
+                shape=(self.output_dim, self.input_dim),
+                initializer="zeros",
+                dtype="int8",
+                trainable=False,
+            )
+            self.reverse_embeddings_scale = self.add_weight(
+                name="reverse_embeddings_scale",
+                shape=(self.input_dim,),
+                initializer="ones",
+                trainable=False,
+            )
+
+    def _int4_build(self, embeddings_shape, config=None):
+        if embeddings_shape is None:
+            embeddings_shape = (self.input_dim, self.output_dim)
+        super()._int4_build(embeddings_shape=embeddings_shape, config=config)
+
+        self.inputs_quantizer = (
+            QuantizationConfig.activation_quantizer_or_default(
+                config, quantizers.AbsMaxQuantizer(axis=-1)
+            )
+        )
+        if not self.tie_weights:
+            packed_rows = (self.output_dim + 1) // 2  # ceil for odd dims
+            self.reverse_embeddings = self.add_weight(
+                name="reverse_embeddings",
+                shape=(packed_rows, self.input_dim),
+                initializer="zeros",
+                dtype="int8",
+                trainable=False,
+            )
+            self.reverse_embeddings_scale = self.add_weight(
+                name="reverse_embeddings_scale",
+                shape=(self.input_dim,),
+                initializer="ones",
+                trainable=False,
+            )
+
+    def _int8_call(self, inputs, reverse=False):
+        if not reverse:
+            return super()._int8_call(inputs)
+        else:
+            if self.tie_weights:
+                kernel = ops.transpose(self._embeddings)
+                scale = ops.transpose(self.embeddings_scale)
+            else:
+                kernel = self.reverse_embeddings
+                scale = self.reverse_embeddings_scale
+            if self.inputs_quantizer:
+                inputs, inputs_scale = self.inputs_quantizer(inputs)
+            else:
+                inputs_scale = ops.ones((1,), dtype=self.compute_dtype)
+            logits = ops.matmul(inputs, kernel)
+            # De-scale outputs
+            logits = ops.cast(logits, self.compute_dtype)
+            logits = ops.divide(logits, ops.multiply(inputs_scale, scale))
+            # Optionally soft-cap logits.
+            if self.logit_soft_cap is not None:
+                soft_cap = self.logit_soft_cap
+                logits = ops.multiply(
+                    ops.tanh(ops.divide(logits, soft_cap)), soft_cap
+                )
+            return logits
+
+    def _int4_call(self, inputs, reverse=False):
+        if not reverse:
+            return super()._int4_call(inputs)
+        else:
+            if self.tie_weights:
+                embeddings = ops.transpose(self._embeddings)
+                scale = ops.transpose(self.embeddings_scale)
+            else:
+                embeddings = self.reverse_embeddings
+                scale = self.reverse_embeddings_scale
+            unpacked_embeddings = quantizers.unpack_int4(
+                embeddings, self.output_dim, axis=0
+            )
+            if self.inputs_quantizer:
+                inputs, inputs_scale = self.inputs_quantizer(inputs)
+            else:
+                inputs_scale = ops.ones((1,), dtype=self.compute_dtype)
+            logits = ops.matmul(inputs, unpacked_embeddings)
+            # De-scale outputs
+            logits = ops.cast(logits, self.compute_dtype)
+            logits = ops.divide(logits, ops.multiply(inputs_scale, scale))
+            # Optionally soft-cap logits.
+            if self.logit_soft_cap is not None:
+                soft_cap = self.logit_soft_cap
+                logits = ops.multiply(
+                    ops.tanh(ops.divide(logits, soft_cap)), soft_cap
+                )
+            return logits
+
+    def quantize(self, mode=None, type_check=True, config=None):
+        if type_check and type(self) is not ReversibleEmbedding:
+            raise self._not_implemented_error(self.quantize)
+
+        self.quantization_config = config
+
+        embeddings_shape = (self.input_dim, self.output_dim)
+        if mode == "int8":
+            # Quantize `self._embeddings` to int8 and compute corresponding
+            # scale.
+            weight_quantizer = QuantizationConfig.weight_quantizer_or_default(
+                self.quantization_config, quantizers.AbsMaxQuantizer(axis=-1)
+            )
+            embeddings_value, embeddings_scale = weight_quantizer(
+                self._embeddings, to_numpy=True
+            )
+            embeddings_scale = ops.squeeze(embeddings_scale, axis=-1)
+            del self._embeddings
+            if not self.tie_weights:
+                reverse_weight_quantizer = (
+                    QuantizationConfig.weight_quantizer_or_default(
+                        self.quantization_config,
+                        quantizers.AbsMaxQuantizer(axis=0),
+                    )
+                )
+                reverse_embeddings_value, reverse_embeddings_scale = (
+                    reverse_weight_quantizer(
+                        self.reverse_embeddings, to_numpy=True
+                    )
+                )
+                reverse_embeddings_scale = ops.squeeze(
+                    reverse_embeddings_scale, axis=0
+                )
+                del self.reverse_embeddings
+            self.quantized_build(
+                embeddings_shape, mode, self.quantization_config
+            )
+            self._embeddings.assign(embeddings_value)
+            self.embeddings_scale.assign(embeddings_scale)
+            if not self.tie_weights:
+                self.reverse_embeddings.assign(reverse_embeddings_value)
+                self.reverse_embeddings_scale.assign(reverse_embeddings_scale)
+        elif mode == "int4":
+            # Quantize to int4 values (stored in int8 dtype, range [-8, 7]).
+            weight_quantizer = QuantizationConfig.weight_quantizer_or_default(
+                self.quantization_config,
+                quantizers.AbsMaxQuantizer(
+                    axis=-1,
+                    value_range=(-8, 7),
+                    output_dtype="int8",
+                ),
+            )
+            embeddings_value, embeddings_scale = weight_quantizer(
+                self._embeddings, to_numpy=True
+            )
+            embeddings_scale = ops.squeeze(embeddings_scale, axis=-1)
+            # 2. Pack two int4 values into a single int8 byte.
+            packed_embeddings_value, _, _ = quantizers.pack_int4(
+                embeddings_value, axis=-1
+            )
+            del self._embeddings
+            if not self.tie_weights:
+                reverse_weight_quantizer = (
+                    QuantizationConfig.weight_quantizer_or_default(
+                        self.quantization_config,
+                        quantizers.AbsMaxQuantizer(
+                            axis=0,
+                            value_range=(-8, 7),
+                            output_dtype="int8",
+                        ),
+                    )
+                )
+                reverse_embeddings_value, reverse_embeddings_scale = (
+                    reverse_weight_quantizer(
+                        self.reverse_embeddings, to_numpy=True
+                    )
+                )
+                reverse_embeddings_scale = ops.squeeze(
+                    reverse_embeddings_scale, axis=0
+                )
+                # Pack two int4 values into a single int8 byte.
+                packed_reverse_embeddings_value, _, _ = quantizers.pack_int4(
+                    reverse_embeddings_value, axis=0
+                )
+                del self.reverse_embeddings
+            self.quantized_build(
+                embeddings_shape, mode, self.quantization_config
+            )
+            self._embeddings.assign(packed_embeddings_value)
+            self.embeddings_scale.assign(embeddings_scale)
+            if not self.tie_weights:
+                self.reverse_embeddings.assign(packed_reverse_embeddings_value)
+                self.reverse_embeddings_scale.assign(reverse_embeddings_scale)
+        else:
+            raise self._quantization_mode_error(mode)

+        # Set new dtype policy.
+        if self.dtype_policy.quantization_mode is None:
+            policy = dtype_policies.get(f"{mode}_from_{self.dtype_policy.name}")
+            self.dtype_policy = policy
keras/src/layers/input_spec.py
CHANGED

@@ -111,6 +111,7 @@ class InputSpec:
             "max_ndim": self.max_ndim,
             "min_ndim": self.min_ndim,
             "axes": self.axes,
+            "optional": self.optional,
         }

     @classmethod
@@ -184,24 +185,24 @@ def assert_input_compatibility(input_spec, inputs, layer_name):
         if spec.ndim is not None and not spec.allow_last_axis_squeeze:
             if ndim != spec.ndim:
                 raise ValueError(
-                    f'Input {input_index} of layer "{layer_name}" '
-                    "is incompatible with the layer: "
+                    f"Input {input_index} with name '{spec.name}' of layer "
+                    f"'{layer_name}' is incompatible with the layer: "
                     f"expected ndim={spec.ndim}, found ndim={ndim}. "
                     f"Full shape received: {shape}"
                 )
         if spec.max_ndim is not None:
             if ndim is not None and ndim > spec.max_ndim:
                 raise ValueError(
-                    f'Input {input_index} of layer "{layer_name}" '
-                    "is incompatible with the layer: "
+                    f"Input {input_index} with name '{spec.name}' of layer "
+                    f"'{layer_name}' is incompatible with the layer: "
                     f"expected max_ndim={spec.max_ndim}, "
                     f"found ndim={ndim}"
                 )
         if spec.min_ndim is not None:
             if ndim is not None and ndim < spec.min_ndim:
                 raise ValueError(
-                    f'Input {input_index} of layer "{layer_name}" '
-                    "is incompatible with the layer: "
+                    f"Input {input_index} with name '{spec.name}' of layer "
+                    f"'{layer_name}' is incompatible with the layer: "
                     f"expected min_ndim={spec.min_ndim}, "
                     f"found ndim={ndim}. "
                     f"Full shape received: {shape}"
@@ -211,8 +212,8 @@ def assert_input_compatibility(input_spec, inputs, layer_name):
         dtype = backend.standardize_dtype(x.dtype)
         if dtype != spec.dtype:
             raise ValueError(
-                f'Input {input_index} of layer "{layer_name}" '
-                "is incompatible with the layer: "
+                f"Input {input_index} with name '{spec.name}' of layer "
+                f"'{layer_name}' is incompatible with the layer: "
                 f"expected dtype={spec.dtype}, "
                 f"found dtype={dtype}"
             )
@@ -225,11 +226,10 @@ def assert_input_compatibility(input_spec, inputs, layer_name):
                     None,
                 }:
                     raise ValueError(
-                        f'Input {input_index} of layer "{layer_name}" is '
-                        f"incompatible with the layer: expected axis {axis} "
-                        f"of input shape to have value {value}, "
-                        "but received input with "
-                        f"shape {shape}"
+                        f"Input {input_index} with name '{spec.name}' of layer "
+                        f"'{layer_name}' is incompatible with the layer: "
+                        f"expected axis {axis} of input shape to have value "
+                        f"{value}, but received input with shape {shape}"
                     )
         # Check shape.
         if spec.shape is not None:
@@ -243,8 +243,8 @@ def assert_input_compatibility(input_spec, inputs, layer_name):
                 if spec_dim is not None and dim is not None:
                     if spec_dim != dim:
                         raise ValueError(
-                            f'Input {input_index} of layer "{layer_name}" is '
-                            "incompatible with the layer: "
-                            f"expected shape={spec.shape}, "
-                            f"found shape={shape}"
+                            f"Input {input_index} with name '{spec.name}' of "
+                            f"layer '{layer_name}' is incompatible with the "
+                            f"layer: expected shape={spec.shape}, found "
+                            f"shape={shape}"
                         )
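
The `input_spec.py` changes serialize the `optional` flag in `get_config()` and rewrite every incompatibility error so it names the input index, the spec's `name`, and the owning layer. An illustrative trigger follows; the printed message is paraphrased from the new format strings, not captured from a run:

```python
import numpy as np
import keras

layer = keras.layers.Dense(4)
try:
    # Dense declares `InputSpec(min_ndim=2, ...)`, so a 1-D input is rejected.
    layer(np.arange(3.0))
except ValueError as e:
    print(e)
    # Roughly: "Input 0 with name 'None' of layer 'dense' is incompatible
    # with the layer: expected min_ndim=2, found ndim=1.
    # Full shape received: (3,)"
```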
keras/src/layers/layer.py
CHANGED

@@ -45,6 +45,7 @@ from keras.src.layers import input_spec
 from keras.src.metrics.metric import Metric
 from keras.src.ops.node import Node
 from keras.src.ops.operation import Operation
+from keras.src.quantizers.quantization_config import validate_and_resolve_config
 from keras.src.utils import python_utils
 from keras.src.utils import summary_utils
 from keras.src.utils import traceback_utils
@@ -244,11 +245,13 @@ class Layer(BackendLayer, Operation):
         original_quantize_method = obj.quantize

         @wraps(original_quantize_method)
-        def quantize_wrapper(mode, **kwargs):
+        def quantize_wrapper(mode=None, config=None, **kwargs):
+            config = validate_and_resolve_config(mode, config)
+            mode = config.mode
             obj._check_quantize_args(mode, obj.compute_dtype)
             obj._tracker.unlock()
             try:
-                original_quantize_method(mode, **kwargs)
+                original_quantize_method(mode=mode, config=config, **kwargs)
             except Exception:
                 raise
             finally:
@@ -757,6 +760,15 @@ class Layer(BackendLayer, Operation):
         self._dtype_policy = policy
         if policy.quantization_mode is not None:
             if self.built and not getattr(self, "_is_quantized", False):
+                if policy.quantization_mode == "gptq":
+                    raise ValueError(
+                        "Implicitly enabling GPTQ quantization by setting "
+                        f"`dtype_policy` to '{value}' is not supported. "
+                        "GPTQ requires a calibration dataset and a "
+                        "`GPTQConfig` object.\n\n"
+                        "Please use the `.quantize('gptq', config=...)` method "
+                        "on the layer or model instead."
+                    )
                 self.quantize(policy.quantization_mode)

     @property
@@ -824,9 +836,14 @@ class Layer(BackendLayer, Operation):
         #############################################################
         # 1. Convert any array arguments to tensors of correct dtype.
         def maybe_convert(x):
-            return self.dtype_policy.convert_input(
+            # Prevent _keras_mask from disappearing
+            mask = backend.get_keras_mask(x)
+            y = self.dtype_policy.convert_input(
                 x, self.autocast, self.input_dtype
             )
+            if mask is not None:
+                backend.set_keras_mask(y, mask)
+            return y

         # Used to avoid expensive `tree` operations in the most common case.
         if (
@@ -1268,7 +1285,7 @@ class Layer(BackendLayer, Operation):
     def quantized_build(self, input_shape, mode):
         raise self._not_implemented_error(self.quantized_build)

-    def quantize(self, mode, type_check=True, config=None):
+    def quantize(self, mode=None, type_check=True, config=None):
         raise self._not_implemented_error(self.quantize)

     def _check_quantize_args(self, mode, compute_dtype):
@@ -1320,6 +1337,8 @@ class Layer(BackendLayer, Operation):
             return self._int4_call(*args, **kwargs)
         elif self.quantization_mode == "gptq":
             return self._gptq_call(*args, **kwargs)
+        elif self.quantization_mode == "awq":
+            return self._awq_call(*args, **kwargs)
         else:
             raise self._quantization_mode_error(self.quantization_mode)

@@ -1335,6 +1354,9 @@ class Layer(BackendLayer, Operation):
     def _gptq_call(self, *args, **kwargs):
         raise self._not_implemented_error(self._gptq_call)

+    def _awq_call(self, *args, **kwargs):
+        raise self._not_implemented_error(self._awq_call)
+
     def _not_implemented_error(self, attr, msg=None):
         if callable(attr):
             attr_name = attr.__name__
@@ -1368,15 +1390,7 @@ class Layer(BackendLayer, Operation):
         for i, v in enumerate(all_vars):
             store[f"{i}"] = v

-    def load_own_variables(self, store):
-        """Loads the state of the layer.
-
-        You can override this method to take full control of how the state of
-        the layer is loaded upon calling `keras.models.load_model()`.
-
-        Args:
-            store: Dict from which the state of the model will be loaded.
-        """
+    def _check_load_own_variables(self, store):
         all_vars = self._trainable_variables + self._non_trainable_variables
         if len(store.keys()) != len(all_vars):
             if len(all_vars) == 0 and not self.built:
@@ -1409,6 +1423,18 @@ class Layer(BackendLayer, Operation):
                 f"{len(store.keys())} variables during loading. "
                 f"Expected: {[v.name for v in all_vars]}"
             )
+
+    def load_own_variables(self, store):
+        """Loads the state of the layer.
+
+        You can override this method to take full control of how the state of
+        the layer is loaded upon calling `keras.models.load_model()`.
+
+        Args:
+            store: Dict from which the state of the model will be loaded.
+        """
+        self._check_load_own_variables(store)
+        all_vars = self._trainable_variables + self._non_trainable_variables
         for i, v in enumerate(all_vars):
             v.assign(store[f"{i}"])

@@ -1889,6 +1915,10 @@ def get_shapes_dict(call_spec):
    {"input_a_shape": (2, 3)}
    ```
    """
+
+    def standardize_shape_or_none(x):
+        return None if x is None else backend.standardize_shape(x.shape)
+
    shapes_dict = {}
    for k, v in call_spec.tensor_arguments_dict.items():
        if k == "mask" or k.endswith("_mask"):
@@ -1899,10 +1929,10 @@ def get_shapes_dict(call_spec):
            continue
        if k in call_spec.nested_tensor_argument_names:
            shapes_dict[f"{k}_shape"] = tree.map_structure(
-                lambda x: backend.standardize_shape(x.shape), v
+                standardize_shape_or_none, v
            )
        else:
-            shapes_dict[f"{k}_shape"] = backend.standardize_shape(v.shape)
+            shapes_dict[f"{k}_shape"] = standardize_shape_or_none(v)
    return shapes_dict

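
The `layer.py` changes route `quantize()` through `validate_and_resolve_config`, add an `awq` dispatch branch with an `_awq_call` stub, preserve `_keras_mask` across input dtype conversion, split the variable-count check into `_check_load_own_variables`, and block implicit GPTQ via dtype policies. A hedged sketch of the policy-driven path that still works, assuming `keras.layers.Dense` and the standard `"int8_from_float32"` policy name:

```python
import keras

# Assigning a quantized dtype policy to an already-built layer still
# quantizes it in place via the (now config-aware) `quantize()` wrapper.
layer = keras.layers.Dense(16)
layer.build((None, 8))
layer.dtype_policy = "int8_from_float32"
print(layer.quantization_mode)  # "int8"

# GPTQ, by contrast, can no longer be enabled this way: per the new check,
# a GPTQ-mode policy on a built layer raises a ValueError pointing to
# `layer.quantize("gptq", config=...)` with a calibration dataset.
```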
keras/src/layers/merging/dot.py
CHANGED

@@ -41,6 +41,7 @@ def batch_dot(x, y, axes=None):
        axes: Tuple or list of integers with target dimensions, or single
            integer. The sizes of `x.shape[axes[0]]` and `y.shape[axes[1]]`
            should be equal.
+            Note that axis `0` (the batch axis) cannot be included.

    Returns:
        A tensor with shape equal to the concatenation of `x`'s shape
@@ -226,7 +227,8 @@ class Dot(Merge):
            take the dot product. If a tuple, should be two integers
            corresponding to the desired axis from the first input and the
            desired axis from the second input, respectively. Note that the
-            size of the two selected axes must match.
+            size of the two selected axes must match, and that
+            axis `0` (the batch axis) cannot be included.
        normalize: Whether to L2-normalize samples along the dot product axis
            before taking the dot product. If set to `True`, then
            the output of the dot product is the cosine proximity
@@ -363,6 +365,7 @@ def dot(inputs, axes=-1, **kwargs):
        inputs: A list of input tensors (at least 2).
        axes: Integer or tuple of integers,
            axis or axes along which to take the dot product.
+            Note that axis `0` (the batch axis) cannot be included.
        normalize: Whether to L2-normalize samples along the
            dot product axis before taking the dot product.
            If set to `True`, then the output of the dot product
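
The `dot.py` docstring updates spell out an existing constraint: the contraction `axes` for `batch_dot`, `Dot`, and `dot` must not include axis `0`, which is reserved as the batch axis. For example:

```python
import numpy as np
import keras

x1 = np.random.random((4, 3, 5))
x2 = np.random.random((4, 5, 2))

# Contract axis 2 of `x1` with axis 1 of `x2`; axis 0 (the batch axis)
# is never passed in `axes`.
y = keras.layers.Dot(axes=(2, 1))([x1, x2])
print(y.shape)  # (4, 3, 2)
```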
|