keras-nightly 3.12.0.dev2025083103__py3-none-any.whl → 3.14.0.dev2026011604__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
- keras/__init__.py +1 -0
- keras/_tf_keras/keras/__init__.py +1 -0
- keras/_tf_keras/keras/callbacks/__init__.py +3 -0
- keras/_tf_keras/keras/distillation/__init__.py +16 -0
- keras/_tf_keras/keras/distribution/__init__.py +3 -0
- keras/_tf_keras/keras/dtype_policies/__init__.py +6 -0
- keras/_tf_keras/keras/layers/__init__.py +21 -0
- keras/_tf_keras/keras/ops/__init__.py +16 -0
- keras/_tf_keras/keras/ops/image/__init__.py +1 -0
- keras/_tf_keras/keras/ops/linalg/__init__.py +1 -0
- keras/_tf_keras/keras/ops/nn/__init__.py +3 -0
- keras/_tf_keras/keras/ops/numpy/__init__.py +12 -0
- keras/_tf_keras/keras/quantizers/__init__.py +13 -0
- keras/callbacks/__init__.py +3 -0
- keras/distillation/__init__.py +16 -0
- keras/distribution/__init__.py +3 -0
- keras/dtype_policies/__init__.py +6 -0
- keras/layers/__init__.py +21 -0
- keras/ops/__init__.py +16 -0
- keras/ops/image/__init__.py +1 -0
- keras/ops/linalg/__init__.py +1 -0
- keras/ops/nn/__init__.py +3 -0
- keras/ops/numpy/__init__.py +12 -0
- keras/quantizers/__init__.py +13 -0
- keras/src/applications/imagenet_utils.py +4 -1
- keras/src/backend/common/backend_utils.py +30 -6
- keras/src/backend/common/dtypes.py +6 -12
- keras/src/backend/common/name_scope.py +2 -1
- keras/src/backend/common/variables.py +38 -20
- keras/src/backend/jax/core.py +126 -78
- keras/src/backend/jax/distribution_lib.py +16 -2
- keras/src/backend/jax/layer.py +3 -1
- keras/src/backend/jax/linalg.py +4 -0
- keras/src/backend/jax/nn.py +511 -29
- keras/src/backend/jax/numpy.py +109 -23
- keras/src/backend/jax/optimizer.py +3 -2
- keras/src/backend/jax/trainer.py +18 -3
- keras/src/backend/numpy/linalg.py +4 -0
- keras/src/backend/numpy/nn.py +313 -2
- keras/src/backend/numpy/numpy.py +97 -8
- keras/src/backend/openvino/__init__.py +1 -0
- keras/src/backend/openvino/core.py +6 -23
- keras/src/backend/openvino/linalg.py +4 -0
- keras/src/backend/openvino/nn.py +271 -20
- keras/src/backend/openvino/numpy.py +1369 -195
- keras/src/backend/openvino/random.py +7 -14
- keras/src/backend/tensorflow/layer.py +43 -9
- keras/src/backend/tensorflow/linalg.py +24 -0
- keras/src/backend/tensorflow/nn.py +545 -1
- keras/src/backend/tensorflow/numpy.py +351 -56
- keras/src/backend/tensorflow/trainer.py +6 -2
- keras/src/backend/torch/core.py +3 -1
- keras/src/backend/torch/linalg.py +4 -0
- keras/src/backend/torch/nn.py +125 -0
- keras/src/backend/torch/numpy.py +109 -9
- keras/src/backend/torch/trainer.py +8 -2
- keras/src/callbacks/__init__.py +1 -0
- keras/src/callbacks/callback_list.py +45 -11
- keras/src/callbacks/model_checkpoint.py +5 -0
- keras/src/callbacks/orbax_checkpoint.py +332 -0
- keras/src/callbacks/terminate_on_nan.py +54 -5
- keras/src/datasets/cifar10.py +5 -0
- keras/src/distillation/__init__.py +1 -0
- keras/src/distillation/distillation_loss.py +390 -0
- keras/src/distillation/distiller.py +598 -0
- keras/src/distribution/distribution_lib.py +14 -0
- keras/src/dtype_policies/__init__.py +4 -0
- keras/src/dtype_policies/dtype_policy.py +180 -1
- keras/src/export/__init__.py +2 -0
- keras/src/export/export_utils.py +39 -2
- keras/src/export/litert.py +248 -0
- keras/src/export/onnx.py +6 -0
- keras/src/export/openvino.py +1 -1
- keras/src/export/tf2onnx_lib.py +3 -0
- keras/src/layers/__init__.py +13 -0
- keras/src/layers/activations/softmax.py +9 -4
- keras/src/layers/attention/attention.py +1 -1
- keras/src/layers/attention/multi_head_attention.py +4 -1
- keras/src/layers/core/dense.py +406 -102
- keras/src/layers/core/einsum_dense.py +521 -116
- keras/src/layers/core/embedding.py +257 -99
- keras/src/layers/core/input_layer.py +1 -0
- keras/src/layers/core/reversible_embedding.py +399 -0
- keras/src/layers/input_spec.py +17 -17
- keras/src/layers/layer.py +50 -15
- keras/src/layers/merging/concatenate.py +6 -5
- keras/src/layers/merging/dot.py +4 -1
- keras/src/layers/pooling/adaptive_average_pooling1d.py +65 -0
- keras/src/layers/pooling/adaptive_average_pooling2d.py +62 -0
- keras/src/layers/pooling/adaptive_average_pooling3d.py +63 -0
- keras/src/layers/pooling/adaptive_max_pooling1d.py +65 -0
- keras/src/layers/pooling/adaptive_max_pooling2d.py +62 -0
- keras/src/layers/pooling/adaptive_max_pooling3d.py +63 -0
- keras/src/layers/pooling/base_adaptive_pooling.py +63 -0
- keras/src/layers/preprocessing/discretization.py +6 -5
- keras/src/layers/preprocessing/feature_space.py +8 -4
- keras/src/layers/preprocessing/image_preprocessing/aug_mix.py +2 -2
- keras/src/layers/preprocessing/image_preprocessing/bounding_boxes/validation.py +5 -5
- keras/src/layers/preprocessing/image_preprocessing/random_contrast.py +3 -3
- keras/src/layers/preprocessing/image_preprocessing/resizing.py +10 -0
- keras/src/layers/preprocessing/index_lookup.py +19 -1
- keras/src/layers/preprocessing/normalization.py +16 -1
- keras/src/layers/preprocessing/string_lookup.py +26 -28
- keras/src/layers/regularization/dropout.py +43 -1
- keras/src/layers/rnn/gru.py +1 -1
- keras/src/layers/rnn/lstm.py +2 -2
- keras/src/layers/rnn/rnn.py +19 -0
- keras/src/layers/rnn/simple_rnn.py +1 -1
- keras/src/legacy/preprocessing/image.py +4 -1
- keras/src/legacy/preprocessing/sequence.py +20 -12
- keras/src/losses/loss.py +1 -1
- keras/src/losses/losses.py +24 -0
- keras/src/metrics/confusion_metrics.py +7 -6
- keras/src/models/cloning.py +4 -0
- keras/src/models/functional.py +11 -3
- keras/src/models/model.py +195 -44
- keras/src/ops/image.py +257 -20
- keras/src/ops/linalg.py +93 -0
- keras/src/ops/nn.py +268 -2
- keras/src/ops/numpy.py +701 -44
- keras/src/ops/operation.py +90 -29
- keras/src/ops/operation_utils.py +2 -0
- keras/src/optimizers/adafactor.py +29 -10
- keras/src/optimizers/base_optimizer.py +22 -3
- keras/src/optimizers/loss_scale_optimizer.py +51 -18
- keras/src/optimizers/muon.py +65 -31
- keras/src/optimizers/schedules/learning_rate_schedule.py +4 -3
- keras/src/quantizers/__init__.py +14 -1
- keras/src/quantizers/awq.py +361 -0
- keras/src/quantizers/awq_config.py +140 -0
- keras/src/quantizers/awq_core.py +217 -0
- keras/src/quantizers/gptq.py +346 -207
- keras/src/quantizers/gptq_config.py +63 -13
- keras/src/quantizers/gptq_core.py +328 -215
- keras/src/quantizers/quantization_config.py +246 -0
- keras/src/quantizers/quantizers.py +407 -38
- keras/src/quantizers/utils.py +23 -0
- keras/src/random/seed_generator.py +6 -4
- keras/src/saving/file_editor.py +81 -6
- keras/src/saving/orbax_util.py +26 -0
- keras/src/saving/saving_api.py +37 -14
- keras/src/saving/saving_lib.py +1 -1
- keras/src/testing/__init__.py +1 -0
- keras/src/testing/test_case.py +45 -5
- keras/src/trainers/compile_utils.py +38 -17
- keras/src/trainers/data_adapters/grain_dataset_adapter.py +1 -5
- keras/src/tree/torchtree_impl.py +215 -0
- keras/src/tree/tree_api.py +6 -1
- keras/src/utils/backend_utils.py +31 -4
- keras/src/utils/dataset_utils.py +234 -35
- keras/src/utils/file_utils.py +49 -11
- keras/src/utils/image_utils.py +14 -2
- keras/src/utils/jax_layer.py +244 -55
- keras/src/utils/module_utils.py +29 -0
- keras/src/utils/progbar.py +10 -12
- keras/src/utils/python_utils.py +5 -0
- keras/src/utils/rng_utils.py +9 -1
- keras/src/utils/tracking.py +70 -5
- keras/src/version.py +1 -1
- {keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/METADATA +16 -6
- {keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/RECORD +163 -142
- keras/src/quantizers/gptq_quant.py +0 -133
- {keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/WHEEL +0 -0
- {keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/top_level.txt +0 -0
keras/src/layers/core/embedding.py

```diff
@@ -10,6 +10,8 @@ from keras.src import regularizers
 from keras.src.api_export import keras_export
 from keras.src.backend import KerasTensor
 from keras.src.layers.layer import Layer
+from keras.src.quantizers.quantization_config import QuantizationConfig
+from keras.src.saving import serialization_lib
 
 
 @keras_export("keras.layers.Embedding")
```
```diff
@@ -90,6 +92,7 @@ class Embedding(Layer):
         weights=None,
         lora_rank=None,
         lora_alpha=None,
+        quantization_config=None,
         **kwargs,
     ):
         input_length = kwargs.pop("input_length", None)
```
```diff
@@ -109,6 +112,7 @@ class Embedding(Layer):
         self.lora_rank = lora_rank
         self.lora_alpha = lora_alpha if lora_alpha is not None else lora_rank
         self.lora_enabled = False
+        self.quantization_config = quantization_config
 
         if weights is not None:
             self.build()
```
```diff
@@ -120,9 +124,13 @@ class Embedding(Layer):
         if self.built:
             return
         embeddings_shape = (self.input_dim, self.output_dim)
-        if self.quantization_mode
-            self.quantized_build(
-
+        if self.quantization_mode:
+            self.quantized_build(
+                embeddings_shape,
+                mode=self.quantization_mode,
+                config=self.quantization_config,
+            )
+        if self.quantization_mode not in ("int8", "int4"):
             self._embeddings = self.add_weight(
                 shape=embeddings_shape,
                 initializer=self.embeddings_initializer,
```
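The build path above dispatches to `quantized_build()` whenever a quantized dtype policy is active, so the float `_embeddings` weight is only created for unquantized modes. A minimal sketch of that behavior, assuming the standard Keras quantized policy name `int8_from_float32`; the weight layout noted in the comment is an expectation, not output copied from the package:

```python
# Sketch only: build() under a quantized dtype policy goes through
# quantized_build() instead of creating a float embeddings table.
import keras

layer = keras.layers.Embedding(
    input_dim=1000,
    output_dim=64,
    dtype="int8_from_float32",  # quantized dtype policy (assumed name)
)
layer.build()
print([v.path for v in layer.weights])  # expect an int8 table plus a scale
```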
```diff
@@ -137,12 +145,20 @@ class Embedding(Layer):
 
     @property
     def embeddings(self):
+        if not self.built:
+            raise AttributeError(
+                "You must build the layer before accessing `embeddings`."
+            )
+        embeddings = self._embeddings
+        if self.quantization_mode == "int4":
+            embeddings = quantizers.unpack_int4(
+                embeddings, self._orig_output_dim, axis=-1
+            )
         if self.lora_enabled:
-            return self.
-                self.
-            )
-
-        return self._embeddings
+            return embeddings + (self.lora_alpha / self.lora_rank) * ops.matmul(
+                self.lora_embeddings_a, self.lora_embeddings_b
+            )
+        return embeddings
 
     def call(self, inputs):
         if inputs.dtype != "int32" and inputs.dtype != "int64":
```
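When LoRA is enabled, the property above returns the unpacked (and, for quantized modes, de-scaled) table plus the scaled low-rank update. The arithmetic is the usual LoRA merge; a small standalone NumPy illustration with made-up shapes (names mirror the diff, the numbers are arbitrary):

```python
import numpy as np

rank, alpha = 4, 8
W = np.random.randn(100, 16).astype("float32")    # dequantized base table
A = np.random.randn(100, rank).astype("float32")  # lora_embeddings_a
B = np.random.randn(rank, 16).astype("float32")   # lora_embeddings_b
W_eff = W + (alpha / rank) * (A @ B)              # what `embeddings` returns
```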
```diff
@@ -189,13 +205,13 @@ class Embedding(Layer):
         self._tracker.unlock()
         self.lora_embeddings_a = self.add_weight(
             name="lora_embeddings_a",
-            shape=(self.
+            shape=(self.input_dim, rank),
             initializer=initializers.get(a_initializer),
             regularizer=self.embeddings_regularizer,
         )
         self.lora_embeddings_b = self.add_weight(
             name="lora_embeddings_b",
-            shape=(rank, self.
+            shape=(rank, self.output_dim),
             initializer=initializers.get(b_initializer),
             regularizer=self.embeddings_regularizer,
         )
```
```diff
@@ -209,19 +225,26 @@ class Embedding(Layer):
         # Do nothing if the layer isn't yet built
         if not self.built:
             return
-
-
-
+        mode = self.quantization_mode
+        if mode not in self.variable_serialization_spec:
+            raise self._quantization_mode_error(mode)
+
+        # Embeddings plus optional merged LoRA-aware scale
+        # (returns (embeddings, None) for `None` mode).
+        embeddings_value, merged_kernel_scale = (
             self._get_embeddings_with_merged_lora()
         )
-
-
-        if
-
+        idx = 0
+        for name in self.variable_serialization_spec[mode]:
+            if name == "embeddings":
+                store[str(idx)] = embeddings_value
+            elif name == "embeddings_scale" and mode in ("int4", "int8"):
+                # For int4/int8, the merged LoRA scale (if any) comes from
+                # `_get_embeddings_with_merged_lora()`
+                store[str(idx)] = merged_kernel_scale
             else:
-
-
-                store[str(i)] = variable
+                store[str(idx)] = getattr(self, name)
+            idx += 1
 
     def load_own_variables(self, store):
         if not self.lora_enabled:
```
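The rewritten `save_own_variables()` walks `variable_serialization_spec` (added further down in this diff) and writes each variable under a positional string key. A hedged sketch of the resulting store layout:

```python
# Assumed layout; keys come from enumerating the spec for the active mode.
import keras

layer = keras.layers.Embedding(input_dim=10, output_dim=4)
layer.build()
store = {}
layer.save_own_variables(store)
# Unquantized mode:  {"0": embeddings}
# int8 / int4 modes: {"0": (packed) quantized embeddings, "1": embeddings_scale}
```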
```diff
@@ -229,16 +252,17 @@ class Embedding(Layer):
         # Do nothing if the layer isn't yet built
         if not self.built:
             return
-
-
-
-
-
-
+        mode = self.quantization_mode
+        if mode not in self.variable_serialization_spec:
+            raise self._quantization_mode_error(mode)
+
+        idx = 0
+        for name in self.variable_serialization_spec[mode]:
+            if name == "embeddings":
+                self._embeddings.assign(store[str(idx)])
             else:
-
-
-                variable.assign(store[str(i)])
+                getattr(self, name).assign(store[str(idx)])
+            idx += 1
         if self.lora_enabled:
             self.lora_embeddings_a.assign(
                 ops.zeros(self.lora_embeddings_a.shape)
```
```diff
@@ -265,62 +289,63 @@ class Embedding(Layer):
                 self.embeddings_constraint
             ),
             "mask_zero": self.mask_zero,
+            "quantization_config": serialization_lib.serialize_keras_object(
+                self.quantization_config
+            ),
         }
         if self.lora_rank:
             config["lora_rank"] = self.lora_rank
             config["lora_alpha"] = self.lora_alpha
         return {**base_config, **config}
 
-
-
-
-
-
-
-                "and thus it doesn't have any variables. "
-                f"However the weights file lists {len(store.keys())} "
-                "variables for this layer.\n"
-                "In most cases, this error indicates that either:\n\n"
-                "1. The layer is owned by a parent layer that "
-                "implements a `build()` method, but calling the "
-                "parent's `build()` method did NOT create the state of "
-                f"the child layer '{self.name}'. A `build()` method "
-                "must create ALL state for the layer, including "
-                "the state of any children layers.\n\n"
-                "2. You need to implement "
-                "the `def build_from_config(self, config)` method "
-                f"on layer '{self.name}', to specify how to rebuild "
-                "it during loading. "
-                "In this case, you might also want to implement the "
-                "method that generates the build config at saving time, "
-                "`def get_build_config(self)`. "
-                "The method `build_from_config()` is meant "
-                "to create the state "
-                "of the layer (i.e. its variables) upon deserialization.",
-            )
-        raise ValueError(
-            f"Layer '{self.name}' expected {len(all_vars)} variables, "
-            "but received "
-            f"{len(store.keys())} variables during loading. "
-            f"Expected: {[v.name for v in all_vars]}"
+    @classmethod
+    def from_config(cls, config):
+        config = config.copy()
+        config["quantization_config"] = (
+            serialization_lib.deserialize_keras_object(
+                config.get("quantization_config", None)
             )
-
-
+        )
+        return super().from_config(config)
 
     def _quantization_mode_error(self, mode):
         return NotImplementedError(
-            "Invalid quantization mode. Expected 'int8'. "
+            "Invalid quantization mode. Expected one of ('int8', 'int4'). "
             f"Received: quantization_mode={mode}"
         )
 
-
+    @property
+    def variable_serialization_spec(self):
+        """Returns a dict mapping quantization modes to variable names in order.
+
+        This spec is used by `save_own_variables` and `load_own_variables` to
+        determine the correct ordering of variables during serialization for
+        each quantization mode. `None` means no quantization.
+        """
+        return {
+            None: [
+                "embeddings",
+            ],
+            "int8": [
+                "embeddings",
+                "embeddings_scale",
+            ],
+            "int4": [
+                "embeddings",
+                "embeddings_scale",
+            ],
+        }
+
+    def quantized_build(self, embeddings_shape, mode, config=None):
         if mode == "int8":
-            self._int8_build(embeddings_shape)
+            self._int8_build(embeddings_shape, config)
+        elif mode == "int4":
+            self._int4_build(embeddings_shape, config)
         else:
             raise self._quantization_mode_error(mode)
         self._is_quantized = True
 
-    def _int8_build(self, embeddings_shape):
+    def _int8_build(self, embeddings_shape, config=None):
         self._embeddings = self.add_weight(
             name="embeddings",
             shape=embeddings_shape,
```
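`get_config()` now serializes `quantization_config`, and the new `from_config()` override deserializes it before delegating to the parent class, so a plain config round trip preserves it. A short sketch using the default `quantization_config=None`, so no particular config class is assumed:

```python
import keras

layer = keras.layers.Embedding(input_dim=10, output_dim=4)
config = layer.get_config()       # now contains a "quantization_config" entry
restored = keras.layers.Embedding.from_config(config)
```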
```diff
@@ -338,10 +363,27 @@ class Embedding(Layer):
             trainable=False,
         )
 
-    def
-
-
+    def _int4_build(self, embeddings_shape, config=None):
+        input_dim, output_dim = embeddings_shape
+        packed_rows = (output_dim + 1) // 2  # ceil for odd dims
+
+        # Embeddings are stored *packed*: each int8 byte contains two int4
+        # values.
+        self._embeddings = self.add_weight(
+            name="embeddings",
+            shape=(input_dim, packed_rows),
+            initializer="zeros",
+            dtype="int8",
+            trainable=False,
+        )
+        self.embeddings_scale = self.add_weight(
+            name="embeddings_scale",
+            shape=(self.input_dim,),
+            initializer="ones",
+            trainable=False,
+        )
+        # Record original output_dim for unpacking at runtime.
+        self._orig_output_dim = output_dim
 
     def _int8_call(self, inputs, training=None):
         # We cannot update quantized self._embeddings, so the custom gradient is
```
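`_int4_build()` stores two signed int4 values per int8 byte, which is why the packed table has `(output_dim + 1) // 2` columns. The nibble arithmetic below is a standalone NumPy sketch of that idea; Keras's own `pack_int4`/`unpack_int4` additionally handle axis selection and padding, so this is illustrative, not the library implementation:

```python
import numpy as np

output_dim = 5                       # odd on purpose
packed_cols = (output_dim + 1) // 2  # -> 3 bytes per row
vals = np.array([-8, -1, 0, 3, 7], dtype=np.int8)  # int4 range is [-8, 7]
padded = np.append(vals, 0).astype(np.int8)        # pad to an even length
low, high = padded[0::2] & 0x0F, padded[1::2] & 0x0F
packed = ((high << 4) | low).astype(np.int8)       # packed_cols bytes

# Unpack: pull the nibbles back out and sign-extend them.
low_u = (packed & 0x0F).astype(np.int8)
high_u = ((packed.astype(np.uint8) >> 4) & 0x0F).astype(np.int8)
low_u[low_u > 7] -= 16
high_u[high_u > 7] -= 16
unpacked = np.stack([low_u, high_u], axis=1).reshape(-1)[:output_dim]
assert np.array_equal(unpacked, vals)
```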
```diff
@@ -363,49 +405,165 @@ class Embedding(Layer):
         )
         return outputs
 
-    def
-        #
+    def _int4_call(self, inputs, training=None):
+        # We cannot update quantized self._embeddings, so the custom gradient is
+        # not needed
+        if backend.standardize_dtype(inputs.dtype) not in ("int32", "int64"):
+            inputs = ops.cast(inputs, "int32")
+        embeddings_scale = ops.take(self.embeddings_scale, inputs, axis=0)
+        unpacked_embeddings = quantizers.unpack_int4(
+            self._embeddings, self._orig_output_dim, axis=-1
+        )
+        outputs = ops.take(unpacked_embeddings, inputs, axis=0)
+        # De-scale outputs
+        outputs = ops.divide(
+            ops.cast(outputs, dtype=self.compute_dtype),
+            ops.expand_dims(embeddings_scale, axis=-1),
+        )
+        if self.lora_enabled:
+            lora_outputs = ops.take(self.lora_embeddings_a, inputs, axis=0)
+            lora_outputs = ops.matmul(lora_outputs, self.lora_embeddings_b)
+            outputs = ops.add(
+                outputs, (self.lora_alpha / self.lora_rank) * lora_outputs
+            )
+        return outputs
+
+    def quantize(self, mode=None, type_check=True, config=None):
+        # Prevent quantization of the subclasses.
         if type_check and (type(self) is not Embedding):
             raise self._not_implemented_error(self.quantize)
 
+        self.quantization_config = config
+
         embeddings_shape = (self.input_dim, self.output_dim)
         if mode == "int8":
             # Quantize `self._embeddings` to int8 and compute corresponding
-            # scale
-
-            self.
+            # scale.
+            weight_quantizer = QuantizationConfig.weight_quantizer_or_default(
+                self.quantization_config,
+                quantizers.AbsMaxQuantizer(axis=-1),
+            )
+            embeddings_value, embeddings_scale = weight_quantizer(
+                self._embeddings, to_numpy=True
             )
             embeddings_scale = ops.squeeze(embeddings_scale, axis=-1)
             del self._embeddings
-
-
+            self.quantized_build(
+                embeddings_shape, mode, self.quantization_config
+            )
             self._embeddings.assign(embeddings_value)
             self.embeddings_scale.assign(embeddings_scale)
+        elif mode == "int4":
+            # Quantize to int4 values (stored in int8 dtype, range [-8, 7]).
+            weight_quantizer = QuantizationConfig.weight_quantizer_or_default(
+                self.quantization_config,
+                quantizers.AbsMaxQuantizer(
+                    axis=-1,
+                    value_range=(-8, 7),
+                    output_dtype="int8",
+                ),
+            )
+            embeddings_value, embeddings_scale = weight_quantizer(
+                self._embeddings, to_numpy=True
+            )
+            embeddings_scale = ops.squeeze(embeddings_scale, axis=-1)
+            # 2. Pack two int4 values into a single int8 byte.
+            packed_embeddings_value, _, _ = quantizers.pack_int4(
+                embeddings_value, axis=-1
+            )
+            del self._embeddings
+            self.quantized_build(
+                embeddings_shape, mode, self.quantization_config
+            )
+            self._embeddings.assign(packed_embeddings_value)
+            self.embeddings_scale.assign(embeddings_scale)
+        else:
+            raise self._quantization_mode_error(mode)
 
-        # Set new dtype policy
+        # Set new dtype policy.
         if self.dtype_policy.quantization_mode is None:
             policy = dtype_policies.get(f"{mode}_from_{self.dtype_policy.name}")
             self.dtype_policy = policy
 
     def _get_embeddings_with_merged_lora(self):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        """Returns the embeddings with LoRA matrices merged, for serialization.
+
+        This method is called by `save_own_variables` to produce a single
+        embeddings tensor that includes the adaptations from LoRA. This is
+        useful for deploying the model or for continuing training after
+        permanently applying the LoRA update.
+
+        If the layer is quantized (`int8` or `int4`), the process is:
+        1. Dequantize the base embeddings to float.
+        2. Compute the LoRA delta (`lora_embeddings_a @ lora_embeddings_b`) and
+           add it to the dequantized embeddings.
+        3. Re-quantize the merged result back to the original quantized
+           type (`int8` or packed `int4`), calculating a new scale factor.
+
+        If the layer is not quantized, this method returns the result of the
+        `embeddings` property (which computes the merge in floating-point) and a
+        scale of `None`.
+
+        If LoRA is not enabled, it returns the original embeddings and scale
+        without modification.
+
+        Returns:
+            A tuple `(embeddings_value, embeddings_scale)`:
+                `embeddings_value`: The merged embeddings. A quantized tensor if
+                    quantization is active, otherwise a high precision tensor.
+                `embeddings_scale`: The quantization scale for the merged
+                    embeddings. This is `None` if the layer is not quantized.
+        """
+        if self.dtype_policy.quantization_mode in (None, "gptq", "awq"):
+            return self.embeddings, None
+
+        embeddings_value = self._embeddings
+        embeddings_scale = self.embeddings_scale
+        if not self.lora_enabled:
             return embeddings_value, embeddings_scale
-
+
+        # Dequantize embeddings to float.
+        if self.quantization_mode == "int4":
+            unpacked_embeddings = quantizers.unpack_int4(
+                embeddings_value, self._orig_output_dim, axis=-1
+            )
+            float_embeddings = ops.divide(
+                ops.cast(unpacked_embeddings, self.compute_dtype),
+                ops.expand_dims(embeddings_scale, axis=-1),
+            )
+            quant_range = (-8, 7)
+        elif self.quantization_mode == "int8":
+            float_embeddings = ops.divide(
+                ops.cast(embeddings_value, self.compute_dtype),
+                ops.expand_dims(embeddings_scale, axis=-1),
+            )
+            quant_range = (-127, 127)
+        else:
+            raise ValueError(
+                f"Unsupported quantization mode: {self.quantization_mode}"
+            )
+
+        # Merge LoRA weights in float domain.
+        lora_delta = (self.lora_alpha / self.lora_rank) * ops.matmul(
+            self.lora_embeddings_a, self.lora_embeddings_b
+        )
+        merged_float_embeddings = ops.add(float_embeddings, lora_delta)
+
+        # Requantize.
+        requantized_embeddings, embeddings_scale = quantizers.abs_max_quantize(
+            merged_float_embeddings,
+            axis=-1,
+            value_range=quant_range,
+            dtype="int8",
+            to_numpy=True,
+        )
+        embeddings_scale = ops.squeeze(embeddings_scale, axis=-1)
+
+        # Pack if int4.
+        if self.quantization_mode == "int4":
+            embeddings_value, _, _ = quantizers.pack_int4(
+                requantized_embeddings, axis=-1
+            )
+        else:
+            embeddings_value = requantized_embeddings
+        return embeddings_value, embeddings_scale
```
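Taken together, the code paths above let an `Embedding` be quantized to int4 in place and then used like any other layer; saving it later routes through `_get_embeddings_with_merged_lora()`, which folds in any LoRA delta and re-quantizes. A short end-to-end sketch based on the hunks above (the lookup goes through `_int4_call()`; the printed shape is the usual `(batch, sequence, output_dim)`):

```python
import numpy as np
import keras

layer = keras.layers.Embedding(input_dim=100, output_dim=16)
layer.build()
layer.quantize("int4")      # packs the table and creates embeddings_scale

tokens = np.array([[3, 14, 15]])
outputs = layer(tokens)     # dequantized lookup via _int4_call()
print(outputs.shape)        # (1, 3, 16)
```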