keras-nightly 3.14.0.dev2026012804__py3-none-any.whl → 3.14.0.dev2026012904__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
 import copy
+import math
 
 from keras.src import dtype_policies
 from keras.src import layers
@@ -8,6 +9,8 @@ from keras.src.api_export import keras_export
 from keras.src.backend import KerasTensor
 from keras.src.backend import set_keras_mask
 from keras.src.quantizers.quantization_config import QuantizationConfig
+from keras.src.quantizers.quantization_config import get_block_size_for_layer
+from keras.src.quantizers.quantizers import dequantize_with_sz_map
 
 
 @keras_export("keras.layers.ReversibleEmbedding")
@@ -125,7 +128,7 @@ class ReversibleEmbedding(layers.Embedding):
             return result
         else:
             if self.tie_weights:
-                kernel = ops.transpose(ops.convert_to_tensor(self.embeddings))
+                kernel = ops.transpose(self.embeddings)
             else:
                 kernel = self.reverse_embeddings
             if self.reverse_dtype is not None:
@@ -180,6 +183,9 @@ class ReversibleEmbedding(layers.Embedding):
             variable_spec.append("reverse_embeddings")
             if mode in ("int4", "int8"):
                 variable_spec.append("reverse_embeddings_scale")
+                if mode == "int4":
+                    # reverse_embeddings_zero only exists for sub-channel
+                    variable_spec.append("reverse_embeddings_zero")
         return _spec
 
     def quantized_build(self, embeddings_shape, mode, config=None):
@@ -235,13 +241,34 @@ class ReversibleEmbedding(layers.Embedding):
                 dtype="int8",
                 trainable=False,
             )
+
+            # Determine block_size from config or dtype_policy
+            block_size = get_block_size_for_layer(self, config)
+
+            if block_size is None or block_size == -1:
+                # Per-channel: one scale per output unit (input_dim)
+                reverse_scale_shape = (self.input_dim,)
+            else:
+                # Grouped: scale per group along output_dim (axis=0)
+                n_groups = math.ceil(self.output_dim / block_size)
+                reverse_scale_shape = (n_groups, self.input_dim)
+
             self.reverse_embeddings_scale = self.add_weight(
                 name="reverse_embeddings_scale",
-                shape=(self.input_dim,),
+                shape=reverse_scale_shape,
                 initializer="ones",
                 trainable=False,
             )
 
+            # Zero point for asymmetric grouped quantization
+            if block_size is not None and block_size != -1:
+                self.reverse_embeddings_zero = self.add_weight(
+                    name="reverse_embeddings_zero",
+                    shape=reverse_scale_shape,
+                    initializer="zeros",
+                    trainable=False,
+                )
+
     def _int8_call(self, inputs, reverse=False):
         if not reverse:
             return super()._int8_call(inputs)
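
The grouped-scale bookkeeping added above reduces to a few lines of arithmetic. A standalone restatement (illustrative only, not the keras implementation):

import math

def reverse_scale_shape(input_dim, output_dim, block_size):
    # Mirrors the add_weight shapes chosen in quantized_build above:
    # per-channel keeps one scale per vocabulary entry; grouped mode keeps
    # one (scale, zero) pair per block of `block_size` output features.
    if block_size is None or block_size == -1:
        return (input_dim,)
    n_groups = math.ceil(output_dim / block_size)
    return (n_groups, input_dim)

# e.g. a 32000 x 4096 embedding with 128-wide groups keeps
# ceil(4096 / 128) = 32 scales (and zero points) per vocabulary entry.
print(reverse_scale_shape(32000, 4096, None))  # (32000,)
print(reverse_scale_shape(32000, 4096, 128))   # (32, 32000)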
@@ -272,23 +299,79 @@ class ReversibleEmbedding(layers.Embedding):
         if not reverse:
             return super()._int4_call(inputs)
         else:
+            block_size = getattr(self, "_int4_block_size", None)
+
             if self.tie_weights:
                 embeddings = ops.transpose(self._embeddings)
-                scale = ops.transpose(self.embeddings_scale)
+                scale = self.embeddings_scale
+                # For tied weights, scale shape is (input_dim,) or
+                # (input_dim, n_groups). For per-channel, transpose scale.
+                if block_size is None or block_size == -1:
+                    scale = ops.transpose(scale)
             else:
                 embeddings = self.reverse_embeddings
                 scale = self.reverse_embeddings_scale
+
             unpacked_embeddings = quantizers.unpack_int4(
                 embeddings, self.output_dim, axis=0
             )
+
             if self.inputs_quantizer:
                 inputs, inputs_scale = self.inputs_quantizer(inputs)
             else:
                 inputs_scale = ops.ones((1,), dtype=self.compute_dtype)
-            logits = ops.matmul(inputs, unpacked_embeddings)
-            # De-scale outputs
-            logits = ops.cast(logits, self.compute_dtype)
-            logits = ops.divide(logits, ops.multiply(inputs_scale, scale))
+
+            if block_size is None or block_size == -1:
+                # Per-channel: do matmul then dequantize
+                logits = ops.matmul(inputs, unpacked_embeddings)
+                logits = ops.cast(logits, self.compute_dtype)
+                logits = ops.divide(logits, ops.multiply(inputs_scale, scale))
+            elif self.tie_weights:
+                # Sub-channel with asymmetric quantization (tied weights)
+                # Must dequantize embeddings before matmul for correctness
+                # unpacked_embeddings shape: (output_dim, input_dim)
+                # scale shape: (input_dim, n_groups)
+                # embeddings_zero shape: (input_dim, n_groups)
+                # g_idx shape: (output_dim,)
+
+                # Transpose scale/zero for dequantization:
+                # [input_dim, n_groups] -> [n_groups, input_dim]
+                scale_t = ops.transpose(scale)
+                zero_t = ops.transpose(self.embeddings_zero)
+
+                float_embeddings = dequantize_with_sz_map(
+                    ops.cast(unpacked_embeddings, self.compute_dtype),
+                    scale_t,
+                    zero_t,
+                    self.g_idx,
+                    group_axis=0,
+                )
+
+                # inputs shape: (batch, output_dim)
+                # float_embeddings shape: (output_dim, input_dim)
+                logits = ops.matmul(inputs, float_embeddings)
+                logits = ops.divide(logits, inputs_scale)
+            else:
+                # Untied weights with asymmetric grouped quantization
+                # Must dequantize embeddings before matmul for correctness
+                # unpacked_embeddings shape: (output_dim, input_dim)
+                # scale shape: (n_groups, input_dim)
+                # reverse_embeddings_zero shape: (n_groups, input_dim)
+                # g_idx shape: (output_dim,) - reuse from forward pass
+
+                float_embeddings = dequantize_with_sz_map(
+                    ops.cast(unpacked_embeddings, self.compute_dtype),
+                    scale,
+                    self.reverse_embeddings_zero,
+                    self.g_idx,
+                    group_axis=0,
+                )
+
+                # inputs shape: (batch, output_dim)
+                # float_embeddings shape: (output_dim, input_dim)
+                logits = ops.matmul(inputs, float_embeddings)
+                logits = ops.divide(logits, inputs_scale)
+
             # Optionally soft-cap logits.
             if self.logit_soft_cap is not None:
                 soft_cap = self.logit_soft_cap
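
The sub-channel branches above defer to dequantize_with_sz_map, whose implementation is not part of this diff. Based on the shape comments (per-group scale/zero looked up through g_idx along group_axis), a NumPy sketch of the assumed (q - zero) * scale mapping; the name and exact formula are assumptions, not keras code:

import numpy as np

def dequantize_with_group_map(q, scale, zero, g_idx, group_axis=0):
    # q:      int4 values stored as integers, e.g. shape (output_dim, input_dim)
    # scale:  per-group scales, shape (n_groups, input_dim)
    # zero:   per-group zero points, same shape as scale
    # g_idx:  maps each index along `group_axis` to its group, shape (output_dim,)
    scale_rows = np.take(scale, g_idx, axis=group_axis)
    zero_rows = np.take(zero, g_idx, axis=group_axis)
    # Assumed asymmetric dequantization: x = (q - zero) * scale
    return (q.astype(np.float32) - zero_rows) * scale_rows

output_dim, input_dim, block = 8, 4, 4
rng = np.random.default_rng(0)
q = rng.integers(-8, 8, size=(output_dim, input_dim))
g_idx = np.arange(output_dim) // block               # rows 0-3 -> group 0, rows 4-7 -> group 1
scale = rng.uniform(0.01, 0.1, size=(2, input_dim))  # (n_groups, input_dim)
zero = rng.integers(-2, 3, size=(2, input_dim)).astype(np.float32)
w = dequantize_with_group_map(q, scale, zero, g_idx, group_axis=0)
print(w.shape)  # (8, 4)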
@@ -340,60 +423,119 @@ class ReversibleEmbedding(layers.Embedding):
                 self.reverse_embeddings.assign(reverse_embeddings_value)
                 self.reverse_embeddings_scale.assign(reverse_embeddings_scale)
         elif mode == "int4":
-            # Quantize to int4 values (stored in int8 dtype, range [-8, 7]).
-            weight_quantizer = QuantizationConfig.weight_quantizer_or_default(
-                self.quantization_config,
-                quantizers.AbsMaxQuantizer(
-                    axis=-1,
-                    value_range=(-8, 7),
-                    output_dtype="int8",
-                ),
-            )
-            embeddings_value, embeddings_scale = weight_quantizer(
-                self._embeddings, to_numpy=True
+            from keras.src.quantizers.quantization_config import (
+                Int4QuantizationConfig,
             )
-            embeddings_scale = ops.squeeze(embeddings_scale, axis=-1)
-            # 2. Pack two int4 values into a single int8 byte.
-            packed_embeddings_value, _, _ = quantizers.pack_int4(
-                embeddings_value, axis=-1
-            )
-            del self._embeddings
-            if not self.tie_weights:
-                reverse_weight_quantizer = (
+
+            block_size = None
+            if isinstance(self.quantization_config, Int4QuantizationConfig):
+                block_size = self.quantization_config.block_size
+
+            use_grouped = block_size is not None and block_size != -1
+
+            # Quantize forward embeddings
+            if not use_grouped:
+                # Per-channel quantization
+                weight_quantizer = (
                     QuantizationConfig.weight_quantizer_or_default(
                         self.quantization_config,
                         quantizers.AbsMaxQuantizer(
-                            axis=0,
+                            axis=-1,
                             value_range=(-8, 7),
                             output_dtype="int8",
                         ),
                     )
                 )
-                reverse_embeddings_value, reverse_embeddings_scale = (
-                    reverse_weight_quantizer(
-                        self.reverse_embeddings, to_numpy=True
-                    )
+                embeddings_value, embeddings_scale = weight_quantizer(
+                    self._embeddings, to_numpy=True
                 )
-                reverse_embeddings_scale = ops.squeeze(
-                    reverse_embeddings_scale, axis=0
+                embeddings_scale = ops.squeeze(embeddings_scale, axis=-1)
+            else:
+                # Sub-channel quantization with asymmetric zero point
+                embeddings_t = ops.transpose(self._embeddings)
+                embeddings_value_t, scale_t, zero_t = (
+                    quantizers.abs_max_quantize_grouped_with_zero_point(
+                        embeddings_t,
+                        block_size=block_size,
+                        value_range=(-8, 7),
+                        dtype="int8",
+                        to_numpy=True,
+                    )
                 )
-                # Pack two int4 values into a single int8 byte.
+                # Transpose back to (input_dim, output_dim) layout
+                embeddings_value = ops.transpose(embeddings_value_t)
+                embeddings_scale = ops.transpose(scale_t)
+                embeddings_zero = ops.transpose(zero_t)
+
+            packed_embeddings_value, _, _ = quantizers.pack_int4(
+                embeddings_value, axis=-1
+            )
+            del self._embeddings
+
+            # Quantize reverse embeddings if not tied
+            if not self.tie_weights:
+                if not use_grouped:
+                    reverse_weight_quantizer = (
+                        QuantizationConfig.weight_quantizer_or_default(
+                            self.quantization_config,
+                            quantizers.AbsMaxQuantizer(
+                                axis=0,
+                                value_range=(-8, 7),
+                                output_dtype="int8",
+                            ),
+                        )
+                    )
+                    reverse_embeddings_value, reverse_embeddings_scale = (
+                        reverse_weight_quantizer(
+                            self.reverse_embeddings, to_numpy=True
+                        )
+                    )
+                    reverse_embeddings_scale = ops.squeeze(
+                        reverse_embeddings_scale, axis=0
+                    )
+                else:
+                    reverse_value, reverse_scale, reverse_zero = (
+                        quantizers.abs_max_quantize_grouped_with_zero_point(
+                            self.reverse_embeddings,
+                            block_size=block_size,
+                            value_range=(-8, 7),
+                            dtype="int8",
+                            to_numpy=True,
+                        )
+                    )
+                    reverse_embeddings_value = reverse_value
+                    reverse_embeddings_scale = reverse_scale
+                    reverse_embeddings_zero = reverse_zero
+
                 packed_reverse_embeddings_value, _, _ = quantizers.pack_int4(
                     reverse_embeddings_value, axis=0
                 )
                 del self.reverse_embeddings
+
             self.quantized_build(
                 embeddings_shape, mode, self.quantization_config
             )
             self._embeddings.assign(packed_embeddings_value)
             self.embeddings_scale.assign(embeddings_scale)
+            if use_grouped:
+                self.embeddings_zero.assign(embeddings_zero)
             if not self.tie_weights:
                 self.reverse_embeddings.assign(packed_reverse_embeddings_value)
                 self.reverse_embeddings_scale.assign(reverse_embeddings_scale)
+                if use_grouped:
+                    self.reverse_embeddings_zero.assign(reverse_embeddings_zero)
         else:
             raise self._quantization_mode_error(mode)
 
         # Set new dtype policy.
         if self.dtype_policy.quantization_mode is None:
-            policy = dtype_policies.get(f"{mode}_from_{self.dtype_policy.name}")
+            policy_name = mode
+            if mode == "int4":
+                # Include block_size in policy name for sub-channel quantization
+                block_size = get_block_size_for_layer(self, config)
+                block_size_value = -1 if block_size is None else block_size
+                policy_name = f"int4/{block_size_value}"
+            policy = dtype_policies.get(
+                f"{policy_name}_from_{self.dtype_policy.name}"
+            )
             self.dtype_policy = policy
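
abs_max_quantize_grouped_with_zero_point is likewise only called, not defined, in this diff. The sketch below is a generic grouped asymmetric int4 quantizer with the same return shape convention (values, per-group scale, per-group zero); keras's helper may derive scale and zero differently, so treat this as an illustration of the scheme rather than its implementation:

import numpy as np

def quantize_grouped_asymmetric(w, block_size, qmin=-8, qmax=7):
    # w: float weights, shape (output_dim, input_dim). Groups are blocks of
    # `block_size` rows along axis 0, one (scale, zero) pair per group per column.
    output_dim, input_dim = w.shape
    n_groups = -(-output_dim // block_size)  # ceil division
    scale = np.empty((n_groups, input_dim), dtype=np.float32)
    zero = np.empty((n_groups, input_dim), dtype=np.float32)
    q = np.empty_like(w, dtype=np.int8)
    for g in range(n_groups):
        rows = slice(g * block_size, min((g + 1) * block_size, output_dim))
        lo, hi = w[rows].min(axis=0), w[rows].max(axis=0)
        s = np.maximum(hi - lo, 1e-7) / (qmax - qmin)   # affine scale per column
        z = np.round(qmin - lo / s)                      # zero point per column
        q[rows] = np.clip(np.round(w[rows] / s + z), qmin, qmax).astype(np.int8)
        scale[g], zero[g] = s, z
    return q, scale, zero

w = np.random.default_rng(1).normal(size=(256, 16)).astype(np.float32)
q, scale, zero = quantize_grouped_asymmetric(w, block_size=128)
print(q.shape, scale.shape, zero.shape)  # (256, 16) (2, 16) (2, 16)

Note that the hunk above also folds the group size into the new dtype policy name (policy_name = f"int4/{block_size_value}", e.g. "int4/128_from_float32"), which is what lets get_block_size_for_layer recover the block size after the layer is saved and reloaded.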
@@ -213,8 +213,37 @@ class Discretization(DataLayer):
             return
         self.summary = np.array([[], []], dtype="float32")
 
+    def compute_output_shape(self, input_shape):
+        if self.output_mode == "int":
+            return input_shape
+
+        # Calculate depth (number of bins)
+        depth = (
+            len(self.bin_boundaries) + 1
+            if self.bin_boundaries is not None
+            else self.num_bins
+        )
+
+        if self.output_mode == "one_hot":
+            # For one_hot mode, add depth dimension
+            # If last dimension is 1, replace it with depth, otherwise append
+            if input_shape and input_shape[-1] == 1 and len(input_shape) > 1:
+                return tuple(input_shape[:-1]) + (depth,)
+            else:
+                return tuple(input_shape) + (depth,)
+        else:
+            if input_shape and len(input_shape) >= 2:
+                # Match to eager tensor, remove second and append depth
+                out_shape = (
+                    (input_shape[0],) + tuple(input_shape[2:]) + (depth,)
+                )
+                return out_shape
+            else:
+                return (depth,)
+
     def compute_output_spec(self, inputs):
-        return backend.KerasTensor(shape=inputs.shape, dtype=self.output_dtype)
+        output_shape = self.compute_output_shape(inputs.shape)
+        return backend.KerasTensor(shape=output_shape, dtype=self.output_dtype)
 
     def load_own_variables(self, store):
         if len(store) == 1:
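
A standalone restatement of the shape rule introduced above, with a few worked cases (depth is len(bin_boundaries) + 1 when boundaries are given, otherwise num_bins); this mirrors the new method rather than calling keras:

def discretization_output_shape(input_shape, output_mode, depth):
    # Same branching as Discretization.compute_output_shape in the hunk above.
    if output_mode == "int":
        return tuple(input_shape)
    if output_mode == "one_hot":
        if input_shape and input_shape[-1] == 1 and len(input_shape) > 1:
            return tuple(input_shape[:-1]) + (depth,)
        return tuple(input_shape) + (depth,)
    # multi_hot / count: drop the second axis and append depth
    if input_shape and len(input_shape) >= 2:
        return (input_shape[0],) + tuple(input_shape[2:]) + (depth,)
    return (depth,)

# With bin_boundaries=[0.0, 1.0, 2.0] there are len(boundaries) + 1 = 4 bins.
print(discretization_output_shape((32, 1), "one_hot", 4))     # (32, 4)
print(discretization_output_shape((32, 10), "one_hot", 4))    # (32, 10, 4)
print(discretization_output_shape((32, 10), "multi_hot", 4))  # (32, 4)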
@@ -9,11 +9,17 @@ from keras.src.quantizers.quantization_config import QuantizationConfig
 from keras.src.quantizers.quantizers import AbsMaxQuantizer
 from keras.src.quantizers.quantizers import Quantizer
 from keras.src.quantizers.quantizers import abs_max_quantize
+from keras.src.quantizers.quantizers import (
+    abs_max_quantize_grouped_with_zero_point,
+)
 from keras.src.quantizers.quantizers import compute_float8_amax_history
 from keras.src.quantizers.quantizers import compute_float8_scale
+from keras.src.quantizers.quantizers import compute_quantization_parameters
+from keras.src.quantizers.quantizers import dequantize_with_sz_map
 from keras.src.quantizers.quantizers import fake_quant_with_min_max_vars
 from keras.src.quantizers.quantizers import pack_int4
 from keras.src.quantizers.quantizers import quantize_and_dequantize
+from keras.src.quantizers.quantizers import quantize_with_sz_map
 from keras.src.quantizers.quantizers import unpack_int4
 from keras.src.saving import serialization_lib
 from keras.src.utils.naming import to_snake_case
@@ -99,14 +99,46 @@ class Int4QuantizationConfig(QuantizationConfig):
         weight_quantizer: Quantizer for weights.
         activation_quantizer: Quantizer for activations. If "default", uses
             AbsMaxQuantizer with axis=-1.
+        block_size: Size of groups along the input dimension for sub-channel
+            quantization. If a positive integer, uses sub-channel quantization
+            with `ceil(input_dim / block_size)` groups. If `None` or `-1`,
+            uses per-channel quantization (one scale per output channel).
+            Default: `128` (sub-channel with 128-element groups).
     """
 
-    def __init__(self, weight_quantizer=None, activation_quantizer="default"):
-        from keras.src.quantizers.quantizers import AbsMaxQuantizer
-
+    def __init__(
+        self,
+        weight_quantizer=None,
+        activation_quantizer="default",
+        block_size=128,
+    ):
         if activation_quantizer == "default":
-            activation_quantizer = AbsMaxQuantizer()
+            # Use weight-only quantization by default for int4
+            activation_quantizer = None
         super().__init__(weight_quantizer, activation_quantizer)
+
+        # Validate block_size
+        if block_size is not None and block_size != -1 and block_size <= 0:
+            raise ValueError(
+                f"block_size must be None, -1, or a positive integer. "
+                f"Received: block_size={block_size}"
+            )
+        self.block_size = block_size
+
+        # Sub-channel quantization does not support custom quantizers
+        is_sub_channel = block_size is not None and block_size > 0
+        has_custom_quantizer = (
+            self.weight_quantizer is not None
+            or self.activation_quantizer is not None
+        )
+        if is_sub_channel and has_custom_quantizer:
+            raise ValueError(
+                "Int4 sub-channel quantization (block_size > 0) does not "
+                "support custom quantizers. Either set block_size to None "
+                "or -1 for per-channel quantization, or remove the custom "
+                f"quantizer arguments. Received: block_size={block_size}"
+            )
+
         if self.weight_quantizer is not None:
             if self.weight_quantizer.value_range != (-8, 7):
                 raise ValueError(
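
A minimal usage sketch of the new constructor arguments. The keras.quantizers.Int4QuantizationConfig export path is an assumption (only Float8QuantizationConfig's export decorator appears in this diff); the signature and validation behavior come from the hunk above:

# Assumes Int4QuantizationConfig is exported under keras.quantizers.
from keras import quantizers

grouped = quantizers.Int4QuantizationConfig(block_size=128)      # sub-channel, 128-wide groups
per_channel = quantizers.Int4QuantizationConfig(block_size=-1)   # one scale per output channel

try:
    # Sub-channel mode rejects custom quantizers (see the ValueError above).
    quantizers.Int4QuantizationConfig(
        weight_quantizer=quantizers.AbsMaxQuantizer(
            axis=-1, value_range=(-8, 7), output_dtype="int8"
        ),
        block_size=128,
    )
except ValueError as e:
    print(e)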
@@ -126,6 +158,28 @@ class Int4QuantizationConfig(QuantizationConfig):
     def mode(self):
         return "int4"
 
+    def get_config(self):
+        config = super().get_config()
+        config["block_size"] = self.block_size
+        return config
+
+    @classmethod
+    def from_config(cls, config):
+        weight_quantizer = serialization_lib.deserialize_keras_object(
+            config.get("weight_quantizer")
+        )
+        activation_quantizer = serialization_lib.deserialize_keras_object(
+            config.get("activation_quantizer")
+        )
+        # Default to None for backwards compatibility with models saved
+        # before block_size was introduced (those used per-channel mode)
+        block_size = config.get("block_size", None)
+        return cls(
+            weight_quantizer=weight_quantizer,
+            activation_quantizer=activation_quantizer,
+            block_size=block_size,
+        )
+
 
 @keras_export("keras.quantizers.Float8QuantizationConfig")
 class Float8QuantizationConfig(QuantizationConfig):
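
A round-trip sketch for the new get_config/from_config pair, under the same export-path assumption as above; the legacy fallback mirrors the config.get("block_size", None) default in the hunk:

from keras import quantizers

config = quantizers.Int4QuantizationConfig(block_size=64)
payload = config.get_config()  # now carries "block_size": 64
restored = quantizers.Int4QuantizationConfig.from_config(payload)
assert restored.block_size == 64

# Configs saved before this change have no "block_size" key, so
# from_config falls back to None, i.e. per-channel quantization.
legacy = dict(payload)
legacy.pop("block_size")
assert quantizers.Int4QuantizationConfig.from_config(legacy).block_size is None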
@@ -244,3 +298,43 @@ def _validate_mode(mode):
             "Invalid quantization mode. "
             f"Expected one of {QUANTIZATION_MODES}. Received: mode={mode}"
         )
+
+
+def get_block_size_for_layer(layer, config):
+    """Determine the block size for int4 quantization.
+
+    The block size can be specified either through the `config` argument
+    or through the `dtype_policy` if it is of type `Int4DTypePolicy`.
+
+    The config argument is usually available when quantizing the layer
+    via the `quantize` method. If the layer was deserialized from a
+    saved model, the block size should be specified in the `dtype_policy`.
+
+    Args:
+        layer: The layer being quantized.
+        config: An optional configuration object that may contain the
+            `block_size` attribute.
+    Returns:
+        int or None. The determined block size for int4 quantization.
+        Returns `None` or `-1` for per-channel quantization.
+    """
+    from keras.src.dtype_policies.dtype_policy import Int4DTypePolicy
+    from keras.src.dtype_policies.dtype_policy_map import DTypePolicyMap
+
+    if config and isinstance(config, Int4QuantizationConfig):
+        return config.block_size
+    elif isinstance(layer.dtype_policy, Int4DTypePolicy):
+        block_size = layer.dtype_policy.block_size
+        # Convert -1 to None for consistency
+        return None if block_size == -1 else block_size
+    elif isinstance(layer.dtype_policy, DTypePolicyMap):
+        policy = layer.dtype_policy[layer.path]
+        if isinstance(policy, Int4DTypePolicy):
+            block_size = policy.block_size
+            return None if block_size == -1 else block_size
+        # Fall back to None for legacy QuantizedDTypePolicy
+        return None
+    else:
+        # For backwards compatibility with models that don't have
+        # Int4DTypePolicy (legacy per-channel mode)
+        return None