keras-nightly 3.14.0.dev2026011404__py3-none-any.whl → 3.14.0.dev2026011504__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras/_tf_keras/keras/dtype_policies/__init__.py +3 -0
- keras/_tf_keras/keras/quantizers/__init__.py +1 -0
- keras/dtype_policies/__init__.py +3 -0
- keras/quantizers/__init__.py +1 -0
- keras/src/backend/openvino/numpy.py +63 -2
- keras/src/dtype_policies/__init__.py +2 -0
- keras/src/dtype_policies/dtype_policy.py +90 -1
- keras/src/layers/core/dense.py +122 -6
- keras/src/layers/core/einsum_dense.py +151 -7
- keras/src/layers/core/embedding.py +1 -1
- keras/src/layers/layer.py +5 -0
- keras/src/models/model.py +7 -3
- keras/src/ops/numpy.py +9 -0
- keras/src/quantizers/__init__.py +2 -0
- keras/src/quantizers/awq.py +361 -0
- keras/src/quantizers/awq_config.py +140 -0
- keras/src/quantizers/awq_core.py +217 -0
- keras/src/quantizers/gptq.py +1 -2
- keras/src/quantizers/gptq_core.py +1 -1
- keras/src/quantizers/quantization_config.py +14 -0
- keras/src/quantizers/quantizers.py +61 -52
- keras/src/version.py +1 -1
- {keras_nightly-3.14.0.dev2026011404.dist-info → keras_nightly-3.14.0.dev2026011504.dist-info}/METADATA +1 -1
- {keras_nightly-3.14.0.dev2026011404.dist-info → keras_nightly-3.14.0.dev2026011504.dist-info}/RECORD +26 -23
- {keras_nightly-3.14.0.dev2026011404.dist-info → keras_nightly-3.14.0.dev2026011504.dist-info}/WHEEL +0 -0
- {keras_nightly-3.14.0.dev2026011404.dist-info → keras_nightly-3.14.0.dev2026011504.dist-info}/top_level.txt +0 -0
keras/src/layers/core/einsum_dense.py
CHANGED

@@ -180,7 +180,7 @@ class EinsumDense(Layer):
         # quantized to int8 or int4, because `quantized_build` has created the
         # appropriate kernel variable. For other modes (e.g., float8 or no
         # quantization), we still need the floating-point kernel.
-        if self.quantization_mode not in ("int8", "int4", "gptq"):
+        if self.quantization_mode not in ("int8", "int4", "gptq", "awq"):
             # If the layer is quantized to int8, `self._kernel` will be added
             # in `self._int8_build`. Therefore, we skip it here.
             self._kernel = self.add_weight(
@@ -219,15 +219,17 @@ class EinsumDense(Layer):
 
         mode = self.quantization_mode
         is_gptq = mode == "gptq"
+        is_awq = mode == "awq"
         is_int4 = mode == "int4"
-
+        gptq_calibrated = bool(getattr(self, "is_gptq_calibrated", False))
+        awq_calibrated = bool(getattr(self, "is_awq_calibrated", False))
         gptq_bits = (
             gptq_core.get_weight_bits_for_layer(self, None) if is_gptq else None
         )
 
         # Decide the source tensor first (packed vs already-quantized vs plain
         # kernel)
-        if is_gptq and
+        if is_gptq and gptq_calibrated and gptq_bits != 4:
             # calibrated GPTQ, not 4-bit, no unpacking needed
             kernel = self.quantized_kernel
         else:
@@ -241,13 +243,21 @@ class EinsumDense(Layer):
                 self._orig_length_along_pack_axis,
                 self._int4_pack_axis,
             )
-        elif is_gptq and
+        elif is_gptq and gptq_calibrated and gptq_bits == 4:
             kernel = quantizers.unpack_int4(
                 self.quantized_kernel,
                 orig_len=self.gptq_unpacked_column_size,
                 axis=0,
                 dtype="uint8",
             )
+        elif is_awq and awq_calibrated:
+            # AWQ always uses 4-bit quantization
+            kernel = quantizers.unpack_int4(
+                self.quantized_kernel,
+                orig_len=self.awq_unpacked_column_size,
+                axis=0,
+                dtype="uint8",
+            )
 
         # Apply LoRA if enabled
         if self.lora_enabled:
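Both the GPTQ and AWQ branches above recover 4-bit weights with `quantizers.unpack_int4`, which assumes two 4-bit codes are stored per byte (the same packing that leads `_awq_build` further down to allocate `(columns + 1) // 2` packed columns). The NumPy sketch below illustrates that packing scheme only; it is not the Keras implementation, and the low-nibble-first ordering is an assumption made for the example.

import numpy as np


def pack_int4(codes):
    # codes: uint8 array of 4-bit values (0..15). Pad to an even length, then
    # store two codes per byte (low nibble first -- an assumed ordering).
    codes = np.asarray(codes, dtype=np.uint8)
    if codes.shape[0] % 2:
        codes = np.concatenate([codes, np.zeros(1, dtype=np.uint8)])
    return (codes[0::2] & 0x0F) | ((codes[1::2] & 0x0F) << 4)


def unpack_int4(packed, orig_len):
    # Inverse of pack_int4: split each byte into two 4-bit codes and drop the
    # padding nibble, mirroring the `orig_len` argument used in the diff.
    packed = np.asarray(packed, dtype=np.uint8)
    out = np.empty(packed.shape[0] * 2, dtype=np.uint8)
    out[0::2] = packed & 0x0F
    out[1::2] = (packed >> 4) & 0x0F
    return out[:orig_len]


codes = np.array([1, 15, 7, 0, 9], dtype=np.uint8)
packed = pack_int4(codes)  # 3 bytes hold 5 codes
assert np.array_equal(unpack_int4(packed, orig_len=5), codes)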
@@ -362,8 +372,9 @@ class EinsumDense(Layer):
         if mode not in self.variable_serialization_spec:
             raise self._quantization_mode_error(mode)
 
-        # A saved GPTQ quantized model will always be calibrated.
+        # A saved GPTQ/AWQ quantized model will always be calibrated.
         self.is_gptq_calibrated = mode == "gptq"
+        self.is_awq_calibrated = mode == "awq"
 
         idx = 0
         for name in self.variable_serialization_spec[mode]:
@@ -459,6 +470,14 @@ class EinsumDense(Layer):
                 "kernel_zero",
                 "g_idx",
             ],
+            "awq": [
+                "bias",
+                "quantized_kernel",
+                "kernel_scale",
+                "kernel_zero",
+                "awq_scales",
+                "g_idx",
+            ],
         }
 
     def quantized_build(self, kernel_shape, mode, config=None):
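The new `"awq"` entry extends `variable_serialization_spec`, the per-mode ordered list of variables that the load path walks (`idx = 0; for name in self.variable_serialization_spec[mode]: ...` in the earlier hunk). Below is a minimal, self-contained sketch of that spec-driven restoration pattern; the `Slot` class and the `store` dict are hypothetical stand-ins for Keras variables and the saved-weights store, not the real implementation.

spec = {
    "awq": [
        "bias", "quantized_kernel", "kernel_scale",
        "kernel_zero", "awq_scales", "g_idx",
    ],
}


class Slot:
    """Hypothetical stand-in for a Keras variable."""

    def __init__(self):
        self.value = None

    def assign(self, value):
        self.value = value


class FakeAWQLayer:
    def __init__(self):
        for name in spec["awq"]:
            setattr(self, name, Slot())


layer = FakeAWQLayer()
store = {str(i): f"tensor_{i}" for i in range(len(spec["awq"]))}

# Saved weights are keyed "0", "1", ... in the order the spec lists them.
idx = 0
for name in spec["awq"]:
    getattr(layer, name).assign(store[str(idx)])
    idx += 1

assert layer.awq_scales.value == "tensor_4"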
@@ -470,6 +489,8 @@ class EinsumDense(Layer):
             self._float8_build()
         elif mode == "gptq":
             self._gptq_build(kernel_shape, config)
+        elif mode == "awq":
+            self._awq_build(kernel_shape, config)
         else:
             raise self._quantization_mode_error(mode)
         self._is_quantized = True
@@ -616,6 +637,127 @@ class EinsumDense(Layer):
             y = self.activation(y)
         return y
 
+    def _awq_build(self, kernel_shape, config):
+        """Build variables for AWQ quantization.
+
+        AWQ uses 4-bit quantization with per-channel AWQ scales that protect
+        salient weights based on activation magnitudes.
+        """
+        from keras.src.quantizers import awq_core
+
+        # Ensures the forward pass uses the original high-precision kernel
+        # until calibration has been performed.
+        self.is_awq_calibrated = False
+
+        self.original_kernel_shape = kernel_shape
+        if len(kernel_shape) == 2:
+            rows = kernel_shape[0]
+            columns = kernel_shape[1]
+        elif len(kernel_shape) == 3:
+            shape = list(self.original_kernel_shape)
+            d_model_dim_index = shape.index(max(shape))
+
+            if d_model_dim_index == 0:  # QKV projection case
+                in_features, heads, head_dim = shape
+                rows, columns = (
+                    in_features,
+                    heads * head_dim,
+                )
+            elif d_model_dim_index in [1, 2]:  # Attention Output case
+                heads, head_dim, out_features = shape
+                rows, columns = (
+                    heads * head_dim,
+                    out_features,
+                )
+            else:
+                raise ValueError("Could not determine row/column split.")
+        else:
+            raise ValueError("AWQ quantization only supports 2D or 3D kernels.")
+
+        group_size = awq_core.get_group_size_for_layer(self, config)
+        num_groups = 1 if group_size == -1 else math.ceil(rows / group_size)
+
+        self.awq_unpacked_column_size = columns
+
+        # For 4-bit weights, we pack two values per byte.
+        kernel_columns = (columns + 1) // 2
+
+        self._set_quantization_info()
+
+        self.quantized_kernel = self.add_weight(
+            name="kernel",
+            shape=(kernel_columns, rows),
+            initializer="zeros",
+            dtype="uint8",
+            trainable=False,
+        )
+
+        self.kernel_scale = self.add_weight(
+            name="kernel_scale",
+            shape=(columns, num_groups),
+            initializer="ones",
+            trainable=False,
+        )
+        self.kernel_zero = self.add_weight(
+            name="zero_point",
+            shape=(columns, num_groups),
+            initializer="zeros",
+            dtype="uint8",
+            trainable=False,
+        )
+
+        # Per-channel AWQ scales from activation magnitudes
+        self.awq_scales = self.add_weight(
+            name="awq_scales",
+            shape=(rows,),
+            initializer="ones",
+            trainable=False,
+        )
+
+        self.g_idx = self.add_weight(
+            name="g_idx",
+            shape=(rows,),
+            initializer="zeros",
+            dtype="float32",
+            trainable=False,
+        )
+
+    def _awq_call(self, inputs, training=False):
+        """Forward pass for AWQ quantized layer."""
+        if not self.is_awq_calibrated:
+            W = self._kernel
+        else:
+            # Unpack 4-bit weights
+            W = quantizers.unpack_int4(
+                self.quantized_kernel,
+                orig_len=self.awq_unpacked_column_size,
+                axis=0,
+                dtype="uint8",
+            )
+            # Dequantize using scale/zero maps
+            W = dequantize_with_sz_map(
+                W,
+                self.kernel_scale,
+                self.kernel_zero,
+                self.g_idx,
+            )
+            W = ops.transpose(W)
+
+            # Apply AWQ scales by dividing to restore original magnitude
+            # (We multiplied by scales before quantization, so divide to undo)
+            # awq_scales has shape [input_dim], W has shape [input_dim, out_dim]
+            # Expand dims for proper broadcasting.
+            W = ops.divide(W, ops.expand_dims(self.awq_scales, -1))
+
+            W = ops.reshape(W, self.original_kernel_shape)
+
+        y = ops.einsum(self.equation, inputs, W)
+        if self.bias is not None:
+            y = ops.add(y, self.bias)
+        if self.activation is not None:
+            y = self.activation(y)
+        return y
+
     def _int4_build(self, kernel_shape, config=None):
         """Build variables for int4 quantization.
 
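A NumPy sketch of the dequantization that `_awq_call` performs once the layer is calibrated: unpacked 4-bit codes are mapped back to floats with per-output-channel, per-group scales and zero points (`g_idx` assigns each input channel to a group), and the result is then divided by the per-input-channel `awq_scales` that were multiplied in before quantization. This is an illustrative re-implementation under those assumptions, not the actual `dequantize_with_sz_map` helper.

import numpy as np


def dequantize_with_sz_map(q, scale, zero, g_idx):
    # q:     [out_features, in_features] uint8 codes (already unpacked)
    # scale: [out_features, num_groups]  per-group scales
    # zero:  [out_features, num_groups]  per-group zero points
    # g_idx: [in_features]               group index of each input channel
    groups = g_idx.astype(np.int64)
    s = scale[:, groups]                        # broadcast to [out, in]
    z = zero[:, groups].astype(np.float32)
    return (q.astype(np.float32) - z) * s


out_features, in_features, group_size = 4, 8, 4
rng = np.random.default_rng(0)
q = rng.integers(0, 16, size=(out_features, in_features), dtype=np.uint8)
scale = rng.uniform(0.05, 0.2, size=(out_features, in_features // group_size))
zero = rng.integers(0, 16, size=(out_features, in_features // group_size),
                    dtype=np.uint8)
g_idx = (np.arange(in_features) // group_size).astype(np.float32)
awq_scales = rng.uniform(0.5, 2.0, size=(in_features,))

W = dequantize_with_sz_map(q, scale, zero, g_idx)   # [out, in]
W = W.T                                             # as in ops.transpose(W)
W = W / awq_scales[:, None]                         # undo pre-quantization scaling
print(W.shape)                                      # (8, 4) -> [in, out]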
@@ -1010,7 +1152,7 @@ class EinsumDense(Layer):
         self.quantization_config = config
 
         kernel_shape = self._kernel.shape
-        if mode in ("int8", "int4", "gptq"):
+        if mode in ("int8", "int4", "gptq", "awq"):
             self._set_quantization_info()
 
         if mode == "int8":
@@ -1058,6 +1200,8 @@ class EinsumDense(Layer):
         policy_name = mode
         if mode == "gptq":
             policy_name = self.quantization_config.dtype_policy_string()
+        elif mode == "awq":
+            policy_name = self.quantization_config.dtype_policy_string()
         policy = dtype_policies.get(
             f"{policy_name}_from_{self.dtype_policy.name}"
         )
@@ -1121,7 +1265,7 @@ class EinsumDense(Layer):
             This is `None` if the layer is not quantized.
         """
         # If not a quantized layer, return the full-precision kernel directly.
-        if self.dtype_policy.quantization_mode in (None, "gptq"):
+        if self.dtype_policy.quantization_mode in (None, "gptq", "awq"):
             return self.kernel, None
 
         # If quantized but LoRA is not enabled, return the original quantized
keras/src/layers/core/embedding.py
CHANGED

@@ -514,7 +514,7 @@ class Embedding(Layer):
             `embeddings_scale`: The quantization scale for the merged
                 embeddings. This is `None` if the layer is not quantized.
         """
-        if self.dtype_policy.quantization_mode in (None, "gptq"):
+        if self.dtype_policy.quantization_mode in (None, "gptq", "awq"):
             return self.embeddings, None
 
         embeddings_value = self._embeddings
keras/src/layers/layer.py
CHANGED
@@ -1337,6 +1337,8 @@ class Layer(BackendLayer, Operation):
             return self._int4_call(*args, **kwargs)
         elif self.quantization_mode == "gptq":
             return self._gptq_call(*args, **kwargs)
+        elif self.quantization_mode == "awq":
+            return self._awq_call(*args, **kwargs)
         else:
             raise self._quantization_mode_error(self.quantization_mode)
 
@@ -1352,6 +1354,9 @@ class Layer(BackendLayer, Operation):
     def _gptq_call(self, *args, **kwargs):
         raise self._not_implemented_error(self._gptq_call)
 
+    def _awq_call(self, *args, **kwargs):
+        raise self._not_implemented_error(self._awq_call)
+
     def _not_implemented_error(self, attr, msg=None):
         if callable(attr):
             attr_name = attr.__name__
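These stubs follow the existing pattern: `Layer.quantized_call` dispatches on `quantization_mode`, and the base class raises a "not implemented" error for any mode-specific hook a concrete layer does not override (as `Dense` and `EinsumDense` now do for `_awq_call`). A stripped-down sketch of that dispatch pattern, not the real `Layer` class:

class QuantizableLayer:
    """Hypothetical mini-version of the dispatch shown in the hunk above."""

    quantization_mode = None

    def quantized_call(self, *args, **kwargs):
        if self.quantization_mode == "awq":
            return self._awq_call(*args, **kwargs)
        raise ValueError(
            f"Unsupported quantization mode: {self.quantization_mode}"
        )

    def _awq_call(self, *args, **kwargs):
        # Concrete layers that support AWQ override this hook.
        raise NotImplementedError(
            f"{type(self).__name__} does not implement `_awq_call()`."
        )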
keras/src/models/model.py
CHANGED
@@ -9,6 +9,7 @@ from keras.src import utils
 from keras.src.api_export import keras_export
 from keras.src.layers.layer import Layer
 from keras.src.models.variable_mapping import map_saveable_variables
+from keras.src.quantizers.awq_core import awq_quantize
 from keras.src.quantizers.gptq_core import gptq_quantize
 from keras.src.quantizers.utils import should_quantize_layer
 from keras.src.saving import saving_api
@@ -547,7 +548,7 @@ class Model(Trainer, base_trainer.Trainer, Layer):
         except AttributeError:
             pass
 
-        if mode
+        if mode in ["gptq", "awq"]:
             # Resolve model structure.
             # 1. If quantization_layer_structure is provided inside the config,
             # use that.
@@ -559,14 +560,17 @@ class Model(Trainer, base_trainer.Trainer, Layer):
 
             if structure is None:
                 raise ValueError(
-                    "For
+                    f"For {mode=}, a valid quantization structure must be "
                     "provided either via `config.quantization_layer_structure` "
                     "or by overriding "
                     "`model.get_quantization_layer_structure(mode)`. The "
                     "structure should be a dictionary with keys "
                     "'pre_block_layers' and 'sequential_blocks'."
                 )
-
+            if mode == "gptq":
+                gptq_quantize(config, structure, filters=filters)
+            elif mode == "awq":
+                awq_quantize(config, structure, filters=filters)
 
         # If any layer was changed, we must rebuild the execution functions.
         if graph_modified:
keras/src/ops/numpy.py
CHANGED
@@ -7802,6 +7802,15 @@ def correlate(x1, x2, mode="valid"):
 
     Returns:
         Output tensor, cross-correlation of `x1` and `x2`.
+
+    Notes:
+        Complex-valued inputs are currently not fully supported on the
+        TensorFlow and PyTorch backends. When complex tensors are passed,
+        they are cast to floating-point types and the imaginary component
+        is discarded.
+
+        This behavior is documented for clarity and may change in the
+        future. See discussion in issue #21617.
     """
     if any_symbolic_tensors((x1, x2)):
         return Correlate(mode=mode).symbolic_call(x1, x2)
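The added note documents existing behavior rather than changing it. As a plain NumPy illustration of what "cast to floating-point types and the imaginary component is discarded" means for an input (an analogy, not the backend code):

import numpy as np

x = np.array([1 + 2j, 3 - 1j, 0 + 4j])
x_as_float = np.real(x).astype("float32")  # imaginary parts are dropped
print(x_as_float)                          # [1. 3. 0.]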
keras/src/quantizers/__init__.py
CHANGED
@@ -1,6 +1,7 @@
 import inspect
 
 from keras.src.api_export import keras_export
+from keras.src.quantizers.awq_config import AWQConfig
 from keras.src.quantizers.quantization_config import Float8QuantizationConfig
 from keras.src.quantizers.quantization_config import Int4QuantizationConfig
 from keras.src.quantizers.quantization_config import Int8QuantizationConfig
@@ -24,6 +25,7 @@ ALL_OBJECTS = {
     Int8QuantizationConfig,
     Int4QuantizationConfig,
     Float8QuantizationConfig,
+    AWQConfig,
 }
 ALL_OBJECTS_DICT = {cls.__name__: cls for cls in ALL_OBJECTS}
 ALL_OBJECTS_DICT.update(