keras-nightly 3.14.0.dev2026011304__py3-none-any.whl → 3.14.0.dev2026011504__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras/_tf_keras/keras/dtype_policies/__init__.py +3 -0
- keras/_tf_keras/keras/quantizers/__init__.py +1 -0
- keras/dtype_policies/__init__.py +3 -0
- keras/quantizers/__init__.py +1 -0
- keras/src/backend/openvino/numpy.py +145 -7
- keras/src/dtype_policies/__init__.py +2 -0
- keras/src/dtype_policies/dtype_policy.py +90 -1
- keras/src/layers/core/dense.py +122 -6
- keras/src/layers/core/einsum_dense.py +151 -7
- keras/src/layers/core/embedding.py +1 -1
- keras/src/layers/layer.py +5 -0
- keras/src/models/model.py +7 -3
- keras/src/ops/numpy.py +9 -0
- keras/src/quantizers/__init__.py +2 -0
- keras/src/quantizers/awq.py +361 -0
- keras/src/quantizers/awq_config.py +140 -0
- keras/src/quantizers/awq_core.py +217 -0
- keras/src/quantizers/gptq.py +1 -2
- keras/src/quantizers/gptq_core.py +1 -1
- keras/src/quantizers/quantization_config.py +14 -0
- keras/src/quantizers/quantizers.py +61 -52
- keras/src/version.py +1 -1
- {keras_nightly-3.14.0.dev2026011304.dist-info → keras_nightly-3.14.0.dev2026011504.dist-info}/METADATA +1 -1
- {keras_nightly-3.14.0.dev2026011304.dist-info → keras_nightly-3.14.0.dev2026011504.dist-info}/RECORD +26 -23
- {keras_nightly-3.14.0.dev2026011304.dist-info → keras_nightly-3.14.0.dev2026011504.dist-info}/WHEEL +0 -0
- {keras_nightly-3.14.0.dev2026011304.dist-info → keras_nightly-3.14.0.dev2026011504.dist-info}/top_level.txt +0 -0
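
The headline change is new Activation-aware Weight Quantization (AWQ) support: an `AWQConfig`, an `AWQDTypePolicy`, the `awq.py`/`awq_config.py`/`awq_core.py` modules, and wiring in `Dense` and `EinsumDense`. The OpenVINO backend also gains `rot90`, `kron`, `size`, and `swapaxes` implementations plus a `flip` fix. Judging by the `__init__.py` diffs below, the new symbols should be importable from the public namespaces; a minimal smoke test (import paths inferred from the re-exports in this diff):

```python
from keras.quantizers import AWQConfig
from keras.dtype_policies import AWQDTypePolicy
```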
keras/_tf_keras/keras/dtype_policies/__init__.py
CHANGED
@@ -7,6 +7,9 @@ since your modifications would be overwritten.
 from keras.src.dtype_policies import deserialize as deserialize
 from keras.src.dtype_policies import get as get
 from keras.src.dtype_policies import serialize as serialize
+from keras.src.dtype_policies.dtype_policy import (
+    AWQDTypePolicy as AWQDTypePolicy,
+)
 from keras.src.dtype_policies.dtype_policy import DTypePolicy as DTypePolicy
 from keras.src.dtype_policies.dtype_policy import (
     FloatDTypePolicy as FloatDTypePolicy,
keras/_tf_keras/keras/quantizers/__init__.py
CHANGED
@@ -7,6 +7,7 @@ since your modifications would be overwritten.
 from keras.src.quantizers import deserialize as deserialize
 from keras.src.quantizers import get as get
 from keras.src.quantizers import serialize as serialize
+from keras.src.quantizers.awq_config import AWQConfig as AWQConfig
 from keras.src.quantizers.gptq_config import GPTQConfig as GPTQConfig
 from keras.src.quantizers.quantization_config import (
     Float8QuantizationConfig as Float8QuantizationConfig,
keras/dtype_policies/__init__.py
CHANGED
@@ -7,6 +7,9 @@ since your modifications would be overwritten.
 from keras.src.dtype_policies import deserialize as deserialize
 from keras.src.dtype_policies import get as get
 from keras.src.dtype_policies import serialize as serialize
+from keras.src.dtype_policies.dtype_policy import (
+    AWQDTypePolicy as AWQDTypePolicy,
+)
 from keras.src.dtype_policies.dtype_policy import DTypePolicy as DTypePolicy
 from keras.src.dtype_policies.dtype_policy import (
     FloatDTypePolicy as FloatDTypePolicy,
keras/quantizers/__init__.py
CHANGED
@@ -7,6 +7,7 @@ since your modifications would be overwritten.
 from keras.src.quantizers import deserialize as deserialize
 from keras.src.quantizers import get as get
 from keras.src.quantizers import serialize as serialize
+from keras.src.quantizers.awq_config import AWQConfig as AWQConfig
 from keras.src.quantizers.gptq_config import GPTQConfig as GPTQConfig
 from keras.src.quantizers.quantization_config import (
     Float8QuantizationConfig as Float8QuantizationConfig,
keras/src/backend/openvino/numpy.py
CHANGED
@@ -1126,22 +1126,28 @@ def expm1(x):
 
 def flip(x, axis=None):
     x_node = get_ov_output(x)
-
+
+    # Using OpenVINO tensor shape
+    ndim = len(x_node.get_partial_shape())
     if ndim is None:
         raise ValueError(
-            "The `flip` operation does not support tensors with dynamic rank"
+            "The `flip` operation does not support tensors with dynamic rank "
            "for the OpenVINO backend."
         )
+
     if axis is None:
         axis = list(range(ndim))
     elif isinstance(axis, int):
         axis = [axis]
+
     axis = [a + ndim if a < 0 else a for a in axis]
+
     begin = [0] * ndim
     end = [0] * ndim
     strides = [1] * ndim
     for a in axis:
         strides[a] = -1
+
     all_ones_mask = [1] * ndim
     result = ov_opset.strided_slice(
         data=x_node,
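
For context, the rewritten `flip` reads the rank from the partial shape and reverses each requested axis with a stride of -1. A minimal NumPy sketch of the same strided-reversal idea (illustrative only, not the backend code):

```python
import numpy as np

def flip_via_strides(x, axis=None):
    # Normalize `axis` exactly like the backend: default to all axes,
    # accept a bare int, and wrap negatives.
    ndim = x.ndim
    if axis is None:
        axis = list(range(ndim))
    elif isinstance(axis, int):
        axis = [axis]
    axis = [a + ndim if a < 0 else a for a in axis]

    # A step of -1 on an axis reverses it; other axes are left alone.
    index = tuple(
        slice(None, None, -1) if a in axis else slice(None)
        for a in range(ndim)
    )
    return x[index]

x = np.arange(6).reshape(2, 3)
assert (flip_via_strides(x, -1) == np.flip(x, -1)).all()
```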
@@ -1154,6 +1160,61 @@ def flip(x, axis=None):
     return OpenVINOKerasTensor(result.output(0))
 
 
+def rot90(array, k=1, axes=(0, 1)):
+    """Rotate an array by 90 degrees in the plane specified by axes."""
+    array = get_ov_output(array)
+
+    if not isinstance(axes, (tuple, list)) or len(axes) != 2:
+        raise ValueError("axes must be a tuple of length 2")
+
+    shape = array.get_partial_shape()
+    ndim = shape.rank.get_length()
+    if ndim is None:
+        raise ValueError(
+            "`rot90` does not support tensors with dynamic rank "
+            "for the OpenVINO backend."
+        )
+
+    axis1 = canonicalize_axis(axes[0], ndim)
+    axis2 = canonicalize_axis(axes[1], ndim)
+
+    if axis1 == axis2:
+        raise ValueError("axes must be different")
+
+    k = k % 4
+    if k == 0:
+        return OpenVINOKerasTensor(array)
+
+    result = array
+
+    for _ in range(k):
+        # 1. Transpose axis1 <-> axis2
+        perm = list(range(ndim))
+        perm[axis1], perm[axis2] = perm[axis2], perm[axis1]
+        perm_const = ov_opset.constant(perm, Type.i32).output(0)
+        result = ov_opset.transpose(result, perm_const).output(0)
+
+        # 2. Reverse along axis1 using StridedSlice
+        begin = [0] * ndim
+        end = [0] * ndim
+        strides = [1] * ndim
+        strides[axis1] = -1
+
+        begin_mask = [1] * ndim
+        end_mask = [1] * ndim
+
+        result = ov_opset.strided_slice(
+            data=result,
+            begin=begin,
+            end=end,
+            strides=strides,
+            begin_mask=begin_mask,
+            end_mask=end_mask,
+        ).output(0)
+
+    return OpenVINOKerasTensor(result)
+
+
 def floor(x):
     x = get_ov_output(x)
     x_type = x.get_element_type()
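
Each quarter turn above is "swap the two plane axes, then reverse the first of them". A NumPy sketch checking that this decomposition matches `np.rot90` (illustrative only):

```python
import numpy as np

def rot90_by_steps(m, k=1, axes=(0, 1)):
    a1, a2 = axes
    for _ in range(k % 4):
        perm = list(range(m.ndim))
        perm[a1], perm[a2] = perm[a2], perm[a1]
        m = np.transpose(m, perm)  # swap the plane's axes
        m = np.flip(m, axis=a1)    # then reverse along the first one
    return m

m = np.arange(6).reshape(2, 3)
for k in range(4):
    assert (rot90_by_steps(m, k) == np.rot90(m, k)).all()
```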
@@ -1394,7 +1455,66 @@ def isreal(x):
 
 
 def kron(x1, x2):
-
+    x1 = get_ov_output(x1)
+    x2 = get_ov_output(x2)
+    x1, x2 = _align_operand_types(x1, x2, "kron()")
+    x1_shape = x1.get_partial_shape()
+    x2_shape = x2.get_partial_shape()
+    if x1_shape.rank.is_dynamic or x2_shape.rank.is_dynamic:
+        raise ValueError(
+            "`kron` does not support tensors with dynamic rank for "
+            "the OpenVINO backend."
+        )
+    ndim1 = x1_shape.rank.get_length()
+    ndim2 = x2_shape.rank.get_length()
+    if ndim1 < ndim2:
+        axes = ov_opset.range(
+            ov_opset.constant(0, Type.i32),
+            ov_opset.constant(ndim2 - ndim1, Type.i32),
+            ov_opset.constant(1, Type.i32),
+        )
+        x1 = ov_opset.unsqueeze(x1, axes)
+        ndim1 = ndim2
+    elif ndim2 < ndim1:
+        axes = ov_opset.range(
+            ov_opset.constant(0, Type.i32),
+            ov_opset.constant(ndim1 - ndim2, Type.i32),
+            ov_opset.constant(1, Type.i32),
+        )
+        x2 = ov_opset.unsqueeze(x2, axes)
+        ndim2 = ndim1
+    shape1 = ov_opset.shape_of(x1, Type.i32)
+    shape2 = ov_opset.shape_of(x2, Type.i32)
+    ones = ov_opset.broadcast(
+        ov_opset.constant(1, Type.i32), ov_opset.constant([ndim1], Type.i32)
+    )
+    axis = ov_opset.constant(1, Type.i32)
+    flatten = ov_opset.constant([-1], Type.i32)
+    unsqueezed_ones = ov_opset.unsqueeze(ones, axis)
+    x1_new_shape = ov_opset.reshape(
+        ov_opset.concat(
+            [ov_opset.unsqueeze(shape1, axis), unsqueezed_ones],
+            axis=1,
+        ),
+        flatten,
+        False,
+    )
+    x2_new_shape = ov_opset.reshape(
+        ov_opset.concat(
+            [unsqueezed_ones, ov_opset.unsqueeze(shape2, axis)],
+            axis=1,
+        ),
+        flatten,
+        False,
+    )
+    result = ov_opset.multiply(
+        ov_opset.reshape(x1, x1_new_shape, False),
+        ov_opset.reshape(x2, x2_new_shape, False),
+    )
+    result = ov_opset.reshape(
+        result, ov_opset.multiply(shape1, shape2), False
+    ).output(0)
+    return OpenVINOKerasTensor(result)
 
 
 def lcm(x1, x2):
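
The `kron` lowering interleaves each operand's dimensions with singleton dimensions ([d0, 1, d1, 1, ...] against [1, e0, 1, e1, ...]), broadcast-multiplies, and reshapes to the elementwise product of the two shapes. The same trick in NumPy (a sketch, not the backend code):

```python
import numpy as np

def kron_via_broadcast(a, b):
    assert a.ndim == b.ndim  # operands are rank-aligned first
    # Interleave dims with 1s so broadcasting forms every pairwise
    # product, then collapse each (d_i, e_i) pair into one axis.
    a_r = a.reshape([d for s in a.shape for d in (s, 1)])
    b_r = b.reshape([d for s in b.shape for d in (1, s)])
    out_shape = np.multiply(a.shape, b.shape)
    return (a_r * b_r).reshape(out_shape)

a = np.array([[1, 2], [3, 4]])
b = np.array([[0, 10], [20, 30]])
assert (kron_via_broadcast(a, b) == np.kron(a, b)).all()
```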
@@ -2226,7 +2346,14 @@ def sinh(x):
 
 
 def size(x):
-
+    x = get_ov_output(x)
+    shape_tensor = ov_opset.shape_of(x, output_type=Type.i64)
+    final_size = ov_opset.reduce_prod(
+        shape_tensor,
+        ov_opset.constant([0], Type.i64),
+        keep_dims=False,
+    )
+    return OpenVINOKerasTensor(final_size.output(0))
 
 
 def sort(x, axis=-1):
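
`size` is simply the product of the shape vector, which is what the ShapeOf + ReduceProd graph above computes; in NumPy terms:

```python
import numpy as np

x = np.zeros((2, 3, 4))
assert int(np.prod(x.shape)) == x.size == 24
```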
@@ -2368,9 +2495,20 @@ def std(x, axis=None, keepdims=False):
 
 
 def swapaxes(x, axis1, axis2):
-
-
-
+    x = get_ov_output(x)
+    x_shape = x.get_partial_shape()
+    if x_shape.rank.is_dynamic:
+        raise ValueError(
+            "`swapaxes` does not support tensors with dynamic rank for the "
+            "OpenVINO backend."
+        )
+    rank = x_shape.rank.get_length()
+    axis1 = canonicalize_axis(axis1, rank)
+    axis2 = canonicalize_axis(axis2, rank)
+    axes = list(range(rank))
+    axes[axis1], axes[axis2] = axes[axis2], axes[axis1]
+    result = ov_opset.transpose(x, ov_opset.constant(axes, Type.i32))
+    return OpenVINOKerasTensor(result.output(0))
 
 
 def take(x, indices, axis=None):
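
`swapaxes` becomes a single Transpose whose permutation is the identity with the two entries exchanged; a NumPy sketch of the same permutation (illustrative only):

```python
import numpy as np

def swapaxes_via_transpose(x, axis1, axis2):
    # Identity permutation with axis1/axis2 exchanged -- the same
    # constant handed to the Transpose op above.
    axes = list(range(x.ndim))
    axes[axis1], axes[axis2] = axes[axis2], axes[axis1]
    return np.transpose(x, axes)

x = np.arange(24).reshape(2, 3, 4)
assert (swapaxes_via_transpose(x, 0, 2) == np.swapaxes(x, 0, 2)).all()
```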
keras/src/dtype_policies/__init__.py
CHANGED
@@ -2,6 +2,7 @@ from keras.src import backend
 from keras.src.api_export import keras_export
 from keras.src.dtype_policies import dtype_policy
 from keras.src.dtype_policies.dtype_policy import QUANTIZATION_MODES
+from keras.src.dtype_policies.dtype_policy import AWQDTypePolicy
 from keras.src.dtype_policies.dtype_policy import DTypePolicy
 from keras.src.dtype_policies.dtype_policy import FloatDTypePolicy
 from keras.src.dtype_policies.dtype_policy import GPTQDTypePolicy
@@ -10,6 +11,7 @@ from keras.src.dtype_policies.dtype_policy import QuantizedFloat8DTypePolicy
 from keras.src.dtype_policies.dtype_policy_map import DTypePolicyMap
 
 ALL_OBJECTS = {
+    AWQDTypePolicy,
     DTypePolicy,
     FloatDTypePolicy,
     QuantizedDTypePolicy,
keras/src/dtype_policies/dtype_policy.py
CHANGED
@@ -3,7 +3,7 @@ from keras.src import ops
 from keras.src.api_export import keras_export
 from keras.src.backend.common import global_state
 
-QUANTIZATION_MODES = ("int8", "float8", "int4", "gptq")
+QUANTIZATION_MODES = ("int8", "float8", "int4", "gptq", "awq")
 
 
 @keras_export(
@@ -376,6 +376,93 @@ class GPTQDTypePolicy(QuantizedDTypePolicy):
         return config
 
 
+@keras_export("keras.dtype_policies.AWQDTypePolicy")
+class AWQDTypePolicy(QuantizedDTypePolicy):
+    """Quantized dtype policy for AWQ quantization.
+
+    This policy helps propagate quantization settings for AWQ
+    when loading an AWQ quantized model in Keras format.
+
+    Args:
+        mode: The quantization mode. This should be a string in the format
+            `"awq/<weight_bits>/<group_size>"`.
+            - `"awq"`: The identifier for the quantization algorithm.
+            - `<weight_bits>`: Number of bits to quantize weights to.
+                AWQ presently only supports 4-bit quantization.
+            - `<group_size>`: The group size for quantization. Supported
+                values are -1 (for per-channel quantization) or any
+                positive integer.
+                Example: `"awq/4/128"`.
+        source_name: The source dtype policy name, e.g. "float32".
+    """
+
+    def __init__(
+        self,
+        mode,
+        source_name=None,
+    ):
+        parts = mode.split("/")
+        expected_format = "'awq/<weight_bits>/<group_size>'"
+
+        # Validate format.
+        if len(parts) != 3 or parts[0] != "awq":
+            raise ValueError(
+                "Invalid mode for AWQDTypePolicy. Expected format "
+                f"{expected_format}, but got '{mode}'."
+            )
+
+        # Validate and cast weight_bits and group_size.
+        try:
+            weight_bits = int(parts[1])
+            group_size = int(parts[2])
+        except ValueError:
+            raise ValueError(
+                "Invalid mode for AWQDTypePolicy. <weight_bits> and "
+                "<group_size> must be integers. Expected format "
+                f"{expected_format}, but got '{mode}'."
+            )
+
+        # AWQ presently only supports 4-bit quantization.
+        if weight_bits != 4:
+            raise ValueError(
+                "Invalid weight_bits in mode. AWQ only supports 4-bit "
+                f"quantization, but got {weight_bits} from '{mode}'."
+            )
+
+        if group_size < -1 or group_size == 0:
+            raise ValueError(
+                "Invalid group_size in mode. Supported values are "
+                "-1 (per-channel) or a positive integer, "
+                f"but got {group_size} from '{mode}'."
+            )
+
+        base_mode = parts[0]
+        super().__init__(
+            mode=base_mode,
+            source_name=source_name,
+        )
+
+        self._name = f"{mode}_from_{source_name}"
+        self.mode = base_mode
+        self.weight_bits = weight_bits
+        self.group_size = group_size
+
+    def __eq__(self, other):
+        if super().__eq__(other) is False:
+            return False
+        return (
+            self.weight_bits == other.weight_bits
+            and self.group_size == other.group_size
+        )
+
+    def get_config(self):
+        config = super().get_config()
+        # Reconstruct the full mode string for serialization
+        mode = f"{self.mode}/{self.weight_bits}/{self.group_size}"
+        config.update({"mode": mode})
+        return config
+
+
 @keras_export(
     [
         "keras.config.set_dtype_policy",
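
Based on the constructor above, a policy string like `"awq/4/128"` parses into a base mode plus the bit width and group size; a short usage sketch:

```python
from keras.dtype_policies import AWQDTypePolicy

policy = AWQDTypePolicy("awq/4/128", source_name="float32")
assert policy.mode == "awq"        # base mode after parsing
assert policy.weight_bits == 4     # AWQ is 4-bit only
assert policy.group_size == 128
assert policy.get_config()["mode"] == "awq/4/128"

# Malformed strings are rejected up front, e.g.:
# AWQDTypePolicy("awq/8/128")  -> ValueError (only 4-bit supported)
# AWQDTypePolicy("awq/4/0")    -> ValueError (group_size is -1 or positive)
```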
@@ -442,6 +529,8 @@ def _get_quantized_dtype_policy_by_str(policy):
         return QuantizedDTypePolicy(mode, source_name)
     elif policy.startswith("gptq"):
         return GPTQDTypePolicy(mode, source_name)
+    elif policy.startswith("awq"):
+        return AWQDTypePolicy(mode, source_name)
     elif policy.startswith("float8"):
         return QuantizedFloat8DTypePolicy(mode, source_name)
     else:
keras/src/layers/core/dense.py
CHANGED
@@ -128,7 +128,7 @@ class Dense(Layer):
             mode=self.quantization_mode,
             config=self.quantization_config,
         )
-        if self.quantization_mode not in ("int8", "int4", "gptq"):
+        if self.quantization_mode not in ("int8", "int4", "gptq", "awq"):
             # If the layer is quantized to int8 or int4, `self._kernel` will be
             # added in `self._int8_build` or `_int4_build`. Therefore, we skip
             # it here.
@@ -165,15 +165,17 @@ class Dense(Layer):
 
         mode = self.quantization_mode
         is_gptq = mode == "gptq"
+        is_awq = mode == "awq"
         is_int4 = mode == "int4"
-
+        gptq_calibrated = bool(getattr(self, "is_gptq_calibrated", False))
+        awq_calibrated = bool(getattr(self, "is_awq_calibrated", False))
         gptq_bits = (
             gptq_core.get_weight_bits_for_layer(self, None) if is_gptq else None
         )
 
         # Decide the source tensor first (packed vs already-quantized vs plain
         # kernel)
-        if is_gptq and
+        if is_gptq and gptq_calibrated and gptq_bits != 4:
             # calibrated GPTQ, not 4-bit, no unpacking needed
             kernel = self.quantized_kernel
         else:
@@ -183,7 +185,15 @@ class Dense(Layer):
         # Handle int4 unpacking cases in one place
         if is_int4:
             kernel = quantizers.unpack_int4(kernel, self._orig_input_dim)
-        elif is_gptq and
+        elif is_gptq and gptq_calibrated and gptq_bits == 4:
+            kernel = quantizers.unpack_int4(
+                self.quantized_kernel,
+                orig_len=self.units,
+                axis=0,
+                dtype="uint8",
+            )
+        elif is_awq and awq_calibrated:
+            # AWQ always uses 4-bit quantization
             kernel = quantizers.unpack_int4(
                 self.quantized_kernel,
                 orig_len=self.units,
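
Both the GPTQ 4-bit and AWQ branches store two 4-bit values per byte and rely on `unpack_int4` with `orig_len` to trim the padding nibble. A NumPy sketch of that nibble packing round trip (an illustration of the idea, not necessarily Keras's exact bit layout):

```python
import numpy as np

def pack_nibbles(q):
    # q: uint8 codes in [0, 15]; pair rows two-per-byte (low nibble
    # first), padding odd lengths -- hence the (n + 1) // 2 storage.
    if q.shape[0] % 2:
        q = np.concatenate([q, np.zeros((1,) + q.shape[1:], np.uint8)])
    return (q[0::2] | (q[1::2] << 4)).astype(np.uint8)

def unpack_nibbles(packed, orig_len):
    low = packed & 0x0F
    high = (packed >> 4) & 0x0F
    out = np.empty((2 * packed.shape[0],) + packed.shape[1:], np.uint8)
    out[0::2], out[1::2] = low, high
    return out[:orig_len]  # trim the padding, like orig_len above

q = np.random.randint(0, 16, size=(5, 3)).astype(np.uint8)
assert (unpack_nibbles(pack_nibbles(q), orig_len=5) == q).all()
```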
@@ -304,8 +314,9 @@ class Dense(Layer):
         if mode not in self.variable_serialization_spec:
             raise self._quantization_mode_error(mode)
 
-        # A saved GPTQ quantized model will always be calibrated.
+        # A saved GPTQ/AWQ quantized model will always be calibrated.
         self.is_gptq_calibrated = mode == "gptq"
+        self.is_awq_calibrated = mode == "awq"
 
         idx = 0
         for name in self.variable_serialization_spec[mode]:
@@ -395,6 +406,14 @@ class Dense(Layer):
             "kernel_zero",
             "g_idx",
         ],
+        "awq": [
+            "bias",
+            "quantized_kernel",
+            "kernel_scale",
+            "kernel_zero",
+            "awq_scales",
+            "g_idx",
+        ],
     }
 
     def quantized_build(self, kernel_shape, mode, config=None):
@@ -406,6 +425,8 @@ class Dense(Layer):
             self._float8_build()
         elif mode == "gptq":
             self._gptq_build(kernel_shape, config)
+        elif mode == "awq":
+            self._awq_build(kernel_shape, config)
         else:
             raise self._quantization_mode_error(mode)
         self._is_quantized = True
@@ -515,6 +536,97 @@ class Dense(Layer):
             y = self.activation(y)
         return y
 
+    def _awq_build(self, kernel_shape, config):
+        """Build variables for AWQ quantization.
+
+        AWQ uses 4-bit quantization with per-channel AWQ scales that protect
+        salient weights based on activation magnitudes.
+        """
+        from keras.src.quantizers import awq_core
+
+        # Ensures the forward pass uses the original high-precision kernel
+        # until calibration has been performed.
+        self.is_awq_calibrated = False
+        self.kernel_shape = kernel_shape
+
+        # For 4-bit weights, we pack two values per byte.
+        units = (kernel_shape[1] + 1) // 2
+
+        self.quantized_kernel = self.add_weight(
+            name="kernel",
+            shape=(units, kernel_shape[0]),
+            initializer="zeros",
+            dtype="uint8",
+            trainable=False,
+        )
+
+        group_size = awq_core.get_group_size_for_layer(self, config)
+        num_groups = (
+            1 if group_size == -1 else math.ceil(kernel_shape[0] / group_size)
+        )
+        self.kernel_scale = self.add_weight(
+            name="kernel_scale",
+            shape=(self.units, num_groups),
+            initializer="ones",
+            trainable=False,
+        )
+        self.kernel_zero = self.add_weight(
+            name="kernel_zero",
+            shape=(self.units, num_groups),
+            initializer="zeros",
+            dtype="uint8",
+            trainable=False,
+        )
+
+        # Per-channel AWQ scales from activation magnitudes
+        self.awq_scales = self.add_weight(
+            name="awq_scales",
+            shape=(kernel_shape[0],),
+            initializer="ones",
+            trainable=False,
+        )
+        self.g_idx = self.add_weight(
+            name="g_idx",
+            shape=(kernel_shape[0],),
+            initializer="zeros",
+            dtype="float32",
+            trainable=False,
+        )
+
+    def _awq_call(self, inputs, training=False):
+        """Forward pass for AWQ quantized layer."""
+        if not self.is_awq_calibrated:
+            W = self._kernel
+        else:
+            # Unpack 4-bit weights
+            W = quantizers.unpack_int4(
+                self.quantized_kernel,
+                orig_len=self.units,
+                axis=0,
+                dtype="uint8",
+            )
+            # Dequantize using scale/zero maps
+            W = ops.transpose(
+                dequantize_with_sz_map(
+                    W,
+                    self.kernel_scale,
+                    self.kernel_zero,
+                    self.g_idx,
+                )
+            )
+            # Apply AWQ scales by dividing to restore original magnitude
+            # (We multiplied by scales before quantization, so divide to undo)
+            # awq_scales has shape [input_dim], W has shape [input_dim, units]
+            # Expand dims for proper broadcasting.
+            W = ops.divide(W, ops.expand_dims(self.awq_scales, -1))
+
+        y = ops.matmul(inputs, W)
+        if self.bias is not None:
+            y = ops.add(y, self.bias)
+        if self.activation is not None:
+            y = self.activation(y)
+        return y
+
     def _int4_build(self, kernel_shape, config=None):
         """Build variables for int4 quantization.
 
@@ -835,6 +947,8 @@ class Dense(Layer):
             self.kernel_scale.assign(kernel_scale)
         elif mode == "gptq":
             self.quantized_build(kernel_shape, mode, self.quantization_config)
+        elif mode == "awq":
+            self.quantized_build(kernel_shape, mode, self.quantization_config)
         elif mode == "float8":
             self.quantized_build(kernel_shape, mode)
         else:
@@ -847,6 +961,8 @@ class Dense(Layer):
         policy_name = mode
         if mode == "gptq":
             policy_name = self.quantization_config.dtype_policy_string()
+        elif mode == "awq":
+            policy_name = self.quantization_config.dtype_policy_string()
         policy = dtype_policies.get(
             f"{policy_name}_from_{self.dtype_policy.name}"
         )
@@ -881,7 +997,7 @@ class Dense(Layer):
         `kernel_scale`: The quantization scale for the merged kernel.
             This is `None` if the layer is not quantized.
         """
-        if self.dtype_policy.quantization_mode in (None, "gptq"):
+        if self.dtype_policy.quantization_mode in (None, "gptq", "awq"):
             return self.kernel, None
 
         kernel_value = self._kernel