keras-nightly 3.14.0.dev2025122704__py3-none-any.whl → 3.14.0.dev2026012204__py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported public registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.
- keras/_tf_keras/keras/dtype_policies/__init__.py +3 -0
- keras/_tf_keras/keras/ops/__init__.py +3 -0
- keras/_tf_keras/keras/ops/numpy/__init__.py +3 -0
- keras/_tf_keras/keras/quantizers/__init__.py +1 -0
- keras/dtype_policies/__init__.py +3 -0
- keras/ops/__init__.py +3 -0
- keras/ops/numpy/__init__.py +3 -0
- keras/quantizers/__init__.py +1 -0
- keras/src/backend/jax/nn.py +26 -9
- keras/src/backend/jax/numpy.py +16 -0
- keras/src/backend/numpy/numpy.py +23 -0
- keras/src/backend/openvino/numpy.py +369 -16
- keras/src/backend/tensorflow/numpy.py +34 -1
- keras/src/backend/tensorflow/rnn.py +17 -7
- keras/src/backend/torch/numpy.py +36 -0
- keras/src/backend/torch/rnn.py +28 -11
- keras/src/callbacks/orbax_checkpoint.py +75 -42
- keras/src/dtype_policies/__init__.py +2 -0
- keras/src/dtype_policies/dtype_policy.py +90 -1
- keras/src/layers/core/dense.py +122 -6
- keras/src/layers/core/einsum_dense.py +151 -7
- keras/src/layers/core/embedding.py +1 -1
- keras/src/layers/core/reversible_embedding.py +10 -1
- keras/src/layers/layer.py +5 -0
- keras/src/layers/preprocessing/feature_space.py +8 -4
- keras/src/layers/preprocessing/image_preprocessing/aug_mix.py +2 -2
- keras/src/layers/preprocessing/image_preprocessing/center_crop.py +13 -15
- keras/src/layers/preprocessing/image_preprocessing/random_contrast.py +3 -3
- keras/src/layers/preprocessing/image_preprocessing/resizing.py +10 -0
- keras/src/losses/losses.py +24 -0
- keras/src/models/model.py +18 -9
- keras/src/ops/image.py +109 -96
- keras/src/ops/numpy.py +181 -0
- keras/src/quantizers/__init__.py +2 -0
- keras/src/quantizers/awq.py +361 -0
- keras/src/quantizers/awq_config.py +140 -0
- keras/src/quantizers/awq_core.py +217 -0
- keras/src/quantizers/gptq.py +1 -2
- keras/src/quantizers/gptq_core.py +1 -1
- keras/src/quantizers/quantization_config.py +14 -0
- keras/src/quantizers/quantizers.py +61 -52
- keras/src/random/seed_generator.py +2 -2
- keras/src/saving/file_editor.py +81 -6
- keras/src/saving/orbax_util.py +50 -0
- keras/src/saving/saving_api.py +37 -14
- keras/src/utils/jax_layer.py +69 -31
- keras/src/utils/module_utils.py +11 -0
- keras/src/utils/tracking.py +5 -5
- keras/src/version.py +1 -1
- {keras_nightly-3.14.0.dev2025122704.dist-info → keras_nightly-3.14.0.dev2026012204.dist-info}/METADATA +1 -1
- {keras_nightly-3.14.0.dev2025122704.dist-info → keras_nightly-3.14.0.dev2026012204.dist-info}/RECORD +53 -49
- {keras_nightly-3.14.0.dev2025122704.dist-info → keras_nightly-3.14.0.dev2026012204.dist-info}/WHEEL +1 -1
- {keras_nightly-3.14.0.dev2025122704.dist-info → keras_nightly-3.14.0.dev2026012204.dist-info}/top_level.txt +0 -0
keras/src/callbacks/orbax_checkpoint.py
CHANGED
@@ -8,7 +8,6 @@ from keras.src.api_export import keras_export
 from keras.src.callbacks.monitor_callback import (
     MonitorCallback,  # For metric monitoring logic
 )
-from keras.src.utils.io_utils import print_msg
 from keras.src.utils.module_utils import ocp
 
 # Context and AsyncOptions are accessed through the lazy-loaded ocp module
@@ -62,6 +61,11 @@ class OrbaxCheckpoint(MonitorCallback):
     This callback saves the model's weights and optimizer state asynchronously
     using Orbax, allowing training to continue without blocking for I/O.
 
+    **Multi-host Support**: When running in a multi-host distributed training
+    environment with JAX backend, this callback automatically coordinates
+    checkpointing across all hosts to ensure consistency and proper
+    synchronization. Multi-host checkpointing is only supported on JAX.
+
     Example:
 
     ```python
@@ -92,10 +96,6 @@ class OrbaxCheckpoint(MonitorCallback):
         verbose: Verbosity mode, 0 or 1.
         save_best_only: if `save_best_only=True`, it only saves when the model
             is considered the "best" based on the monitored quantity.
-        save_weights_only: if `save_weights_only=True`, only the model's
-            weights will be saved. Otherwise, the full model state
-            (weights, non-trainable variables, optimizer state, and
-            metrics state) will be saved. Defaults to False.
         mode: one of {'auto', 'min', 'max'}. Used with `save_best_only`.
         save_freq: `'epoch'` or integer. Frequency to save checkpoints.
         max_to_keep: Integer, maximum number of recent checkpoints to keep.
@@ -112,7 +112,6 @@ class OrbaxCheckpoint(MonitorCallback):
         monitor="val_loss",
         verbose=0,
         save_best_only=False,
-        save_weights_only=False,
         mode="auto",
         save_freq="epoch",
         initial_value_threshold=None,
@@ -129,7 +128,6 @@ class OrbaxCheckpoint(MonitorCallback):
         self.directory = directory
         self.verbose = verbose
         self.save_best_only = save_best_only
-        self.save_weights_only = save_weights_only
         self.save_freq = save_freq
         self.max_to_keep = max_to_keep
         self.save_on_background = save_on_background
@@ -138,6 +136,9 @@ class OrbaxCheckpoint(MonitorCallback):
         self._current_epoch = 0  # Keep track of epoch
         self._total_batches_seen = 0  # Global batch counter for step tracking
 
+        # Multi-host support
+        self._multihost_initialized = self._is_multihost_initialized()
+
         if self.save_freq != "epoch" and not isinstance(self.save_freq, int):
             raise ValueError(
                 f"Unrecognized save_freq: {self.save_freq}. "
@@ -151,14 +152,18 @@ class OrbaxCheckpoint(MonitorCallback):
                 ocp.training.preservation_policies.LatestN(max_to_keep)
             )
 
-        # Use AnyPreservationPolicy to combine them
+        # Use AnyPreservationPolicy to combine them, or use directly
+        # if single policy
         preservation_policy = None
         if policies:
-            preservation_policy = (
-                ocp.training.preservation_policies.AnyPreservationPolicy(
-                    policies
+            if len(policies) == 1:
+                preservation_policy = policies[0]
+            else:
+                preservation_policy = (
+                    ocp.training.preservation_policies.AnyPreservationPolicy(
+                        policies
+                    )
                 )
-            )
 
         # Create the V1 Checkpointer with direct parameter passing
         # Orbax will handle directory creation on all processes as needed
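The selection logic in this hunk, restated as a standalone sketch (hypothetical helper name; `ocp` is the lazily imported Orbax module used throughout this file):

```python
def combine_preservation_policies(policies):
    """Pick one policy directly, or combine several under AnyPreservationPolicy."""
    if not policies:
        return None  # let the Checkpointer fall back to its defaults
    if len(policies) == 1:
        return policies[0]  # no wrapper needed for a single policy
    return ocp.training.preservation_policies.AnyPreservationPolicy(policies)
```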
@@ -167,6 +172,54 @@ class OrbaxCheckpoint(MonitorCallback):
             preservation_policy=preservation_policy,
         )
 
+    def _is_multihost_initialized(self):
+        """Check if multi-host environment is initialized."""
+        # Multi-host checkpointing is only supported on JAX backend
+        if backend.backend() != "jax":
+            return False
+
+        multihost = ocp.multihost
+        # Check if JAX distributed client is initialized
+        # (indicates multihost setup)
+        return multihost.is_jax_distributed_client_initialized()
+
+    def _sync_processes(self, key=None):
+        """Synchronize all processes across hosts."""
+        if not self._multihost_initialized:
+            return  # No-op for single host
+
+        multihost = ocp.multihost
+        sync_key = key or "orbax_checkpoint_sync"
+        multihost.sync_global_processes(sync_key)
+
+    def is_multihost_enabled(self):
+        """Return True if multi-host checkpointing is enabled and initialized.
+
+        This method can be used to check if the callback is operating in
+        a multi-host distributed training environment. Multi-host checkpointing
+        is only supported on JAX backend.
+
+        Returns:
+            bool: True if multi-host support is active, False otherwise.
+        """
+        return self._multihost_initialized
+
+    def is_primary_host(self):
+        """Return True if this process is the primary host in multi-host setup.
+
+        In multi-host environments, only the primary host typically handles
+        logging and coordination tasks. Multi-host checkpointing is only
+        supported on JAX backend.
+
+        Returns:
+            bool: True if this is the primary host, False otherwise.
+            Always returns True in single-host environments.
+        """
+        if not self._multihost_initialized:
+            return True  # Single host is always primary
+        multihost = ocp.multihost
+        return multihost.is_primary_host()
+
     def _should_save_on_batch(self, batch):
         """Check if we should save on this batch."""
         if self.save_freq == "epoch":
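A usage sketch of the new helpers (`model` and `dataset` are assumed to exist; the import path follows the file location above, and a JAX multi-process setup is assumed to have been initialized, e.g. via `jax.distributed.initialize()`):

```python
from keras.src.callbacks.orbax_checkpoint import OrbaxCheckpoint

checkpoint_cb = OrbaxCheckpoint(directory="/tmp/ckpts", save_freq="epoch")
model.fit(dataset, epochs=10, callbacks=[checkpoint_cb])

# Host-conditional logging using the new query methods:
if checkpoint_cb.is_multihost_enabled() and checkpoint_cb.is_primary_host():
    print("Primary host: checkpoints are coordinated across all processes.")
```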
@@ -186,32 +239,14 @@ class OrbaxCheckpoint(MonitorCallback):
             return False
 
     def _save_checkpoint(self, step, logs=None):
-        """Save a checkpoint at the given step."""
+        """Save a checkpoint at the given step with multi-host coordination."""
 
         # --- Prepare Composite State (Backend-Agnostic) ---
         state_tree = _get_state_tree(self.model)
 
         # Save the nested state structures directly (preserving layer
         # names and structure)
-
-        composite_state = {
-            "trainable_variables": state_tree["trainable_variables"],
-        }
-        if "non_trainable_variables" in state_tree:
-            composite_state["non_trainable_variables"] = state_tree[
-                "non_trainable_variables"
-            ]
-        else:
-            composite_state = state_tree
-
-        # --- Save Logic (V1 API) ---
-        # All processes participate in distributed checkpointing
-        # Checkpointer is configured to save unconditionally when
-        # save_pytree is called
-        if self.verbose > 0:
-            print_msg(
-                f"OrbaxCheckpoint: Triggering async save for step {step}..."
-            )
+        composite_state = state_tree
 
         # Use a single with statement. If context_options is empty,
         # Context() uses defaults.
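For reference, a sketch of the state being saved, assuming `_get_state_tree` builds on Keras's `Model.get_state_tree()` (the exact helper is internal to this module):

```python
state_tree = model.get_state_tree()  # model is a built/compiled keras.Model
# Nested dicts keyed by layer/variable path, roughly:
# {
#     "trainable_variables": {"dense": {"kernel": ..., "bias": ...}},
#     "non_trainable_variables": {...},
#     "optimizer_variables": {...},
#     "metrics_variables": {...},
# }
```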
@@ -282,18 +317,16 @@ class OrbaxCheckpoint(MonitorCallback):
         except Exception:
             pass  # Ignore errors during cleanup
 
+        # Multi-host synchronization: ensure all hosts complete cleanup
+        self._sync_processes("checkpoint_cleanup")
+
     def wait_until_finished(self):
         """Wait for any in-progress checkpoint operations to complete.
         This method blocks until all asynchronous checkpoint save operations
-        have completed
-        checkpoints if there might be pending save operations.
+        have completed across all hosts in a multi-host setup.
         """
-        # Wait for any async operations to complete
-        if hasattr(self.checkpointer, "wait"):
-            self.checkpointer.wait()
-        else:
-            # Fallback for older Orbax versions that don't have wait() method
-            while self.checkpointer.is_saving_in_progress():
-                import time
+        # Wait for any async operations to complete on this host
+        self.checkpointer.wait()
 
-
+        # Multi-host synchronization: ensure all hosts complete
+        self._sync_processes("checkpoint_wait_complete")
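Because saves run in the background, a caller that immediately restores or inspects checkpoints should drain them first; a short sketch reusing `checkpoint_cb` from the earlier example:

```python
model.fit(dataset, epochs=5, callbacks=[checkpoint_cb])

# Block until async saves finish on this host; on multi-host setups
# this also synchronizes all hosts (see the hunk above).
checkpoint_cb.wait_until_finished()
```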
keras/src/dtype_policies/__init__.py
CHANGED
@@ -2,6 +2,7 @@ from keras.src import backend
 from keras.src.api_export import keras_export
 from keras.src.dtype_policies import dtype_policy
 from keras.src.dtype_policies.dtype_policy import QUANTIZATION_MODES
+from keras.src.dtype_policies.dtype_policy import AWQDTypePolicy
 from keras.src.dtype_policies.dtype_policy import DTypePolicy
 from keras.src.dtype_policies.dtype_policy import FloatDTypePolicy
 from keras.src.dtype_policies.dtype_policy import GPTQDTypePolicy
@@ -10,6 +11,7 @@ from keras.src.dtype_policies.dtype_policy import QuantizedFloat8DTypePolicy
 from keras.src.dtype_policies.dtype_policy_map import DTypePolicyMap
 
 ALL_OBJECTS = {
+    AWQDTypePolicy,
     DTypePolicy,
     FloatDTypePolicy,
     QuantizedDTypePolicy,
keras/src/dtype_policies/dtype_policy.py
CHANGED
@@ -3,7 +3,7 @@ from keras.src import ops
 from keras.src.api_export import keras_export
 from keras.src.backend.common import global_state
 
-QUANTIZATION_MODES = ("int8", "float8", "int4", "gptq")
+QUANTIZATION_MODES = ("int8", "float8", "int4", "gptq", "awq")
 
 
 @keras_export(
@@ -376,6 +376,93 @@ class GPTQDTypePolicy(QuantizedDTypePolicy):
         return config
 
 
+@keras_export("keras.dtype_policies.AWQDTypePolicy")
+class AWQDTypePolicy(QuantizedDTypePolicy):
+    """Quantized dtype policy for AWQ quantization.
+
+    This policy helps propagate quantization settings for AWQ
+    when loading an AWQ quantized model in Keras format.
+
+    Args:
+        mode: The quantization mode. This should be a string in the format
+            `"awq/<weight_bits>/<group_size>"`.
+            - `"awq"`: The identifier for the quantization algorithm.
+            - `<weight_bits>`: Number of bits to quantize weights to.
+                AWQ presently only supports 4-bit quantization.
+            - `<group_size>`: The group size for quantization. Supported
+                values are -1 (for per-channel quantization) or any
+                positive integer.
+            Example: `"awq/4/128"`.
+        source_name: The source dtype policy name, e.g. "float32".
+    """
+
+    def __init__(
+        self,
+        mode,
+        source_name=None,
+    ):
+        parts = mode.split("/")
+        expected_format = "'awq/<weight_bits>/<group_size>'"
+
+        # Validate format.
+        if len(parts) != 3 or parts[0] != "awq":
+            raise ValueError(
+                "Invalid mode for AWQDTypePolicy. Expected format "
+                f"{expected_format}, but got '{mode}'."
+            )
+
+        # Validate and cast weight_bits and group_size.
+        try:
+            weight_bits = int(parts[1])
+            group_size = int(parts[2])
+        except ValueError:
+            raise ValueError(
+                "Invalid mode for AWQDTypePolicy. <weight_bits> and "
+                "<group_size> must be integers. Expected format "
+                f"{expected_format}, but got '{mode}'."
+            )
+
+        # AWQ presently only supports 4-bit quantization.
+        if weight_bits != 4:
+            raise ValueError(
+                "Invalid weight_bits in mode. AWQ only supports 4-bit "
+                f"quantization, but got {weight_bits} from '{mode}'."
+            )
+
+        if group_size < -1 or group_size == 0:
+            raise ValueError(
+                "Invalid group_size in mode. Supported values are "
+                "-1 (per-channel) or a positive integer, "
+                f"but got {group_size} from '{mode}'."
+            )
+
+        base_mode = parts[0]
+        super().__init__(
+            mode=base_mode,
+            source_name=source_name,
+        )
+
+        self._name = f"{mode}_from_{source_name}"
+        self.mode = base_mode
+        self.weight_bits = weight_bits
+        self.group_size = group_size
+
+    def __eq__(self, other):
+        if super().__eq__(other) is False:
+            return False
+        return (
+            self.weight_bits == other.weight_bits
+            and self.group_size == other.group_size
+        )
+
+    def get_config(self):
+        config = super().get_config()
+        # Reconstruct the full mode string for serialization
+        mode = f"{self.mode}/{self.weight_bits}/{self.group_size}"
+        config.update({"mode": mode})
+        return config
+
+
 @keras_export(
     [
         "keras.config.set_dtype_policy",
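A small sketch exercising the mode-string contract defined by this class (behavior inferred directly from the hunk above):

```python
from keras.src.dtype_policies.dtype_policy import AWQDTypePolicy

policy = AWQDTypePolicy(mode="awq/4/128", source_name="float32")
assert policy.weight_bits == 4 and policy.group_size == 128
# get_config() reconstructs the full mode string for serialization:
assert policy.get_config()["mode"] == "awq/4/128"

# Anything other than 4-bit is rejected:
# AWQDTypePolicy(mode="awq/8/128", source_name="float32")  # raises ValueError
```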
@@ -442,6 +529,8 @@ def _get_quantized_dtype_policy_by_str(policy):
         return QuantizedDTypePolicy(mode, source_name)
     elif policy.startswith("gptq"):
         return GPTQDTypePolicy(mode, source_name)
+    elif policy.startswith("awq"):
+        return AWQDTypePolicy(mode, source_name)
     elif policy.startswith("float8"):
         return QuantizedFloat8DTypePolicy(mode, source_name)
     else:
keras/src/layers/core/dense.py
CHANGED
@@ -128,7 +128,7 @@ class Dense(Layer):
                 mode=self.quantization_mode,
                 config=self.quantization_config,
             )
-        if self.quantization_mode not in ("int8", "int4", "gptq"):
+        if self.quantization_mode not in ("int8", "int4", "gptq", "awq"):
             # If the layer is quantized to int8 or int4, `self._kernel` will be
             # added in `self._int8_build` or `_int4_build`. Therefore, we skip
             # it here.
@@ -165,15 +165,17 @@ class Dense(Layer):
 
         mode = self.quantization_mode
         is_gptq = mode == "gptq"
+        is_awq = mode == "awq"
         is_int4 = mode == "int4"
-
+        gptq_calibrated = bool(getattr(self, "is_gptq_calibrated", False))
+        awq_calibrated = bool(getattr(self, "is_awq_calibrated", False))
         gptq_bits = (
             gptq_core.get_weight_bits_for_layer(self, None) if is_gptq else None
         )
 
         # Decide the source tensor first (packed vs already-quantized vs plain
         # kernel)
-        if is_gptq and self.is_gptq_calibrated and gptq_bits != 4:
+        if is_gptq and gptq_calibrated and gptq_bits != 4:
             # calibrated GPTQ, not 4-bit, no unpacking needed
             kernel = self.quantized_kernel
         else:
@@ -183,7 +185,15 @@ class Dense(Layer):
         # Handle int4 unpacking cases in one place
         if is_int4:
             kernel = quantizers.unpack_int4(kernel, self._orig_input_dim)
-        elif is_gptq and self.is_gptq_calibrated and gptq_bits == 4:
+        elif is_gptq and gptq_calibrated and gptq_bits == 4:
+            kernel = quantizers.unpack_int4(
+                self.quantized_kernel,
+                orig_len=self.units,
+                axis=0,
+                dtype="uint8",
+            )
+        elif is_awq and awq_calibrated:
+            # AWQ always uses 4-bit quantization
             kernel = quantizers.unpack_int4(
                 self.quantized_kernel,
                 orig_len=self.units,
@@ -304,8 +314,9 @@ class Dense(Layer):
         if mode not in self.variable_serialization_spec:
             raise self._quantization_mode_error(mode)
 
-        # A saved GPTQ quantized model will always be calibrated.
+        # A saved GPTQ/AWQ quantized model will always be calibrated.
         self.is_gptq_calibrated = mode == "gptq"
+        self.is_awq_calibrated = mode == "awq"
 
         idx = 0
         for name in self.variable_serialization_spec[mode]:
@@ -395,6 +406,14 @@ class Dense(Layer):
             "kernel_zero",
             "g_idx",
         ],
+        "awq": [
+            "bias",
+            "quantized_kernel",
+            "kernel_scale",
+            "kernel_zero",
+            "awq_scales",
+            "g_idx",
+        ],
     }
 
     def quantized_build(self, kernel_shape, mode, config=None):
@@ -406,6 +425,8 @@ class Dense(Layer):
             self._float8_build()
         elif mode == "gptq":
             self._gptq_build(kernel_shape, config)
+        elif mode == "awq":
+            self._awq_build(kernel_shape, config)
         else:
             raise self._quantization_mode_error(mode)
         self._is_quantized = True
@@ -515,6 +536,97 @@ class Dense(Layer):
             y = self.activation(y)
         return y
 
+    def _awq_build(self, kernel_shape, config):
+        """Build variables for AWQ quantization.
+
+        AWQ uses 4-bit quantization with per-channel AWQ scales that protect
+        salient weights based on activation magnitudes.
+        """
+        from keras.src.quantizers import awq_core
+
+        # Ensures the forward pass uses the original high-precision kernel
+        # until calibration has been performed.
+        self.is_awq_calibrated = False
+        self.kernel_shape = kernel_shape
+
+        # For 4-bit weights, we pack two values per byte.
+        units = (kernel_shape[1] + 1) // 2
+
+        self.quantized_kernel = self.add_weight(
+            name="kernel",
+            shape=(units, kernel_shape[0]),
+            initializer="zeros",
+            dtype="uint8",
+            trainable=False,
+        )
+
+        group_size = awq_core.get_group_size_for_layer(self, config)
+        num_groups = (
+            1 if group_size == -1 else math.ceil(kernel_shape[0] / group_size)
+        )
+        self.kernel_scale = self.add_weight(
+            name="kernel_scale",
+            shape=(self.units, num_groups),
+            initializer="ones",
+            trainable=False,
+        )
+        self.kernel_zero = self.add_weight(
+            name="kernel_zero",
+            shape=(self.units, num_groups),
+            initializer="zeros",
+            dtype="uint8",
+            trainable=False,
+        )
+
+        # Per-channel AWQ scales from activation magnitudes
+        self.awq_scales = self.add_weight(
+            name="awq_scales",
+            shape=(kernel_shape[0],),
+            initializer="ones",
+            trainable=False,
+        )
+        self.g_idx = self.add_weight(
+            name="g_idx",
+            shape=(kernel_shape[0],),
+            initializer="zeros",
+            dtype="float32",
+            trainable=False,
+        )
+
+    def _awq_call(self, inputs, training=False):
+        """Forward pass for AWQ quantized layer."""
+        if not self.is_awq_calibrated:
+            W = self._kernel
+        else:
+            # Unpack 4-bit weights
+            W = quantizers.unpack_int4(
+                self.quantized_kernel,
+                orig_len=self.units,
+                axis=0,
+                dtype="uint8",
+            )
+            # Dequantize using scale/zero maps
+            W = ops.transpose(
+                dequantize_with_sz_map(
+                    W,
+                    self.kernel_scale,
+                    self.kernel_zero,
+                    self.g_idx,
+                )
+            )
+            # Apply AWQ scales by dividing to restore original magnitude
+            # (We multiplied by scales before quantization, so divide to undo)
+            # awq_scales has shape [input_dim], W has shape [input_dim, units]
+            # Expand dims for proper broadcasting.
+            W = ops.divide(W, ops.expand_dims(self.awq_scales, -1))
+
+        y = ops.matmul(inputs, W)
+        if self.bias is not None:
+            y = ops.add(y, self.bias)
+        if self.activation is not None:
+            y = self.activation(y)
+        return y
+
     def _int4_build(self, kernel_shape, config=None):
         """Build variables for int4 quantization.
 
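As background for the `(kernel_shape[1] + 1) // 2` sizing in `_awq_build` above, a minimal standalone sketch of 4-bit packing (not the Keras implementation; the nibble order is an assumption):

```python
import numpy as np

def pack_int4(values):
    """Pack pairs of 4-bit values (0..15) into single uint8 bytes."""
    if len(values) % 2:
        values = np.append(values, np.uint8(0))  # pad to an even length
    lo, hi = values[0::2], values[1::2]
    return ((hi << 4) | lo).astype(np.uint8)

def unpack_int4(packed, orig_len):
    """Inverse of pack_int4: recover orig_len 4-bit values."""
    lo = packed & 0x0F
    hi = packed >> 4
    out = np.empty(2 * len(packed), dtype=np.uint8)
    out[0::2], out[1::2] = lo, hi
    return out[:orig_len]

vals = np.array([1, 15, 7, 2, 9], dtype=np.uint8)
packed = pack_int4(vals)
assert len(packed) == (len(vals) + 1) // 2  # two values per byte
assert np.array_equal(unpack_int4(packed, len(vals)), vals)
```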
@@ -835,6 +947,8 @@ class Dense(Layer):
             self.kernel_scale.assign(kernel_scale)
         elif mode == "gptq":
             self.quantized_build(kernel_shape, mode, self.quantization_config)
+        elif mode == "awq":
+            self.quantized_build(kernel_shape, mode, self.quantization_config)
         elif mode == "float8":
             self.quantized_build(kernel_shape, mode)
         else:
@@ -847,6 +961,8 @@ class Dense(Layer):
         policy_name = mode
         if mode == "gptq":
             policy_name = self.quantization_config.dtype_policy_string()
+        elif mode == "awq":
+            policy_name = self.quantization_config.dtype_policy_string()
         policy = dtype_policies.get(
             f"{policy_name}_from_{self.dtype_policy.name}"
         )
@@ -881,7 +997,7 @@ class Dense(Layer):
             `kernel_scale`: The quantization scale for the merged kernel.
                 This is `None` if the layer is not quantized.
         """
-        if self.dtype_policy.quantization_mode in (None, "gptq"):
+        if self.dtype_policy.quantization_mode in (None, "gptq", "awq"):
             return self.kernel, None
 
         kernel_value = self._kernel