keras-nightly 3.14.0.dev2026010104__py3-none-any.whl → 3.14.0.dev2026012204__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras/_tf_keras/keras/dtype_policies/__init__.py +3 -0
- keras/_tf_keras/keras/ops/__init__.py +2 -0
- keras/_tf_keras/keras/ops/numpy/__init__.py +2 -0
- keras/_tf_keras/keras/quantizers/__init__.py +1 -0
- keras/dtype_policies/__init__.py +3 -0
- keras/ops/__init__.py +2 -0
- keras/ops/numpy/__init__.py +2 -0
- keras/quantizers/__init__.py +1 -0
- keras/src/backend/jax/nn.py +26 -9
- keras/src/backend/jax/numpy.py +10 -0
- keras/src/backend/numpy/numpy.py +15 -0
- keras/src/backend/openvino/numpy.py +338 -17
- keras/src/backend/tensorflow/numpy.py +24 -1
- keras/src/backend/tensorflow/rnn.py +17 -7
- keras/src/backend/torch/numpy.py +26 -0
- keras/src/backend/torch/rnn.py +28 -11
- keras/src/callbacks/orbax_checkpoint.py +75 -42
- keras/src/dtype_policies/__init__.py +2 -0
- keras/src/dtype_policies/dtype_policy.py +90 -1
- keras/src/layers/core/dense.py +122 -6
- keras/src/layers/core/einsum_dense.py +151 -7
- keras/src/layers/core/embedding.py +1 -1
- keras/src/layers/core/reversible_embedding.py +10 -1
- keras/src/layers/layer.py +5 -0
- keras/src/layers/preprocessing/feature_space.py +8 -4
- keras/src/layers/preprocessing/image_preprocessing/aug_mix.py +2 -2
- keras/src/layers/preprocessing/image_preprocessing/center_crop.py +13 -15
- keras/src/layers/preprocessing/image_preprocessing/random_contrast.py +3 -3
- keras/src/layers/preprocessing/image_preprocessing/resizing.py +10 -0
- keras/src/losses/losses.py +24 -0
- keras/src/models/model.py +18 -9
- keras/src/ops/image.py +106 -93
- keras/src/ops/numpy.py +138 -0
- keras/src/quantizers/__init__.py +2 -0
- keras/src/quantizers/awq.py +361 -0
- keras/src/quantizers/awq_config.py +140 -0
- keras/src/quantizers/awq_core.py +217 -0
- keras/src/quantizers/gptq.py +1 -2
- keras/src/quantizers/gptq_core.py +1 -1
- keras/src/quantizers/quantization_config.py +14 -0
- keras/src/quantizers/quantizers.py +61 -52
- keras/src/random/seed_generator.py +2 -2
- keras/src/saving/orbax_util.py +50 -0
- keras/src/saving/saving_api.py +37 -14
- keras/src/utils/jax_layer.py +69 -31
- keras/src/utils/module_utils.py +11 -0
- keras/src/utils/tracking.py +5 -5
- keras/src/version.py +1 -1
- {keras_nightly-3.14.0.dev2026010104.dist-info → keras_nightly-3.14.0.dev2026012204.dist-info}/METADATA +1 -1
- {keras_nightly-3.14.0.dev2026010104.dist-info → keras_nightly-3.14.0.dev2026012204.dist-info}/RECORD +52 -48
- {keras_nightly-3.14.0.dev2026010104.dist-info → keras_nightly-3.14.0.dev2026012204.dist-info}/WHEEL +1 -1
- {keras_nightly-3.14.0.dev2026010104.dist-info → keras_nightly-3.14.0.dev2026012204.dist-info}/top_level.txt +0 -0
keras/src/backend/tensorflow/numpy.py CHANGED

@@ -2125,6 +2125,22 @@ def moveaxis(x, source, destination):
     return tf.transpose(x, perm)
 
 
+def nansum(x, axis=None, keepdims=False):
+    x = convert_to_tensor(x)
+    dtype = standardize_dtype(x.dtype)
+    x_clean = tf.where(
+        tf.math.is_nan(cast(x, config.floatx())), tf.zeros((), dtype=dtype), x
+    )
+
+    if dtype in ("bool", "int8", "int16"):
+        dtype = "int32"
+    elif dtype in ("uint8", "uint16"):
+        dtype = "uint32"
+    x_clean = cast(x_clean, dtype)
+
+    return tf.reduce_sum(x_clean, axis=axis, keepdims=keepdims)
+
+
 def nan_to_num(x, nan=0.0, posinf=None, neginf=None):
     x = convert_to_tensor(x)
 
@@ -2151,7 +2167,7 @@ def nan_to_num(x, nan=0.0, posinf=None, neginf=None):
 
 def ndim(x):
     x = convert_to_tensor(x)
-    return x.
+    return x.shape.rank
 
 
 def nonzero(x):
@@ -2215,6 +2231,13 @@ def prod(x, axis=None, keepdims=False, dtype=None):
     return tf.reduce_prod(x, axis=axis, keepdims=keepdims)
 
 
+def ptp(x, axis=None, keepdims=False):
+    x = convert_to_tensor(x)
+    return tf.reduce_max(x, axis=axis, keepdims=keepdims) - tf.reduce_min(
+        x, axis=axis, keepdims=keepdims
+    )
+
+
 def _quantile(x, q, axis=None, method="linear", keepdims=False):
     # ref: tfp.stats.percentile
     # float64 is needed here and below, else we get the wrong index if the array
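
For context, a rough usage sketch of the two numpy-style ops added across the backends in this release. It assumes they are exported as `keras.ops.nansum` and `keras.ops.ptp`, mirroring `numpy.nansum` and `numpy.ptp`; the export names are inferred from the `keras/ops/numpy/__init__.py` entries in the file list above, not shown in this hunk.

```python
import numpy as np
import keras

# nansum treats NaN entries as zero before reducing.
x = keras.ops.convert_to_tensor(np.array([[1.0, np.nan], [3.0, 4.0]]))
print(keras.ops.nansum(x))          # ~8.0
print(keras.ops.nansum(x, axis=1))  # ~[1.0, 7.0]

# ptp ("peak to peak") is reduce_max - reduce_min over the same axis.
y = keras.ops.convert_to_tensor(np.array([[1.0, 5.0], [3.0, 4.0]]))
print(keras.ops.ptp(y, axis=1))     # ~[4.0, 1.0]
```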
keras/src/backend/tensorflow/rnn.py CHANGED

@@ -539,11 +539,21 @@ def _do_lstm_arguments_support_cudnn(
 
 
 def _has_fully_masked_sequence(mask):
-
-
-
-
-
+    """Check if input sequence contains any fully masked data.
+
+    cuDNN kernel will error out if the input sequence contains any fully masked
+    data. We work around this issue by rerouting the computation to the
+    standard kernel until the issue on the cuDNN side has been fixed. For a
+    fully masked sequence, it will contain all `False` values. To make it easy
+    to check, we invert the boolean and check if any of the sequences has all
+    `True` values.
+
+    Args:
+        mask: The mask tensor.
+
+    Returns:
+        A boolean tensor, `True` if the mask contains a fully masked sequence.
+    """
     return tf.reduce_any(
         tf.reduce_all(tf.logical_not(tf.cast(mask, dtype="bool")), axis=1)
     )
@@ -900,8 +910,8 @@ def _cudnn_lstm(
 
     if tf.sysconfig.get_build_info()["is_rocm_build"]:
         # ROCm MIOpen's weight sequence for LSTM is different from both
-        # canonical and
-        # MIOpen: [i, f, o, c]
+        # canonical and cuDNN format
+        # MIOpen: [i, f, o, c] cuDNN/Canonical: [i, f, c, o]
        # i is input gate weights.
        # f is forget gate weights.
        # o is output gate weights.
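
A minimal standalone sketch of the fully-masked-sequence check described by the new docstring; the reduction mirrors the `tf.reduce_any` / `tf.reduce_all` lines in the hunk above, the example mask is illustrative.

```python
import tensorflow as tf

# A sequence is "fully masked" when its mask row is all False; the cuDNN LSTM
# kernel cannot handle that case, so Keras reroutes to the standard kernel.
mask = tf.constant([[True, True, False],
                    [False, False, False]])  # second sequence fully masked

has_fully_masked = tf.reduce_any(
    tf.reduce_all(tf.logical_not(tf.cast(mask, dtype="bool")), axis=1)
)
print(has_fully_masked.numpy())  # True
```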
keras/src/backend/torch/numpy.py CHANGED

@@ -1272,6 +1272,20 @@ def moveaxis(x, source, destination):
     return torch.moveaxis(x, source=source, destination=destination)
 
 
+def nansum(x, axis=None, keepdims=False):
+    if isinstance(x, (list, tuple)):
+        x = stack(x)
+    x = convert_to_tensor(x)
+    dtype = standardize_dtype(x.dtype)
+
+    if dtype in ("bool", "uint8", "int8", "int16"):
+        dtype = "int32"
+
+    if axis == () or axis == []:
+        return cast(torch.nan_to_num(x, nan=0), dtype)
+    return cast(torch.nansum(x, dim=axis, keepdim=keepdims), dtype)
+
+
 def nan_to_num(x, nan=0.0, posinf=None, neginf=None):
     x = convert_to_tensor(x)
     return torch.nan_to_num(x, nan=nan, posinf=posinf, neginf=neginf)
@@ -1382,6 +1396,18 @@ def prod(x, axis=None, keepdims=False, dtype=None):
     return x
 
 
+def ptp(x, axis=None, keepdims=False):
+    x = convert_to_tensor(x)
+    if axis is None:
+        return x.max() - x.min()
+    elif axis == ():
+        return torch.zeros_like(x)
+    else:
+        return torch.amax(x, dim=axis, keepdim=keepdims) - torch.amin(
+            x, dim=axis, keepdim=keepdims
+        )
+
+
 def quantile(x, q, axis=None, method="linear", keepdims=False):
     x = convert_to_tensor(x)
     q = convert_to_tensor(q)
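
The torch `nansum` above special-cases an empty `axis` tuple. A quick sketch of the two underlying torch calls it dispatches to (plain torch, outside Keras, illustrative values only):

```python
import torch

x = torch.tensor([[1.0, float("nan")], [3.0, 4.0]])

# axis=() path: return the input with NaNs replaced by 0, no reduction.
print(torch.nan_to_num(x, nan=0.0))  # tensor([[1., 0.], [3., 4.]])

# Normal path: torch.nansum reduces while ignoring NaNs.
print(torch.nansum(x, dim=1))        # tensor([1., 7.])
```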
keras/src/backend/torch/rnn.py CHANGED

@@ -413,11 +413,21 @@ def _is_sequence_right_padded(mask):
 
 
 def _has_fully_masked_sequence(mask):
-
-
-
-
-
+    """Check if input sequence contains any fully masked data.
+
+    cuDNN kernel will error out if the input sequence contains any fully masked
+    data. We work around this issue by rerouting the computation to the
+    standard kernel until the issue on the cuDNN side has been fixed. For a
+    fully masked sequence, it will contain all `False` values. To make it easy
+    to check, we invert the boolean and check if any of the sequences has all
+    `True` values.
+
+    Args:
+        mask: The mask tensor.
+
+    Returns:
+        A boolean tensor, `True` if the mask contains a fully masked sequence.
+    """
     return torch.any(torch.all(~mask, dim=1))
 
 
@@ -447,8 +457,8 @@ def _compute_sequence_length_from_mask(mask, batch_first):
     The masking tensor is a 2D boolean tensor with shape [batch, timestep]. For
     any timestep that should be masked, the corresponding field will be False.
     Consider the following example:
-
-
+    a = [[True, True, False, False]
+         [True, True, True, False]]
     It is a (2, 4) tensor, and the corresponding sequence length result should
     be 1D tensor with value [2, 3]. Note that the masking tensor must be right
     padded that could be checked by, e.g., `is_sequence_right_padded()`.
@@ -467,12 +477,19 @@ def _compute_sequence_length_from_mask(mask, batch_first):
 
 
 def prepare_lstm_weights(lstm, kernel, recurrent_kernel, bias, device):
-    """Copies kernel and recurrent kernel weights
+    """Copies kernel and recurrent kernel weights into the PyTorch format.
+
     We split the kernel and recurrent kernel weights, create associated
-    torch tensors adapted to be in line with the
-    After we have copied the weights, we ensure the
-    the same device and memory layout is optimized for
+    torch tensors adapted to be in line with the cuDNN optimization.
+    After we have copied the weights, we ensure the parameters are on
+    the same device and memory layout is optimized for cuDNN.
 
+    Args:
+        lstm: The PyTorch LSTM layer to prepare weights for.
+        kernel: The kernel weights tensor.
+        recurrent_kernel: The recurrent kernel weights tensor.
+        bias: The bias tensor.
+        device: The device to place the tensors on.
     """
 
     lstm = lstm.to(device)
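
The `[2, 3]` example added to the `_compute_sequence_length_from_mask` docstring can be checked directly. A sketch only: the actual helper may compute lengths differently; this just illustrates the expected result for a right-padded mask.

```python
import torch

# Right-padded boolean mask: True marks valid timesteps.
a = torch.tensor([[True, True, False, False],
                  [True, True, True, False]])

# Summing True values along the time axis yields the per-sequence lengths.
print(a.sum(dim=1))  # tensor([2, 3])
```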
keras/src/callbacks/orbax_checkpoint.py CHANGED

@@ -8,7 +8,6 @@ from keras.src.api_export import keras_export
 from keras.src.callbacks.monitor_callback import (
     MonitorCallback,  # For metric monitoring logic
 )
-from keras.src.utils.io_utils import print_msg
 from keras.src.utils.module_utils import ocp
 
 # Context and AsyncOptions are accessed through the lazy-loaded ocp module
@@ -62,6 +61,11 @@ class OrbaxCheckpoint(MonitorCallback):
     This callback saves the model's weights and optimizer state asynchronously
     using Orbax, allowing training to continue without blocking for I/O.
 
+    **Multi-host Support**: When running in a multi-host distributed training
+    environment with JAX backend, this callback automatically coordinates
+    checkpointing across all hosts to ensure consistency and proper
+    synchronization. Multi-host checkpointing is only supported on JAX.
+
     Example:
 
     ```python
@@ -92,10 +96,6 @@ class OrbaxCheckpoint(MonitorCallback):
         verbose: Verbosity mode, 0 or 1.
         save_best_only: if `save_best_only=True`, it only saves when the model
             is considered the "best" based on the monitored quantity.
-        save_weights_only: if `save_weights_only=True`, only the model's
-            weights will be saved. Otherwise, the full model state
-            (weights, non-trainable variables, optimizer state, and
-            metrics state) will be saved. Defaults to False.
         mode: one of {'auto', 'min', 'max'}. Used with `save_best_only`.
         save_freq: `'epoch'` or integer. Frequency to save checkpoints.
         max_to_keep: Integer, maximum number of recent checkpoints to keep.
@@ -112,7 +112,6 @@ class OrbaxCheckpoint(MonitorCallback):
         monitor="val_loss",
         verbose=0,
         save_best_only=False,
-        save_weights_only=False,
         mode="auto",
         save_freq="epoch",
         initial_value_threshold=None,
@@ -129,7 +128,6 @@ class OrbaxCheckpoint(MonitorCallback):
         self.directory = directory
         self.verbose = verbose
         self.save_best_only = save_best_only
-        self.save_weights_only = save_weights_only
         self.save_freq = save_freq
         self.max_to_keep = max_to_keep
         self.save_on_background = save_on_background
@@ -138,6 +136,9 @@ class OrbaxCheckpoint(MonitorCallback):
         self._current_epoch = 0  # Keep track of epoch
         self._total_batches_seen = 0  # Global batch counter for step tracking
 
+        # Multi-host support
+        self._multihost_initialized = self._is_multihost_initialized()
+
         if self.save_freq != "epoch" and not isinstance(self.save_freq, int):
             raise ValueError(
                 f"Unrecognized save_freq: {self.save_freq}. "
@@ -151,14 +152,18 @@ class OrbaxCheckpoint(MonitorCallback):
                 ocp.training.preservation_policies.LatestN(max_to_keep)
             )
 
-        # Use AnyPreservationPolicy to combine them
+        # Use AnyPreservationPolicy to combine them, or use directly
+        # if single policy
         preservation_policy = None
         if policies:
-
-
-
+            if len(policies) == 1:
+                preservation_policy = policies[0]
+            else:
+                preservation_policy = (
+                    ocp.training.preservation_policies.AnyPreservationPolicy(
+                        policies
+                    )
                 )
-            )
 
         # Create the V1 Checkpointer with direct parameter passing
         # Orbax will handle directory creation on all processes as needed
@@ -167,6 +172,54 @@ class OrbaxCheckpoint(MonitorCallback):
             preservation_policy=preservation_policy,
         )
 
+    def _is_multihost_initialized(self):
+        """Check if multi-host environment is initialized."""
+        # Multi-host checkpointing is only supported on JAX backend
+        if backend.backend() != "jax":
+            return False
+
+        multihost = ocp.multihost
+        # Check if JAX distributed client is initialized
+        # (indicates multihost setup)
+        return multihost.is_jax_distributed_client_initialized()
+
+    def _sync_processes(self, key=None):
+        """Synchronize all processes across hosts."""
+        if not self._multihost_initialized:
+            return  # No-op for single host
+
+        multihost = ocp.multihost
+        sync_key = key or "orbax_checkpoint_sync"
+        multihost.sync_global_processes(sync_key)
+
+    def is_multihost_enabled(self):
+        """Return True if multi-host checkpointing is enabled and initialized.
+
+        This method can be used to check if the callback is operating in
+        a multi-host distributed training environment. Multi-host checkpointing
+        is only supported on JAX backend.
+
+        Returns:
+            bool: True if multi-host support is active, False otherwise.
+        """
+        return self._multihost_initialized
+
+    def is_primary_host(self):
+        """Return True if this process is the primary host in multi-host setup.
+
+        In multi-host environments, only the primary host typically handles
+        logging and coordination tasks. Multi-host checkpointing is only
+        supported on JAX backend.
+
+        Returns:
+            bool: True if this is the primary host, False otherwise.
+            Always returns True in single-host environments.
+        """
+        if not self._multihost_initialized:
+            return True  # Single host is always primary
+        multihost = ocp.multihost
+        return multihost.is_primary_host()
+
     def _should_save_on_batch(self, batch):
         """Check if we should save on this batch."""
         if self.save_freq == "epoch":
@@ -186,32 +239,14 @@ class OrbaxCheckpoint(MonitorCallback):
             return False
 
     def _save_checkpoint(self, step, logs=None):
-        """Save a checkpoint at the given step."""
+        """Save a checkpoint at the given step with multi-host coordination."""
 
         # --- Prepare Composite State (Backend-Agnostic) ---
         state_tree = _get_state_tree(self.model)
 
         # Save the nested state structures directly (preserving layer
         # names and structure)
-
-        composite_state = {
-            "trainable_variables": state_tree["trainable_variables"],
-        }
-        if "non_trainable_variables" in state_tree:
-            composite_state["non_trainable_variables"] = state_tree[
-                "non_trainable_variables"
-            ]
-        else:
-            composite_state = state_tree
-
-        # --- Save Logic (V1 API) ---
-        # All processes participate in distributed checkpointing
-        # Checkpointer is configured to save unconditionally when
-        # save_pytree is called
-        if self.verbose > 0:
-            print_msg(
-                f"OrbaxCheckpoint: Triggering async save for step {step}..."
-            )
+        composite_state = state_tree
 
         # Use a single with statement. If context_options is empty,
         # Context() uses defaults.
@@ -282,18 +317,16 @@ class OrbaxCheckpoint(MonitorCallback):
         except Exception:
             pass  # Ignore errors during cleanup
 
+        # Multi-host synchronization: ensure all hosts complete cleanup
+        self._sync_processes("checkpoint_cleanup")
+
     def wait_until_finished(self):
         """Wait for any in-progress checkpoint operations to complete.
         This method blocks until all asynchronous checkpoint save operations
-        have completed
-        checkpoints if there might be pending save operations.
+        have completed across all hosts in a multi-host setup.
         """
-        # Wait for any async operations to complete
-
-            self.checkpointer.wait()
-        else:
-            # Fallback for older Orbax versions that don't have wait() method
-            while self.checkpointer.is_saving_in_progress():
-                import time
+        # Wait for any async operations to complete on this host
+        self.checkpointer.wait()
 
-
+        # Multi-host synchronization: ensure all hosts complete
+        self._sync_processes("checkpoint_wait_complete")
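
A hedged usage sketch of the updated callback: `save_weights_only` is removed, and the new multi-host helpers can be queried. Constructor arguments below are limited to those visible in the diff; the import path is the module shown above and the public export name may differ.

```python
from keras.src.callbacks.orbax_checkpoint import OrbaxCheckpoint

# Arguments shown here all appear in the diffed __init__; anything else
# about the API is not asserted by this sketch.
checkpoint_cb = OrbaxCheckpoint(
    directory="/tmp/orbax_ckpts",
    monitor="val_loss",
    save_best_only=False,
    save_freq="epoch",
    max_to_keep=3,
)

# New multi-host helpers (defaults apply outside a JAX multi-host setup).
if checkpoint_cb.is_primary_host():
    print("multi-host enabled:", checkpoint_cb.is_multihost_enabled())

# model.fit(..., callbacks=[checkpoint_cb])
# checkpoint_cb.wait_until_finished()  # block until async saves finish on all hosts
```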
keras/src/dtype_policies/__init__.py CHANGED

@@ -2,6 +2,7 @@ from keras.src import backend
 from keras.src.api_export import keras_export
 from keras.src.dtype_policies import dtype_policy
 from keras.src.dtype_policies.dtype_policy import QUANTIZATION_MODES
+from keras.src.dtype_policies.dtype_policy import AWQDTypePolicy
 from keras.src.dtype_policies.dtype_policy import DTypePolicy
 from keras.src.dtype_policies.dtype_policy import FloatDTypePolicy
 from keras.src.dtype_policies.dtype_policy import GPTQDTypePolicy
@@ -10,6 +11,7 @@ from keras.src.dtype_policies.dtype_policy import QuantizedFloat8DTypePolicy
 from keras.src.dtype_policies.dtype_policy_map import DTypePolicyMap
 
 ALL_OBJECTS = {
+    AWQDTypePolicy,
     DTypePolicy,
     FloatDTypePolicy,
     QuantizedDTypePolicy,
keras/src/dtype_policies/dtype_policy.py CHANGED

@@ -3,7 +3,7 @@ from keras.src import ops
 from keras.src.api_export import keras_export
 from keras.src.backend.common import global_state
 
-QUANTIZATION_MODES = ("int8", "float8", "int4", "gptq")
+QUANTIZATION_MODES = ("int8", "float8", "int4", "gptq", "awq")
 
 
 @keras_export(
@@ -376,6 +376,93 @@ class GPTQDTypePolicy(QuantizedDTypePolicy):
         return config
 
 
+@keras_export("keras.dtype_policies.AWQDTypePolicy")
+class AWQDTypePolicy(QuantizedDTypePolicy):
+    """Quantized dtype policy for AWQ quantization.
+
+    This policy helps propagate quantization settings for AWQ
+    when loading an AWQ quantized model in Keras format.
+
+    Args:
+        mode: The quantization mode. This should be a string in the format
+            `"awq/<weight_bits>/<group_size>"`.
+            - `"awq"`: The identifier for the quantization algorithm.
+            - `<weight_bits>`: Number of bits to quantize weights to.
+              AWQ presently only supports 4-bit quantization.
+            - `<group_size>`: The group size for quantization. Supported
+              values are -1 (for per-channel quantization) or any
+              positive integer.
+            Example: `"awq/4/128"`.
+        source_name: The source dtype policy name, e.g. "float32".
+    """
+
+    def __init__(
+        self,
+        mode,
+        source_name=None,
+    ):
+        parts = mode.split("/")
+        expected_format = "'awq/<weight_bits>/<group_size>'"
+
+        # Validate format.
+        if len(parts) != 3 or parts[0] != "awq":
+            raise ValueError(
+                "Invalid mode for AWQDTypePolicy. Expected format "
+                f"{expected_format}, but got '{mode}'."
+            )
+
+        # Validate and cast weight_bits and group_size.
+        try:
+            weight_bits = int(parts[1])
+            group_size = int(parts[2])
+        except ValueError:
+            raise ValueError(
+                "Invalid mode for AWQDTypePolicy. <weight_bits> and "
+                "<group_size> must be integers. Expected format "
+                f"{expected_format}, but got '{mode}'."
+            )
+
+        # AWQ presently only supports 4-bit quantization.
+        if weight_bits != 4:
+            raise ValueError(
+                "Invalid weight_bits in mode. AWQ only supports 4-bit "
+                f"quantization, but got {weight_bits} from '{mode}'."
+            )
+
+        if group_size < -1 or group_size == 0:
+            raise ValueError(
+                "Invalid group_size in mode. Supported values are "
+                "-1 (per-channel) or a positive integer, "
+                f"but got {group_size} from '{mode}'."
+            )
+
+        base_mode = parts[0]
+        super().__init__(
+            mode=base_mode,
+            source_name=source_name,
+        )
+
+        self._name = f"{mode}_from_{source_name}"
+        self.mode = base_mode
+        self.weight_bits = weight_bits
+        self.group_size = group_size
+
+    def __eq__(self, other):
+        if super().__eq__(other) is False:
+            return False
+        return (
+            self.weight_bits == other.weight_bits
+            and self.group_size == other.group_size
+        )
+
+    def get_config(self):
+        config = super().get_config()
+        # Reconstruct the full mode string for serialization
+        mode = f"{self.mode}/{self.weight_bits}/{self.group_size}"
+        config.update({"mode": mode})
+        return config
+
+
 @keras_export(
     [
         "keras.config.set_dtype_policy",
@@ -442,6 +529,8 @@ def _get_quantized_dtype_policy_by_str(policy):
         return QuantizedDTypePolicy(mode, source_name)
     elif policy.startswith("gptq"):
         return GPTQDTypePolicy(mode, source_name)
+    elif policy.startswith("awq"):
+        return AWQDTypePolicy(mode, source_name)
     elif policy.startswith("float8"):
         return QuantizedFloat8DTypePolicy(mode, source_name)
     else:
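
A short sketch of how the new AWQ policy's mode string is parsed, based on the class added above; values other than the `"awq/4/128"` docstring example are illustrative.

```python
from keras.src.dtype_policies.dtype_policy import AWQDTypePolicy

# The mode encodes the algorithm, weight bits, and group size.
policy = AWQDTypePolicy(mode="awq/4/128", source_name="float32")
print(policy.weight_bits)  # 4
print(policy.group_size)   # 128
print(policy.name)         # "awq/4/128_from_float32"

# Invalid modes raise ValueError, e.g. AWQ only supports 4-bit weights:
# AWQDTypePolicy(mode="awq/8/128", source_name="float32")  -> ValueError
```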
|