PyPI - keras-nightly - Versions diffs - 3.14.0.dev2026012804__py3-none-any.whl → 3.14.0.dev2026013004__py3-none-any.whl - Mend

keras-nightly 3.14.0.dev2026012804__py3-none-any.whl → 3.14.0.dev2026013004__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

keras/_tf_keras/keras/dtype_policies/__init__.py +3 -0
keras/_tf_keras/keras/quantizers/__init__.py +3 -0
keras/dtype_policies/__init__.py +3 -0
keras/quantizers/__init__.py +3 -0
keras/src/backend/jax/core.py +12 -2
keras/src/callbacks/orbax_checkpoint.py +41 -8
keras/src/dtype_policies/__init__.py +2 -0
keras/src/dtype_policies/dtype_policy.py +80 -1
keras/src/export/tfsm_layer.py +34 -0
keras/src/layers/core/dense.py +278 -95
keras/src/layers/core/einsum_dense.py +350 -181
keras/src/layers/core/embedding.py +236 -49
keras/src/layers/core/reversible_embedding.py +177 -35
keras/src/layers/preprocessing/discretization.py +30 -1
keras/src/quantizers/__init__.py +6 -0
keras/src/quantizers/quantization_config.py +98 -4
keras/src/quantizers/quantizers.py +262 -32
keras/src/saving/saving_api.py +66 -2
keras/src/version.py +1 -1
{keras_nightly-3.14.0.dev2026012804.dist-info → keras_nightly-3.14.0.dev2026013004.dist-info}/METADATA +1 -1
{keras_nightly-3.14.0.dev2026012804.dist-info → keras_nightly-3.14.0.dev2026013004.dist-info}/RECORD +23 -23
{keras_nightly-3.14.0.dev2026012804.dist-info → keras_nightly-3.14.0.dev2026013004.dist-info}/WHEEL +0 -0
{keras_nightly-3.14.0.dev2026012804.dist-info → keras_nightly-3.14.0.dev2026013004.dist-info}/top_level.txt +0 -0

keras/_tf_keras/keras/dtype_policies/__init__.py CHANGED Viewed

@@ -17,6 +17,9 @@ from keras.src.dtype_policies.dtype_policy import (
 from keras.src.dtype_policies.dtype_policy import (
     GPTQDTypePolicy as GPTQDTypePolicy,
 )
+from keras.src.dtype_policies.dtype_policy import (
+    Int4DTypePolicy as Int4DTypePolicy,
+)
 from keras.src.dtype_policies.dtype_policy import (
     QuantizedDTypePolicy as QuantizedDTypePolicy,
 )

keras/_tf_keras/keras/quantizers/__init__.py CHANGED Viewed

@@ -24,6 +24,9 @@ from keras.src.quantizers.quantization_config import (
 from keras.src.quantizers.quantizers import AbsMaxQuantizer as AbsMaxQuantizer
 from keras.src.quantizers.quantizers import Quantizer as Quantizer
 from keras.src.quantizers.quantizers import abs_max_quantize as abs_max_quantize
+from keras.src.quantizers.quantizers import (
+    abs_max_quantize_grouped_with_zero_point as abs_max_quantize_grouped_with_zero_point,
+)
 from keras.src.quantizers.quantizers import (
     compute_float8_amax_history as compute_float8_amax_history,
 )

keras/dtype_policies/__init__.py CHANGED Viewed

@@ -17,6 +17,9 @@ from keras.src.dtype_policies.dtype_policy import (
 from keras.src.dtype_policies.dtype_policy import (
     GPTQDTypePolicy as GPTQDTypePolicy,
 )
+from keras.src.dtype_policies.dtype_policy import (
+    Int4DTypePolicy as Int4DTypePolicy,
+)
 from keras.src.dtype_policies.dtype_policy import (
     QuantizedDTypePolicy as QuantizedDTypePolicy,
 )

keras/quantizers/__init__.py CHANGED Viewed

@@ -24,6 +24,9 @@ from keras.src.quantizers.quantization_config import (
 from keras.src.quantizers.quantizers import AbsMaxQuantizer as AbsMaxQuantizer
 from keras.src.quantizers.quantizers import Quantizer as Quantizer
 from keras.src.quantizers.quantizers import abs_max_quantize as abs_max_quantize
+from keras.src.quantizers.quantizers import (
+    abs_max_quantize_grouped_with_zero_point as abs_max_quantize_grouped_with_zero_point,
+)
 from keras.src.quantizers.quantizers import (
     compute_float8_amax_history as compute_float8_amax_history,
 )

keras/src/backend/jax/core.py CHANGED Viewed

@@ -98,7 +98,7 @@ if config.is_nnx_enabled():
         ):
             # Ensure 'mutable' is in nnx_metadata, but explicit 'mutable'
             # param takes precedence.
-            nnx_metadata["mutable"] = trainable if mutable is None else mutable
+            nnx_metadata["mutable"] = True if mutable is None else mutable
             # First, initialize a basic nnx.Variable with a dummy value
             # This sets up the NNX variable structure
@@ -603,7 +603,17 @@ def random_seed_dtype():
 def custom_gradient(fun):
-    return jax.custom_gradient(fun=fun)
+    fun_with_custom_gradient = jax.custom_gradient(fun=fun)
+    # Add a wrapper to unwrap variables, otherwise custom_gradient will fail
+    def fun_with_custom_gradient_wrapper(*args, **kwargs):
+        args, kwargs = tree.map_shape_structure(
+            lambda x: x.value if isinstance(x, KerasVariable) else x,
+            (args, kwargs),
+        )
+        return fun_with_custom_gradient(*args, **kwargs)
+    return fun_with_custom_gradient_wrapper
 def remat(f):

keras/src/callbacks/orbax_checkpoint.py CHANGED Viewed

@@ -8,6 +8,7 @@ from keras.src.api_export import keras_export
 from keras.src.callbacks.monitor_callback import (
     MonitorCallback,  # For metric monitoring logic
 )
+from keras.src.saving import saving_lib
 from keras.src.utils.module_utils import ocp
 # Context and AsyncOptions are accessed through the lazy-loaded ocp module
@@ -117,6 +118,7 @@ class OrbaxCheckpoint(MonitorCallback):
         initial_value_threshold=None,
         max_to_keep=1,
         save_on_background=True,
+        save_weights_only=False,
     ):
         # Ensure orbax is available
         ocp.initialize()
@@ -131,10 +133,12 @@ class OrbaxCheckpoint(MonitorCallback):
         self.save_freq = save_freq
         self.max_to_keep = max_to_keep
         self.save_on_background = save_on_background
+        self.save_weights_only = save_weights_only
         self._batches_seen_since_last_saving = 0
         self._last_batch_seen = 0
         self._current_epoch = 0  # Keep track of epoch
         self._total_batches_seen = 0  # Global batch counter for step tracking
+        self._async_futures = []  # Track async save futures
         # Multi-host support
         self._multihost_initialized = self._is_multihost_initialized()
@@ -167,9 +171,14 @@ class OrbaxCheckpoint(MonitorCallback):
         # Create the V1 Checkpointer with direct parameter passing
         # Orbax will handle directory creation on all processes as needed
+        # save_decision_policy is required for proper coordination of
+        # rapid async saves
         self.checkpointer = ocp.training.Checkpointer(
             directory=directory,
             preservation_policy=preservation_policy,
+            save_decision_policy=ocp.training.save_decision_policies.FixedIntervalPolicy(
+                1
+            ),
         )
     def _is_multihost_initialized(self):
@@ -246,15 +255,35 @@ class OrbaxCheckpoint(MonitorCallback):
         # Save the nested state structures directly (preserving layer
         # names and structure)
-        composite_state = state_tree
+        if self.save_weights_only:
+            composite_state = {
+                "trainable_variables": state_tree["trainable_variables"],
+                "non_trainable_variables": state_tree[
+                    "non_trainable_variables"
+                ],
+            }
+        else:
+            composite_state = state_tree
+            # Include model configuration for full model restoration
+            # Use saving_lib helper to properly handle shared objects
+            config_json, _ = saving_lib._serialize_model_as_json(self.model)
+            composite_state["model_config"] = config_json
         # Use a single with statement. If context_options is empty,
         # Context() uses defaults.
         with ocp.Context():
-            if self.save_on_background:
-                self.checkpointer.save_pytree_async(step, composite_state)
-            else:
+            # Determine sync vs async based on save_on_background setting
+            use_sync = not self.save_on_background
+            if use_sync:
+                # Synchronous save
                 self.checkpointer.save_pytree(step, composite_state)
+            else:
+                # Async save
+                future = self.checkpointer.save_pytree_async(
+                    step, composite_state
+                )
+                self._async_futures.append(future)
     def on_train_batch_end(self, batch, logs=None):
         if self._should_save_on_batch(batch):
@@ -306,12 +335,11 @@ class OrbaxCheckpoint(MonitorCallback):
         if should_save:
             # Use epoch number as the step for Orbax save
-            # Keras has already made the save decision - Checkpointer will
-            # save unconditionally
             self._save_checkpoint(step=epoch, logs=logs)
     def on_train_end(self, logs=None):
-        # Close the Checkpointer to ensure all pending saves complete
+        # Close the Checkpointer - this waits for any pending async saves
+        # to complete before closing
         try:
             self.checkpointer.close()
         except Exception:
@@ -325,7 +353,12 @@ class OrbaxCheckpoint(MonitorCallback):
         This method blocks until all asynchronous checkpoint save operations
         have completed across all hosts in a multi-host setup.
         """
-        # Wait for any async operations to complete on this host
+        # Wait for all tracked async futures to complete
+        for future in self._async_futures:
+            future.result()  # Wait for completion
+        self._async_futures.clear()  # Clear completed futures
+        # Wait for any remaining async operations to complete on this host
         self.checkpointer.wait()
         # Multi-host synchronization: ensure all hosts complete

keras/src/dtype_policies/__init__.py CHANGED Viewed

@@ -6,6 +6,7 @@ from keras.src.dtype_policies.dtype_policy import AWQDTypePolicy
 from keras.src.dtype_policies.dtype_policy import DTypePolicy
 from keras.src.dtype_policies.dtype_policy import FloatDTypePolicy
 from keras.src.dtype_policies.dtype_policy import GPTQDTypePolicy
+from keras.src.dtype_policies.dtype_policy import Int4DTypePolicy
 from keras.src.dtype_policies.dtype_policy import QuantizedDTypePolicy
 from keras.src.dtype_policies.dtype_policy import QuantizedFloat8DTypePolicy
 from keras.src.dtype_policies.dtype_policy_map import DTypePolicyMap
@@ -18,6 +19,7 @@ ALL_OBJECTS = {
     QuantizedFloat8DTypePolicy,
     DTypePolicyMap,
     GPTQDTypePolicy,
+    Int4DTypePolicy,
 }
 ALL_OBJECTS_DICT = {cls.__name__: cls for cls in ALL_OBJECTS}

keras/src/dtype_policies/dtype_policy.py CHANGED Viewed

@@ -288,6 +288,79 @@ class QuantizedFloat8DTypePolicy(QuantizedDTypePolicy):
         return config
+@keras_export("keras.dtype_policies.Int4DTypePolicy")
+class Int4DTypePolicy(QuantizedDTypePolicy):
+    """Quantized dtype policy for int4 quantization.
+    This policy helps propagate quantization settings for int4 sub-channel
+    quantization when loading a quantized model in Keras format.
+    Args:
+        mode: The quantization mode. This should be a string in the format
+            `"int4/<block_size>"`.
+            -   `"int4"`: The identifier for the quantization algorithm.
+            -   `<block_size>`: The block size for sub-channel quantization.
+                Use -1 for per-channel (legacy) quantization. Any positive
+                integer enables sub-channel quantization with that block size.
+            Example: `"int4/128"` for sub-channel with 128-element groups.
+        source_name: The source dtype policy name, e.g. "float32".
+    """
+    def __init__(
+        self,
+        mode,
+        source_name=None,
+    ):
+        parts = mode.split("/")
+        expected_format = "'int4/<block_size>'"
+        # Validate format
+        if len(parts) != 2 or parts[0] != "int4":
+            raise ValueError(
+                "Invalid mode for Int4DTypePolicy. Expected format "
+                f"{expected_format}, but got '{mode}'."
+            )
+        # Validate and cast block_size
+        try:
+            block_size = int(parts[1])
+        except ValueError:
+            raise ValueError(
+                "Invalid mode for Int4DTypePolicy. <block_size> must be an "
+                f"integer. Expected format {expected_format}, but got '{mode}'."
+            )
+        # Validate supported values
+        if block_size < -1 or block_size == 0:
+            raise ValueError(
+                "Invalid block_size in mode. Supported values are "
+                "-1 (per-channel) or a positive integer (sub-channel), "
+                f"but got {block_size} from '{mode}'."
+            )
+        base_mode = parts[0]
+        super().__init__(
+            mode=base_mode,
+            source_name=source_name,
+        )
+        self._name = f"{mode}_from_{source_name}"
+        self.mode = base_mode
+        self.block_size = block_size
+    def __eq__(self, other):
+        if super().__eq__(other) is False:
+            return False
+        return self.block_size == other.block_size
+    def get_config(self):
+        config = super().get_config()
+        # Reconstruct the full mode string for serialization
+        mode = f"{self.mode}/{self.block_size}"
+        config.update({"mode": mode})
+        return config
 @keras_export("keras.dtype_policies.GPTQDTypePolicy")
 class GPTQDTypePolicy(QuantizedDTypePolicy):
     """Quantized dtype policy for GPTQ quantization.
@@ -525,8 +598,14 @@ def _get_quantized_dtype_policy_by_str(policy):
             f"Received: policy={policy}"
         )
     mode, source_name = split_name
-    if policy.startswith("int8") or policy.startswith("int4"):
+    if policy.startswith("int8"):
         return QuantizedDTypePolicy(mode, source_name)
+    elif policy.startswith("int4"):
+        # Check if mode has block_size component (e.g., "int4/128")
+        if "/" in mode:
+            return Int4DTypePolicy(mode, source_name)
+        else:
+            return QuantizedDTypePolicy(mode, source_name)
     elif policy.startswith("gptq"):
         return GPTQDTypePolicy(mode, source_name)
     elif policy.startswith("awq"):

keras/src/export/tfsm_layer.py CHANGED Viewed

@@ -2,6 +2,7 @@ from keras.src import backend
 from keras.src import layers
 from keras.src.api_export import keras_export
 from keras.src.export.saved_model import _list_variables_used_by_fns
+from keras.src.saving import serialization_lib
 from keras.src.utils.module_utils import tensorflow as tf
@@ -146,3 +147,36 @@ class TFSMLayer(layers.Layer):
             "call_training_endpoint": self.call_training_endpoint,
         }
         return {**base_config, **config}
+    @classmethod
+    def from_config(cls, config, custom_objects=None, safe_mode=None):
+        """Creates a TFSMLayer from its config.
+        Args:
+            config: A Python dictionary, typically the output of `get_config`.
+            custom_objects: Optional dictionary mapping names to custom objects.
+            safe_mode: Boolean, whether to disallow loading TFSMLayer.
+                When `safe_mode=True`, loading is disallowed because TFSMLayer
+                loads external SavedModels that may contain attacker-controlled
+                executable graph code. Defaults to `True`.
+        Returns:
+            A TFSMLayer instance.
+        """
+        # Follow the same pattern as Lambda layer for safe_mode handling
+        effective_safe_mode = (
+            safe_mode
+            if safe_mode is not None
+            else serialization_lib.in_safe_mode()
+        )
+        if effective_safe_mode is not False:
+            raise ValueError(
+                "Requested the deserialization of a `TFSMLayer`, which "
+                "loads an external SavedModel. This carries a potential risk "
+                "of arbitrary code execution and thus it is disallowed by "
+                "default. If you trust the source of the artifact, you can "
+                "override this error by passing `safe_mode=False` to the "
+                "loading function, or calling "
+                "`keras.config.enable_unsafe_deserialization()."
+            )
+        return cls(**config)

keras-nightly 3.14.0.dev2026012804__py3-none-any.whl → 3.14.0.dev2026013004__py3-none-any.whl

keras-nightly 3.14.0.dev2026012804__py3-none-any.whl → 3.14.0.dev2026013004__py3-none-any.whl