keras-nightly 3.14.0.dev2026010104__py3-none-any.whl → 3.14.0.dev2026012204__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras/_tf_keras/keras/dtype_policies/__init__.py +3 -0
- keras/_tf_keras/keras/ops/__init__.py +2 -0
- keras/_tf_keras/keras/ops/numpy/__init__.py +2 -0
- keras/_tf_keras/keras/quantizers/__init__.py +1 -0
- keras/dtype_policies/__init__.py +3 -0
- keras/ops/__init__.py +2 -0
- keras/ops/numpy/__init__.py +2 -0
- keras/quantizers/__init__.py +1 -0
- keras/src/backend/jax/nn.py +26 -9
- keras/src/backend/jax/numpy.py +10 -0
- keras/src/backend/numpy/numpy.py +15 -0
- keras/src/backend/openvino/numpy.py +338 -17
- keras/src/backend/tensorflow/numpy.py +24 -1
- keras/src/backend/tensorflow/rnn.py +17 -7
- keras/src/backend/torch/numpy.py +26 -0
- keras/src/backend/torch/rnn.py +28 -11
- keras/src/callbacks/orbax_checkpoint.py +75 -42
- keras/src/dtype_policies/__init__.py +2 -0
- keras/src/dtype_policies/dtype_policy.py +90 -1
- keras/src/layers/core/dense.py +122 -6
- keras/src/layers/core/einsum_dense.py +151 -7
- keras/src/layers/core/embedding.py +1 -1
- keras/src/layers/core/reversible_embedding.py +10 -1
- keras/src/layers/layer.py +5 -0
- keras/src/layers/preprocessing/feature_space.py +8 -4
- keras/src/layers/preprocessing/image_preprocessing/aug_mix.py +2 -2
- keras/src/layers/preprocessing/image_preprocessing/center_crop.py +13 -15
- keras/src/layers/preprocessing/image_preprocessing/random_contrast.py +3 -3
- keras/src/layers/preprocessing/image_preprocessing/resizing.py +10 -0
- keras/src/losses/losses.py +24 -0
- keras/src/models/model.py +18 -9
- keras/src/ops/image.py +106 -93
- keras/src/ops/numpy.py +138 -0
- keras/src/quantizers/__init__.py +2 -0
- keras/src/quantizers/awq.py +361 -0
- keras/src/quantizers/awq_config.py +140 -0
- keras/src/quantizers/awq_core.py +217 -0
- keras/src/quantizers/gptq.py +1 -2
- keras/src/quantizers/gptq_core.py +1 -1
- keras/src/quantizers/quantization_config.py +14 -0
- keras/src/quantizers/quantizers.py +61 -52
- keras/src/random/seed_generator.py +2 -2
- keras/src/saving/orbax_util.py +50 -0
- keras/src/saving/saving_api.py +37 -14
- keras/src/utils/jax_layer.py +69 -31
- keras/src/utils/module_utils.py +11 -0
- keras/src/utils/tracking.py +5 -5
- keras/src/version.py +1 -1
- {keras_nightly-3.14.0.dev2026010104.dist-info → keras_nightly-3.14.0.dev2026012204.dist-info}/METADATA +1 -1
- {keras_nightly-3.14.0.dev2026010104.dist-info → keras_nightly-3.14.0.dev2026012204.dist-info}/RECORD +52 -48
- {keras_nightly-3.14.0.dev2026010104.dist-info → keras_nightly-3.14.0.dev2026012204.dist-info}/WHEEL +1 -1
- {keras_nightly-3.14.0.dev2026010104.dist-info → keras_nightly-3.14.0.dev2026012204.dist-info}/top_level.txt +0 -0

keras/src/quantizers/awq_core.py
ADDED

@@ -0,0 +1,217 @@
+"""AWQ core functionality for layer-wise quantization.
+
+This module provides the orchestration logic for applying AWQ quantization
+to transformer models in a layer-by-layer fashion.
+"""
+
+from contextlib import contextmanager
+
+from absl import logging
+
+from keras.src import ops
+from keras.src import utils as keras_utils
+from keras.src.dtype_policies.dtype_policy import AWQDTypePolicy
+from keras.src.dtype_policies.dtype_policy_map import DTypePolicyMap
+from keras.src.quantizers.awq import AWQ
+from keras.src.quantizers.awq_config import AWQConfig
+from keras.src.quantizers.gptq_core import find_layers_in_block
+from keras.src.quantizers.gptq_core import get_dataloader
+from keras.src.quantizers.utils import should_quantize_layer
+
+
+@contextmanager
+def stream_activations(layers_map, awq_objects):
+    """Context manager to capture activations for AWQ calibration.
+
+    Temporarily patches layer.call methods to capture activation statistics
+    for computing per-channel scaling factors.
+
+    Args:
+        layers_map: Dict[str, Layer]. Mapping from layer names to layers.
+        awq_objects: Dict[str, AWQ]. Mapping from names to AWQ instances.
+
+    Yields:
+        None: The patched state is active only within the `with` block.
+    """
+    original_calls = {}
+
+    def create_hook(name, original_call_func):
+        def hook(*args, **kwargs):
+            inp = args[0] if args else kwargs["inputs"]
+            num_features = awq_objects[name].rows
+            input_2d = ops.reshape(inp, (-1, num_features))
+            awq_objects[name].update_activation_magnitudes(input_2d)
+            return original_call_func(*args, **kwargs)
+
+        return hook
+
+    try:
+        for name, layer in layers_map.items():
+            original_calls[name] = layer.call
+            layer.call = create_hook(name, layer.call)
+        yield
+    finally:
+        for name, layer in layers_map.items():
+            layer.call = original_calls[name]
+
+
+def apply_awq_layerwise(dataloader, config, structure, filters=None):
+    """Apply AWQ quantization layer-by-layer to a Keras model.
+
+    This function processes the model sequentially, one block at a time:
+    1. Captures activation statistics through calibration data forward pass
+    2. Uses activation magnitudes to determine weight saliency
+    3. Finds optimal per-channel scales via grid search
+    4. Quantizes weights with AWQ scaling
+
+    Args:
+        dataloader: Calibration data as numpy array.
+        config: AWQConfig instance.
+        structure: Dict with 'pre_block_layers' and 'sequential_blocks'.
+        filters: Optional layer filters.
+    """
+    num_samples = config.num_samples
+    logging.info("Starting AWQ quantization...")
+
+    pre_layers = structure.get("pre_block_layers", [])
+    transformer_blocks = structure.get("sequential_blocks", [])
+
+    if not transformer_blocks:
+        raise ValueError(
+            "No sequential blocks found in the structure to quantize."
+        )
+
+    # Process inputs through pre-block layers (e.g., embedding)
+    inputs = []
+    for batch in dataloader:
+        batch = ops.convert_to_tensor(batch, dtype="int32")
+        for layer in pre_layers:
+            batch = layer(batch)
+        inputs.append(batch)
+
+    num_samples = min(num_samples, len(inputs))
+    progbar = keras_utils.Progbar(target=len(transformer_blocks))
+
+    for block_idx, block in enumerate(transformer_blocks):
+        logging.info(f"Quantizing Block {block_idx}")
+        sub_layers_map = find_layers_in_block(block)
+
+        # Apply filters
+        final_sub_layers_map = {}
+        for name, layer in sub_layers_map.items():
+            if not should_quantize_layer(layer, filters):
+                continue
+            final_sub_layers_map[name] = layer
+
+        sub_layers_map = final_sub_layers_map
+
+        if not sub_layers_map:
+            logging.info(
+                f" No quantizable layers found in block {block_idx}. Skipping."
+            )
+        else:
+            logging.info(f"Found layers: {list(sub_layers_map.keys())}")
+
+            # Create AWQ objects for each layer
+            awq_objects = {
+                name: AWQ(layer, config)
+                for name, layer in sub_layers_map.items()
+            }
+
+            # Capture activation statistics
+            with stream_activations(sub_layers_map, awq_objects):
+                for sample_idx in range(num_samples):
+                    current_input = inputs[sample_idx]
+                    if len(current_input.shape) == 2:
+                        current_input = ops.expand_dims(current_input, axis=0)
+                    _ = block(current_input)
+
+            # Quantize each layer
+            for name, awq_object in awq_objects.items():
+                logging.info(f"Quantizing {name}...")
+                awq_object.quantize_layer()
+                awq_object.free()
+
+            del awq_objects
+
+        # Generate inputs for next block
+        if block_idx < len(transformer_blocks) - 1:
+            logging.info(f"Generating inputs for block {block_idx + 1}...")
+            next_block_inputs = []
+            for sample_idx in range(num_samples):
+                current_input = inputs[sample_idx]
+                if len(current_input.shape) == 2:
+                    current_input = ops.expand_dims(current_input, axis=0)
+                output = block(current_input)[0]
+                next_block_inputs.append(output)
+            inputs = next_block_inputs
+
+        progbar.update(current=block_idx + 1)
+
+    logging.info("AWQ quantization complete.")
+
+
+def awq_quantize(config, quantization_layer_structure, filters=None):
+    """Main entry point for AWQ quantization.
+
+    Args:
+        config: AWQConfig instance.
+        quantization_layer_structure: Model structure dictionary.
+        filters: Optional layer filters.
+    """
+    if config.dataset is None or config.tokenizer is None:
+        raise ValueError(
+            "AWQ quantization requires a dataset and tokenizer. "
+            "Please provide them in the AWQConfig."
+        )
+
+    if quantization_layer_structure is None:
+        raise ValueError(
+            "For 'awq' mode, a valid quantization structure must be provided "
+            "either via `config.quantization_layer_structure` or by overriding "
+            "`model.get_quantization_layer_structure(mode)`. The structure "
+            "should be a dictionary with keys 'pre_block_layers' and "
+            "'sequential_blocks'."
+        )
+
+    # Load calibration data
+    dataloader = get_dataloader(
+        config.tokenizer,
+        config.sequence_length,
+        config.dataset,
+        num_samples=config.num_samples,
+    )
+
+    apply_awq_layerwise(
+        dataloader[: config.num_samples],
+        config,
+        quantization_layer_structure,
+        filters=filters,
+    )
+
+
+def get_group_size_for_layer(layer, config):
+    """Get group size from config or dtype policy.
+
+    Args:
+        layer: The layer to get group size for.
+        config: Optional AWQConfig instance.
+
+    Returns:
+        int: The group size for quantization.
+
+    Raises:
+        ValueError: If group size cannot be determined.
+    """
+    if config and isinstance(config, AWQConfig):
+        return config.group_size
+    elif isinstance(layer.dtype_policy, AWQDTypePolicy):
+        return layer.dtype_policy.group_size
+    elif isinstance(layer.dtype_policy, DTypePolicyMap):
+        policy = layer.dtype_policy[layer.path]
+        if isinstance(policy, AWQDTypePolicy):
+            return policy.group_size
+    raise ValueError(
+        "For AWQ quantization, group_size must be specified "
+        "through AWQConfig or AWQDTypePolicy."
+    )
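
The `stream_activations` helper above works by swapping each target layer's `call` for a hook that records incoming activations before delegating to the original method. Below is a minimal, self-contained sketch of that mechanism using a stub recorder in place of the real `AWQ` object (which exposes `rows` and `update_activation_magnitudes`); the stub class and the Dense layer here are illustrative assumptions, not part of the diff.

```python
import numpy as np
from keras import layers, ops
from keras.src.quantizers.awq_core import stream_activations


class MagnitudeRecorder:
    """Stand-in for AWQ: accumulates mean |activation| per input feature."""

    def __init__(self, num_features):
        self.rows = num_features  # the hook reads `rows` to reshape inputs
        self.magnitudes = np.zeros((num_features,), dtype="float32")

    def update_activation_magnitudes(self, input_2d):
        self.magnitudes += ops.convert_to_numpy(
            ops.mean(ops.abs(input_2d), axis=0)
        )


dense = layers.Dense(8)
dense.build((None, 16))

recorders = {"dense": MagnitudeRecorder(num_features=16)}
with stream_activations({"dense": dense}, recorders):
    # While the context manager is active, every call is observed.
    _ = dense(np.random.rand(4, 16).astype("float32"))

print(recorders["dense"].magnitudes.shape)  # expected: (16,)
```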
keras/src/quantizers/gptq.py
CHANGED

@@ -1,5 +1,4 @@
 import types
-from functools import partial
 
 from keras.src import ops
 from keras.src import quantizers
@@ -466,7 +465,7 @@ class GPTQ:
             group_size=self.config.group_size,
             activation_order=self.config.activation_order,
             order_metric=ops.diagonal(hessian_matrix),
-            compute_scale_zero=
+            compute_scale_zero=self.quantizer.find_params,
         )
         quantized = ops.cast(
             quantized, self.original_layer.quantized_kernel.dtype

keras/src/quantizers/gptq_core.py
CHANGED

@@ -131,7 +131,7 @@ def get_dataloader(
     pieces = []
     if isinstance(dataset_list[0], str):
         for i, s in enumerate(dataset_list):
-            toks =
+            toks = ops.convert_to_numpy(tokenizer.tokenize(s)).reshape(-1)
             pieces.append(toks)
             # avoid windows that span document boundaries
             if eos_id is not None and i < len(dataset_list) - 1:
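
The corrected line flattens whatever the tokenizer returns into a 1D token stream before the pieces are concatenated. A tiny illustration of that flattening step (the batched array below is a stand-in for a real tokenizer's output, which is often shaped `[1, seq_len]`):

```python
import numpy as np
from keras import ops

batched_tokens = np.array([[101, 2023, 2003, 102]])  # stand-in tokenizer output
flat = ops.convert_to_numpy(batched_tokens).reshape(-1)
print(flat.shape)  # (4,)
```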

keras/src/quantizers/quantization_config.py
CHANGED

@@ -182,6 +182,11 @@ def validate_and_resolve_config(mode, config):
                "For GPTQ, you must pass a `GPTQConfig` object in the "
                "`config` argument."
            )
+        elif mode == "awq":
+            raise ValueError(
+                "For AWQ, you must pass an `AWQConfig` object in the "
+                "`config` argument."
+            )
        else:
            if mode is not None:
                raise ValueError(
@@ -220,6 +225,15 @@ def validate_and_resolve_config(mode, config):
                f"`GPTQConfig`. Received: {type(config)}"
            )
 
+    if mode == "awq":
+        from keras.src.quantizers.awq_config import AWQConfig
+
+        if not isinstance(config, AWQConfig):
+            raise ValueError(
+                "Mode 'awq' requires a valid `config` argument of type "
+                f"`AWQConfig`. Received: {type(config)}"
+            )
+
    return config
 
 
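
Together with the new AWQ mode, the validation above means requesting AWQ without a config now fails fast with a clear message. A hedged sketch of that behavior through the public `Model.quantize` entry point (it assumes the mode/config validation runs before any layer is touched, so the tiny model here never needs calibration data):

```python
import keras

model = keras.Sequential([keras.Input(shape=(8,)), keras.layers.Dense(4)])

try:
    model.quantize("awq")  # no AWQConfig supplied
except ValueError as e:
    # expected: "For AWQ, you must pass an `AWQConfig` object in the `config` argument."
    print(e)
```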

keras/src/quantizers/quantizers.py
CHANGED

@@ -653,11 +653,14 @@ def unpack_int4(packed, orig_len, axis=0, dtype="int8"):
         )
 
     def to_signed(x):
-        """Converts unpacked nibbles [0, 15] to signed int4 [-8, 7]."""
+        """Converts unpacked nibbles [0, 15] to signed int4 [-8, 7].
+
+        Uses a branchless XOR approach: (x ^ 8) - 8
+        This maps: 0->0, 1->1, ..., 7->7, 8->-8, 9->-7, ..., 15->-1
+        """
         dtype_x = backend.standardize_dtype(x.dtype)
         eight = ops.cast(8, dtype_x)
-
-        return ops.where(x < eight, x, x - sixteen)
+        return ops.subtract(ops.bitwise_xor(x, eight), eight)
 
     rank = getattr(packed.shape, "rank", None) or len(packed.shape)
     if axis < 0:
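
The new `to_signed` relies on the fact that XOR-ing a 4-bit value with 8 flips its sign bit, so `(x ^ 8) - 8` reinterprets an unsigned nibble as two's-complement int4 without a branch. A quick self-check in plain Python, comparing it against the previous `where(x < 8, x, x - 16)` formulation:

```python
for x in range(16):
    branchless = (x ^ 8) - 8
    reference = x if x < 8 else x - 16  # the old formulation
    assert branchless == reference
    print(f"{x:2d} -> {branchless}")
```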
@@ -748,7 +751,7 @@ class GPTQQuantizer(Quantizer):
         self.zero = None
         self.maxq = None
 
-    def find_params(self, input_tensor
+    def find_params(self, input_tensor):
         """Finds quantization parameters (scale and zero) for a given tensor."""
         self.scale, self.zero, self.maxq = compute_quantization_parameters(
             input_tensor,
@@ -756,7 +759,6 @@ class GPTQQuantizer(Quantizer):
             symmetric=self.symmetric,
             per_channel=self.per_channel,
             group_size=self.group_size,
-            weight=weight,
             compute_dtype=self.compute_dtype,
         )
         return self.scale, self.zero, self.maxq
@@ -793,98 +795,105 @@ def compute_quantization_parameters(
     symmetric=False,
     per_channel=False,
     group_size=-1,
-    weight=False,
     compute_dtype="float32",
 ):
     """
-    Computes the scale and zero-point for
+    Computes the scale and zero-point for quantizing weight tensors.
 
     This function calculates the scale and zero-point required for quantizing
-    a given tensor `x` based on the specified parameters. It supports
-    per-channel, per-tensor, symmetric, and asymmetric quantization
-
+    a given weight tensor `x` based on the specified parameters. It supports
+    grouped, per-channel, per-tensor, symmetric, and asymmetric quantization.
+
+    For grouped quantization (per_channel=True, group_size > 0), the output
+    shapes are [out_features, n_groups] where n_groups is the number of groups
+    along the in_features dimension.
 
     Args:
-        x: KerasTensor. The
+        x: KerasTensor. The weight tensor to quantize with shape
+            [out_features, in_features].
         bits: int. The number of bits to quantize to (e.g., 4).
         symmetric: bool. Whether to use symmetric quantization.
         per_channel: bool. Whether to quantize per channel.
-        group_size: int. The group size for quantization.
-
+        group_size: int. The group size for quantization. -1 means no grouping.
+        compute_dtype: str. The dtype for computation. Defaults to "float32".
 
     Returns:
        scale: KerasTensor. The scale tensor for quantization.
        zero: KerasTensor. The zero tensor for quantization.
        maxq: scalar. The maximum quantization value.
    """
+    # Input validation
    if x is None:
        raise ValueError(f"Input tensor {x} cannot be None.")
-
-    # For weights, we typically expect at least a 2D tensor.
-    if weight and len(x.shape) < 2:
+    if len(x.shape) < 2:
        raise ValueError(
            f"Input weight tensor {x} must have a rank of at "
            f"least 2, but got rank {len(x.shape)}."
        )
-
    if ops.size(x) == 0:
        raise ValueError("Input tensor 'x' cannot be empty.")
 
-
-
-    if per_channel:
-        if weight:
-            if group_size != -1:
-                input_reshaped = ops.reshape(x, [-1, group_size])
-            else:
-                input_reshaped = ops.reshape(x, [original_shape[0], -1])
-    else:  # per-tensor
-        input_reshaped = ops.reshape(x, [1, -1])
+    out_features, in_features = x.shape[0], x.shape[1]
 
-    #
-
-
+    # Determine number of groups for quantization
+    if per_channel and group_size > 0:
+        n_groups = (in_features + group_size - 1) // group_size
+    else:
+        n_groups = 1
+
+    # Compute min/max values based on quantization mode
+    if n_groups > 1:
+        # Grouped quantization: output shape [out_features, n_groups]
+        remainder = in_features % group_size
+        if remainder != 0:
+            pad_size = group_size - remainder
+            x = ops.pad(x, [[0, 0], [0, pad_size]], constant_values=0.0)
+
+        x_grouped = ops.reshape(x, [out_features, n_groups, group_size])
+        min_values = ops.min(x_grouped, axis=2)
+        max_values = ops.max(x_grouped, axis=2)
+    else:
+        # Per-channel or per-tensor: compute stats along rows
+        reduction_shape = [out_features, -1] if per_channel else [1, -1]
+        x_reshaped = ops.reshape(x, reduction_shape)
+        min_values = ops.min(x_reshaped, axis=1)
+        max_values = ops.max(x_reshaped, axis=1)
 
-    #
+    # Symmetric quantization: make range symmetric around zero
    if symmetric:
-
+        max_abs = ops.maximum(ops.abs(min_values), max_values)
        min_values = ops.where(
-            ops.less(min_values, 0), ops.negative(
+            ops.less(min_values, 0), ops.negative(max_abs), min_values
        )
+        max_values = max_abs
 
-    # Ensure range
+    # Ensure non-zero range to avoid division errors
    zero_range = ops.equal(min_values, max_values)
    min_values = ops.where(zero_range, ops.subtract(min_values, 1), min_values)
    max_values = ops.where(zero_range, ops.add(max_values, 1), max_values)
 
+    # Compute scale and zero-point
    maxq = ops.cast(ops.subtract(ops.power(2, bits), 1), compute_dtype)
-
-    # Calculate scale and zero-point
    scale = ops.divide(ops.subtract(max_values, min_values), maxq)
+    scale = ops.where(ops.less_equal(scale, 0), 1e-8, scale)
+
    if symmetric:
        zero = ops.full_like(scale, ops.divide(ops.add(maxq, 1), 2))
    else:
        zero = ops.round(ops.divide(ops.negative(min_values), scale))
 
-    #
-
-
-
-    # Per-channel, non-grouped case: simple reshape is correct.
-    if per_channel and group_size == -1:
-        scale = ops.reshape(scale, [-1, 1])
-        zero = ops.reshape(zero, [-1, 1])
-    elif not per_channel:
-        num_rows = original_shape[0]
-        scale = ops.tile(ops.reshape(scale, (1, 1)), (num_rows, 1))
-        zero = ops.tile(ops.reshape(zero, (1, 1)), (num_rows, 1))
-    if per_channel:
+    # Reshape output to [out_features, n_groups] or [out_features, 1]
+    if n_groups > 1:
+        pass  # Already [out_features, n_groups]
+    elif per_channel:
        scale = ops.reshape(scale, [-1, 1])
        zero = ops.reshape(zero, [-1, 1])
+    else:
+        # Per-tensor: tile single value to [out_features, 1]
+        scale = ops.tile(ops.reshape(scale, (1, 1)), (out_features, 1))
+        zero = ops.tile(ops.reshape(zero, (1, 1)), (out_features, 1))
 
-
-
-    return scale, zero, maxq
+    return scale, ops.cast(zero, "uint8"), maxq
 
 
 def quantize_with_zero_point(input_tensor, scale, zero, maxq):
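
For the grouped path (`per_channel=True` with a positive `group_size`), the function now pads `in_features` up to a multiple of `group_size`, reshapes to `[out_features, n_groups, group_size]`, and reduces over the last axis. A simplified, self-contained NumPy sketch of that asymmetric case (it omits the equal-min/max guard and uses hypothetical shapes):

```python
import numpy as np


def grouped_scale_zero(w, bits=4, group_size=32):
    """Per-group asymmetric scale/zero for a [out_features, in_features] weight."""
    out_features, in_features = w.shape
    n_groups = (in_features + group_size - 1) // group_size
    pad = n_groups * group_size - in_features
    if pad:
        w = np.pad(w, [(0, 0), (0, pad)])  # zero-pad the last group
    grouped = w.reshape(out_features, n_groups, group_size)
    min_v = grouped.min(axis=2)
    max_v = grouped.max(axis=2)
    maxq = 2**bits - 1
    scale = np.maximum((max_v - min_v) / maxq, 1e-8)
    zero = np.round(-min_v / scale).astype("uint8")
    return scale, zero  # both shaped [out_features, n_groups]


w = np.random.randn(8, 70).astype("float32")
scale, zero = grouped_scale_zero(w)
print(scale.shape, zero.shape)  # (8, 3) (8, 3) with group_size=32
```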

keras/src/random/seed_generator.py
CHANGED

@@ -29,7 +29,7 @@ class SeedGenerator:
     a local `StateGenerator` with either a deterministic or random initial
     state.
 
-    Remark concerning the JAX
+    Remark concerning the JAX backend: Note that the use of a local
     `StateGenerator` as seed argument is required for JIT compilation of
     RNG with the JAX backend, because the use of global state is not
     supported.
@@ -111,7 +111,7 @@ class SeedGenerator:
         return new_seed_value
 
     def get_config(self):
-        return {"seed": self._initial_seed}
+        return {"seed": self._initial_seed, "name": self.name}
 
     @classmethod
    def from_config(cls, config):
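
With the name now included in `get_config`, a `SeedGenerator` survives a config round-trip without losing its identity. A short sketch (it assumes `keras.random.SeedGenerator` accepts a `name` argument, which `from_config` implies):

```python
import keras

gen = keras.random.SeedGenerator(seed=1337, name="dropout_seed")
config = gen.get_config()
print(config)  # expected: {'seed': 1337, 'name': 'dropout_seed'}

restored = keras.random.SeedGenerator.from_config(config)
assert restored.name == "dropout_seed"
```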

keras/src/saving/orbax_util.py
ADDED

@@ -0,0 +1,50 @@
+"""Orbax checkpoint loading functionality."""
+
+import os
+
+from keras.src.utils.module_utils import ocp
+
+
+def is_orbax_checkpoint(filepath):
+    """Check if the given path is an Orbax checkpoint directory.
+
+    This function implements custom detection logic instead of relying on
+    Orbax APIs which may be unreliable in some environments.
+    """
+    if not os.path.exists(filepath) or not os.path.isdir(filepath):
+        return False
+
+    try:
+        # List directory contents
+        contents = os.listdir(filepath)
+
+        # A set is more efficient for membership testing
+        orbax_indicators = {
+            "orbax.checkpoint",
+            "pytree.orbax-checkpoint",
+            "checkpoint_metadata",
+        }
+
+        # Fast check for standard files
+        if not orbax_indicators.isdisjoint(contents):
+            return True
+
+        # Check for step directories or temporary files in a single pass
+        return any(
+            ".orbax-checkpoint-tmp" in item
+            or (item.isdigit() and os.path.isdir(os.path.join(filepath, item)))
+            for item in contents
+        )
+
+    except (OSError, PermissionError):
+        # If we can't read the directory, assume it's not a checkpoint
+        return False
+
+
+def find_latest_orbax_checkpoint(checkpoint_dir):
+    """Find the latest checkpoint in an Orbax checkpoint directory."""
+    checkpointer = ocp.training.Checkpointer(directory=checkpoint_dir)
+    latest = checkpointer.latest
+    if latest is None:
+        raise ValueError(f"No valid checkpoints found in {checkpoint_dir}")
+    return os.path.join(checkpoint_dir, str(latest.step))
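
The detection heuristic treats a directory as an Orbax checkpoint when it contains known marker files or purely numeric step subdirectories. A throwaway-directory sketch of that behavior (no real checkpoint is needed for the step-directory case):

```python
import os
import tempfile

from keras.src.saving.orbax_util import is_orbax_checkpoint

with tempfile.TemporaryDirectory() as root:
    print(is_orbax_checkpoint(root))  # False: empty directory

    os.mkdir(os.path.join(root, "42"))  # numeric name mimics a step directory
    print(is_orbax_checkpoint(root))  # True
```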
keras/src/saving/saving_api.py
CHANGED

@@ -6,13 +6,11 @@ from absl import logging
 from keras.src.api_export import keras_export
 from keras.src.legacy.saving import legacy_h5_format
 from keras.src.saving import saving_lib
+from keras.src.saving.orbax_util import find_latest_orbax_checkpoint
+from keras.src.saving.orbax_util import is_orbax_checkpoint
 from keras.src.utils import file_utils
 from keras.src.utils import io_utils
-
-try:
-    import h5py
-except ImportError:
-    h5py = None
+from keras.src.utils.module_utils import h5py
 
 
 @keras_export(["keras.saving.save_model", "keras.models.save_model"])
@@ -149,8 +147,6 @@ def load_model(filepath, custom_objects=None, compile=True, safe_mode=True):
        keras.layers.Softmax()])
    model.save("model.keras")
    loaded_model = keras.saving.load_model("model.keras")
-    x = np.random.random((10, 3))
-    assert np.allclose(model.predict(x), loaded_model.predict(x))
    ```
 
    Note that the model variables may have different name values
@@ -208,7 +204,7 @@ def load_model(filepath, custom_objects=None, compile=True, safe_mode=True):
    else:
        raise ValueError(
            f"File format not supported: filepath={filepath}. "
-            "Keras 3 only supports V3 `.keras` files
+            "Keras 3 only supports V3 `.keras` files, "
            "legacy H5 format files (`.h5` extension). "
            "Note that the legacy SavedModel format is not "
            "supported by `load_model()` in Keras 3. In "
@@ -288,15 +284,16 @@ def load_weights(model, filepath, skip_mismatch=False, **kwargs):
            objects_to_skip=objects_to_skip,
        )
    elif filepath_str.endswith(".h5") or filepath_str.endswith(".hdf5"):
-        if not h5py:
-            raise ImportError(
-                "Loading a H5 file requires `h5py` to be installed."
-            )
        if objects_to_skip is not None:
            raise ValueError(
                "`objects_to_skip` only supports loading '.weights.h5' files."
                f"Received: {filepath}"
            )
+        if not h5py.available:
+            raise ImportError(
+                "Loading HDF5 files requires the h5py package. "
+                "You can install it via `pip install h5py`"
+            )
        with h5py.File(filepath, "r") as f:
            if "layer_names" not in f.attrs and "model_weights" in f:
                f = f["model_weights"]
@@ -308,9 +305,35 @@ def load_weights(model, filepath, skip_mismatch=False, **kwargs):
        legacy_h5_format.load_weights_from_hdf5_group(
            f, model, skip_mismatch
        )
+    elif is_orbax_checkpoint(filepath):
+        # Load weights from Orbax checkpoint
+        from keras.src.utils.module_utils import ocp
+
+        filepath = str(filepath)
+
+        # Determine if this is a root directory or a step directory
+        items = os.listdir(filepath)
+        has_step_subdirs = any(
+            os.path.isdir(os.path.join(filepath, item)) and item.isdigit()
+            for item in items
+        )
+
+        if has_step_subdirs:
+            # It's a root directory, find the latest checkpoint
+            checkpoint_path = find_latest_orbax_checkpoint(filepath)
+        else:
+            # It's a step directory, use it directly
+            checkpoint_path = filepath
+
+        # Load checkpoint
+        loaded_state = ocp.load_pytree(checkpoint_path)
+
+        # Set the model state directly from the loaded state
+        model.set_state_tree(loaded_state)
    else:
        raise ValueError(
            f"File format not supported: filepath={filepath}. "
-            "Keras 3 only supports V3 `.keras`
-            "files,
+            "Keras 3 only supports V3 `.keras` files, "
+            "`.weights.h5` files, legacy H5 format files "
+            "(`.h5` extension), or Orbax checkpoints."
        )