keras-nightly 3.12.0.dev2025083103__py3-none-any.whl → 3.14.0.dev2026011604__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras/__init__.py +1 -0
- keras/_tf_keras/keras/__init__.py +1 -0
- keras/_tf_keras/keras/callbacks/__init__.py +3 -0
- keras/_tf_keras/keras/distillation/__init__.py +16 -0
- keras/_tf_keras/keras/distribution/__init__.py +3 -0
- keras/_tf_keras/keras/dtype_policies/__init__.py +6 -0
- keras/_tf_keras/keras/layers/__init__.py +21 -0
- keras/_tf_keras/keras/ops/__init__.py +16 -0
- keras/_tf_keras/keras/ops/image/__init__.py +1 -0
- keras/_tf_keras/keras/ops/linalg/__init__.py +1 -0
- keras/_tf_keras/keras/ops/nn/__init__.py +3 -0
- keras/_tf_keras/keras/ops/numpy/__init__.py +12 -0
- keras/_tf_keras/keras/quantizers/__init__.py +13 -0
- keras/callbacks/__init__.py +3 -0
- keras/distillation/__init__.py +16 -0
- keras/distribution/__init__.py +3 -0
- keras/dtype_policies/__init__.py +6 -0
- keras/layers/__init__.py +21 -0
- keras/ops/__init__.py +16 -0
- keras/ops/image/__init__.py +1 -0
- keras/ops/linalg/__init__.py +1 -0
- keras/ops/nn/__init__.py +3 -0
- keras/ops/numpy/__init__.py +12 -0
- keras/quantizers/__init__.py +13 -0
- keras/src/applications/imagenet_utils.py +4 -1
- keras/src/backend/common/backend_utils.py +30 -6
- keras/src/backend/common/dtypes.py +6 -12
- keras/src/backend/common/name_scope.py +2 -1
- keras/src/backend/common/variables.py +38 -20
- keras/src/backend/jax/core.py +126 -78
- keras/src/backend/jax/distribution_lib.py +16 -2
- keras/src/backend/jax/layer.py +3 -1
- keras/src/backend/jax/linalg.py +4 -0
- keras/src/backend/jax/nn.py +511 -29
- keras/src/backend/jax/numpy.py +109 -23
- keras/src/backend/jax/optimizer.py +3 -2
- keras/src/backend/jax/trainer.py +18 -3
- keras/src/backend/numpy/linalg.py +4 -0
- keras/src/backend/numpy/nn.py +313 -2
- keras/src/backend/numpy/numpy.py +97 -8
- keras/src/backend/openvino/__init__.py +1 -0
- keras/src/backend/openvino/core.py +6 -23
- keras/src/backend/openvino/linalg.py +4 -0
- keras/src/backend/openvino/nn.py +271 -20
- keras/src/backend/openvino/numpy.py +1369 -195
- keras/src/backend/openvino/random.py +7 -14
- keras/src/backend/tensorflow/layer.py +43 -9
- keras/src/backend/tensorflow/linalg.py +24 -0
- keras/src/backend/tensorflow/nn.py +545 -1
- keras/src/backend/tensorflow/numpy.py +351 -56
- keras/src/backend/tensorflow/trainer.py +6 -2
- keras/src/backend/torch/core.py +3 -1
- keras/src/backend/torch/linalg.py +4 -0
- keras/src/backend/torch/nn.py +125 -0
- keras/src/backend/torch/numpy.py +109 -9
- keras/src/backend/torch/trainer.py +8 -2
- keras/src/callbacks/__init__.py +1 -0
- keras/src/callbacks/callback_list.py +45 -11
- keras/src/callbacks/model_checkpoint.py +5 -0
- keras/src/callbacks/orbax_checkpoint.py +332 -0
- keras/src/callbacks/terminate_on_nan.py +54 -5
- keras/src/datasets/cifar10.py +5 -0
- keras/src/distillation/__init__.py +1 -0
- keras/src/distillation/distillation_loss.py +390 -0
- keras/src/distillation/distiller.py +598 -0
- keras/src/distribution/distribution_lib.py +14 -0
- keras/src/dtype_policies/__init__.py +4 -0
- keras/src/dtype_policies/dtype_policy.py +180 -1
- keras/src/export/__init__.py +2 -0
- keras/src/export/export_utils.py +39 -2
- keras/src/export/litert.py +248 -0
- keras/src/export/onnx.py +6 -0
- keras/src/export/openvino.py +1 -1
- keras/src/export/tf2onnx_lib.py +3 -0
- keras/src/layers/__init__.py +13 -0
- keras/src/layers/activations/softmax.py +9 -4
- keras/src/layers/attention/attention.py +1 -1
- keras/src/layers/attention/multi_head_attention.py +4 -1
- keras/src/layers/core/dense.py +406 -102
- keras/src/layers/core/einsum_dense.py +521 -116
- keras/src/layers/core/embedding.py +257 -99
- keras/src/layers/core/input_layer.py +1 -0
- keras/src/layers/core/reversible_embedding.py +399 -0
- keras/src/layers/input_spec.py +17 -17
- keras/src/layers/layer.py +50 -15
- keras/src/layers/merging/concatenate.py +6 -5
- keras/src/layers/merging/dot.py +4 -1
- keras/src/layers/pooling/adaptive_average_pooling1d.py +65 -0
- keras/src/layers/pooling/adaptive_average_pooling2d.py +62 -0
- keras/src/layers/pooling/adaptive_average_pooling3d.py +63 -0
- keras/src/layers/pooling/adaptive_max_pooling1d.py +65 -0
- keras/src/layers/pooling/adaptive_max_pooling2d.py +62 -0
- keras/src/layers/pooling/adaptive_max_pooling3d.py +63 -0
- keras/src/layers/pooling/base_adaptive_pooling.py +63 -0
- keras/src/layers/preprocessing/discretization.py +6 -5
- keras/src/layers/preprocessing/feature_space.py +8 -4
- keras/src/layers/preprocessing/image_preprocessing/aug_mix.py +2 -2
- keras/src/layers/preprocessing/image_preprocessing/bounding_boxes/validation.py +5 -5
- keras/src/layers/preprocessing/image_preprocessing/random_contrast.py +3 -3
- keras/src/layers/preprocessing/image_preprocessing/resizing.py +10 -0
- keras/src/layers/preprocessing/index_lookup.py +19 -1
- keras/src/layers/preprocessing/normalization.py +16 -1
- keras/src/layers/preprocessing/string_lookup.py +26 -28
- keras/src/layers/regularization/dropout.py +43 -1
- keras/src/layers/rnn/gru.py +1 -1
- keras/src/layers/rnn/lstm.py +2 -2
- keras/src/layers/rnn/rnn.py +19 -0
- keras/src/layers/rnn/simple_rnn.py +1 -1
- keras/src/legacy/preprocessing/image.py +4 -1
- keras/src/legacy/preprocessing/sequence.py +20 -12
- keras/src/losses/loss.py +1 -1
- keras/src/losses/losses.py +24 -0
- keras/src/metrics/confusion_metrics.py +7 -6
- keras/src/models/cloning.py +4 -0
- keras/src/models/functional.py +11 -3
- keras/src/models/model.py +195 -44
- keras/src/ops/image.py +257 -20
- keras/src/ops/linalg.py +93 -0
- keras/src/ops/nn.py +268 -2
- keras/src/ops/numpy.py +701 -44
- keras/src/ops/operation.py +90 -29
- keras/src/ops/operation_utils.py +2 -0
- keras/src/optimizers/adafactor.py +29 -10
- keras/src/optimizers/base_optimizer.py +22 -3
- keras/src/optimizers/loss_scale_optimizer.py +51 -18
- keras/src/optimizers/muon.py +65 -31
- keras/src/optimizers/schedules/learning_rate_schedule.py +4 -3
- keras/src/quantizers/__init__.py +14 -1
- keras/src/quantizers/awq.py +361 -0
- keras/src/quantizers/awq_config.py +140 -0
- keras/src/quantizers/awq_core.py +217 -0
- keras/src/quantizers/gptq.py +346 -207
- keras/src/quantizers/gptq_config.py +63 -13
- keras/src/quantizers/gptq_core.py +328 -215
- keras/src/quantizers/quantization_config.py +246 -0
- keras/src/quantizers/quantizers.py +407 -38
- keras/src/quantizers/utils.py +23 -0
- keras/src/random/seed_generator.py +6 -4
- keras/src/saving/file_editor.py +81 -6
- keras/src/saving/orbax_util.py +26 -0
- keras/src/saving/saving_api.py +37 -14
- keras/src/saving/saving_lib.py +1 -1
- keras/src/testing/__init__.py +1 -0
- keras/src/testing/test_case.py +45 -5
- keras/src/trainers/compile_utils.py +38 -17
- keras/src/trainers/data_adapters/grain_dataset_adapter.py +1 -5
- keras/src/tree/torchtree_impl.py +215 -0
- keras/src/tree/tree_api.py +6 -1
- keras/src/utils/backend_utils.py +31 -4
- keras/src/utils/dataset_utils.py +234 -35
- keras/src/utils/file_utils.py +49 -11
- keras/src/utils/image_utils.py +14 -2
- keras/src/utils/jax_layer.py +244 -55
- keras/src/utils/module_utils.py +29 -0
- keras/src/utils/progbar.py +10 -12
- keras/src/utils/python_utils.py +5 -0
- keras/src/utils/rng_utils.py +9 -1
- keras/src/utils/tracking.py +70 -5
- keras/src/version.py +1 -1
- {keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/METADATA +16 -6
- {keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/RECORD +163 -142
- keras/src/quantizers/gptq_quant.py +0 -133
- {keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/WHEEL +0 -0
- {keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/top_level.txt +0 -0
keras/src/backend/common/backend_utils.py
CHANGED

@@ -1,4 +1,5 @@
 import functools
+import math
 import operator
 import re
 import warnings

@@ -96,13 +97,13 @@ def _convert_conv_transpose_padding_args_from_keras_to_torch(
     )

     if torch_output_padding >= stride:
-        …
-            f"…
-            f"output_padding…
-            f"…
-            …
-            f"padding arguments, kernel or stride, or run on another backend. "
+        warnings.warn(
+            f"Torch backend requires output_padding < stride. "
+            f"Clamping output_padding {torch_output_padding} -> {stride - 1} "
+            f"for stride {stride}.",
+            UserWarning,
         )
+        torch_output_padding = stride - 1

     return torch_padding, torch_output_padding

@@ -184,6 +185,22 @@ def compute_conv_transpose_padding_args_for_torch(
         torch_paddings.append(torch_padding)
         torch_output_paddings.append(torch_output_padding)

+    # --- FIX FOR TORCH CONSTRAINT: output_padding < stride ---
+    corrected_output_paddings = []
+    for s, op in zip(
+        strides
+        if isinstance(strides, (list, tuple))
+        else [strides] * num_spatial_dims,
+        torch_output_paddings,
+    ):
+        max_allowed = max(0, s - 1)
+        if op > max_allowed:
+            corrected_output_paddings.append(max_allowed)
+        else:
+            corrected_output_paddings.append(op)
+
+    torch_output_paddings = corrected_output_paddings
+
     return torch_paddings, torch_output_paddings

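Both changes above enforce PyTorch's requirement that output_padding be strictly smaller than the stride for transposed convolutions, clamping instead of erroring out. A minimal standalone sketch of the clamping rule (hypothetical helper, illustrative values only, not the Keras API itself):

def clamp_output_padding(output_padding, stride):
    # Torch's ConvTranspose ops require output_padding < stride, so any
    # computed value is clipped to stride - 1 (never below 0).
    max_allowed = max(0, stride - 1)
    return min(output_padding, max_allowed)

print(clamp_output_padding(2, 2))  # 1: 2 >= stride, clamped to stride - 1
print(clamp_output_padding(0, 3))  # 0: already valid, left unchanged
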
@@ -523,3 +540,10 @@ def slice_along_axis(x, start=0, stop=None, step=1, axis=0):
         -1 - axis
     )
     return x[tuple(slices)]
+
+
+def compute_adaptive_pooling_window_sizes(input_dim, output_dim):
+    """Compute small and big window sizes for adaptive pooling."""
+    small = math.ceil(input_dim / output_dim)
+    big = small + 1
+    return small, big
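A quick worked example of the helper added above (input/output sizes chosen for illustration). Mapping input_dim positions onto output_dim outputs mixes windows of the "small" and "big" sizes:

import math

def compute_adaptive_pooling_window_sizes(input_dim, output_dim):
    """Compute small and big window sizes for adaptive pooling."""
    small = math.ceil(input_dim / output_dim)
    big = small + 1
    return small, big

print(compute_adaptive_pooling_window_sizes(10, 4))  # (3, 4)
print(compute_adaptive_pooling_window_sizes(8, 4))   # (2, 3); evenly divisible, only the small window is needed
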
keras/src/backend/common/dtypes.py
CHANGED

@@ -232,18 +232,12 @@ def _resolve_weak_type(dtype, precision="32"):
     return f"float{precision}"


-BIT64_TO_BIT16_DTYPE = {
-    "int32": "int16",
-    "int64": "int16",
-    "uint32": "uint16",
-    "uint64": "uint16",
-    "float32": "float16",
-    "float64": "float16",
-}
 BIT64_TO_BIT32_DTYPE = {
-    …
+    # Since TF variables require int64 to be placed on the GPU, we exclusively
+    # enable the int64 dtype for TF.
+    "int64": "int64" if config.backend() == "tensorflow" else "int32",
     "uint64": "uint32",
-    "float64": "float32",
+    "float64": "float64" if config.backend() == "tensorflow" else "float32",
     "complex128": "complex64",
 }

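The net effect of the revised mapping is that 64-bit dtypes are only preserved on the TensorFlow backend; elsewhere they are still downcast to their 32-bit counterparts. A hypothetical helper mirroring the table above, for illustration only:

def downcast_64bit(dtype, backend):
    # TensorFlow keeps int64/float64; other backends fall back to 32 bits.
    mapping = {
        "int64": "int64" if backend == "tensorflow" else "int32",
        "uint64": "uint32",
        "float64": "float64" if backend == "tensorflow" else "float32",
        "complex128": "complex64",
    }
    return mapping.get(dtype, dtype)

print(downcast_64bit("float64", "jax"))         # float32
print(downcast_64bit("float64", "tensorflow"))  # float64
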
@@ -277,8 +271,8 @@ def _lattice_result_type(*args):
     if out_weak_type:
         out_dtype = _resolve_weak_type(out_dtype, precision=precision)

-    # Force to be 32-bit dtype when encountering 64-bit dtype.
-    # …
+    # Force to be 32-bit dtype when encountering 64-bit dtype. This is to
+    # be aligned with JAX's default behavior.
     out_dtype = BIT64_TO_BIT32_DTYPE.get(out_dtype, out_dtype)
     return out_dtype

keras/src/backend/common/variables.py
CHANGED

@@ -1,5 +1,3 @@
-import os.path
-
 import numpy as np

 from keras.src import backend

@@ -144,7 +142,7 @@ class Variable:
         self._name = name
         parent_path = current_path()
         if parent_path:
-            self._path = …
+            self._path = f"{parent_path}/{name}"
         else:
             self._path = name
         self._shape = None

@@ -278,13 +276,13 @@ class Variable:
         return self._maybe_autocast(self._value)

     def assign(self, value):
-        value = self._convert_to_tensor(value, dtype=self.…
+        value = self._convert_to_tensor(value, dtype=self._dtype)
         if not shape_equal(value.shape, self.shape):
             raise ValueError(
                 "The shape of the target variable and "
                 "the shape of the target value in "
                 "`variable.assign(value)` must match. "
-                f"variable.shape={self.…
+                f"variable.shape={self.shape}, "
                 f"Received: value.shape={value.shape}. "
                 f"Target variable: {self}"
             )

@@ -401,7 +399,11 @@ class Variable:
     def __repr__(self):
         value = None
         if hasattr(self, "_value") and self._value is not None:
-            …
+            try:
+                value = backend.core.convert_to_numpy(self._value)
+            except:
+                # In some cases the conversion to numpy can fail.
+                pass
         value_str = f", value={value}" if value is not None else ""
         return (
             f"<Variable path={self.path}, shape={self.shape}, "

@@ -597,30 +599,46 @@ def standardize_shape(shape):
         # `tf.TensorShape` may contain `Dimension` objects.
         # We need to convert the items in it to either int or `None`
         shape = shape.as_list()
-    shape = tuple(shape)

-    if config.backend() == "…
-        # `…
-        …
-        shape = tuple(map(lambda x: int(x) if x is not None else None, shape))
+    if config.backend() == "jax":
+        # Replace `_DimExpr` (dimension expression) with None
+        from jax import export as jax_export

-    …
-    …
-    …
-    …
-    …
+        shape = tuple(
+            None if jax_export.is_symbolic_dim(d) else d for d in shape
+        )
+
+    # Handle dimensions that are not ints and not None, verify they're >= 0.
+    standardized_shape = []
+    for d in shape:
+        if d is None:
+            standardized_shape.append(d)
             continue
-        …
+
+        # Reject these even if they can be cast to int successfully.
+        if isinstance(d, (str, float)):
             raise ValueError(
                 f"Cannot convert '{shape}' to a shape. "
-                f"Found invalid…
+                f"Found invalid dimension '{d}' of type '{type(d)}'. "
             )
-        …
+
+        try:
+            # Cast numpy scalars, tf constant tensors, etc.
+            d = int(d)
+        except Exception as e:
+            raise ValueError(
+                f"Cannot convert '{shape}' to a shape. "
+                f"Found invalid dimension '{d}' of type '{type(d)}'. "
+            ) from e
+        if d < 0:
             raise ValueError(
                 f"Cannot convert '{shape}' to a shape. "
                 "Negative dimensions are not allowed."
             )
-        …
+        standardized_shape.append(d)
+
+    # This also turns subclasses of `tuple` (e.g. `torch.Size`) to plain tuple.
+    return tuple(standardized_shape)


 def shape_equal(a_shape, b_shape):
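Based on the rewritten logic above, standardize_shape now accepts any sequence whose entries are None or castable to a non-negative int, and it always returns a plain tuple. A hedged usage sketch (expected outputs shown as comments, assuming a standard Keras installation):

import numpy as np
from keras.src.backend.common.variables import standardize_shape

# numpy scalars and other int-like values are cast to plain Python ints.
print(standardize_shape((np.int64(3), None, 4)))  # expected: (3, None, 4)

# Floats and strings are rejected even though int() would accept them.
try:
    standardize_shape((3, 2.5))
except ValueError as e:
    print(e)  # expected: "Cannot convert '(3, 2.5)' to a shape. ..."
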
keras/src/backend/jax/core.py
CHANGED
@@ -3,6 +3,7 @@ import jax.experimental.sparse as jax_sparse
 import jax.numpy as jnp
 import ml_dtypes
 import numpy as np
+from jax import export as jax_export

 from keras.src import tree
 from keras.src.backend import config

@@ -29,9 +30,7 @@ class JaxVariable(KerasVariable):
         self._layout = layout
         super().__init__(*args, **kwargs)

-    def …
-        # Note that variable.shape is needed by distribution_lib
-        self._shape = self._validate_shape(value.shape)
+    def _initialize_layout(self):
         # We can't import the keras/distribution/distribution_lib
         # due to circular dependency.
         distribution = global_state.get_global_attribute("distribution")

@@ -43,8 +42,28 @@ class JaxVariable(KerasVariable):
             self._layout = tensor_layout.backend_layout
         else:
             self._layout = tensor_layout
+
+    def _initialize(self, value):
+        # Note that variable.shape is needed by distribution_lib
+        self._shape = self._validate_shape(value.shape)
+        self._initialize_layout()
         self._direct_assign(value)

+    def _initialize_with_initializer(self, initializer):
+        self._initialize_layout()
+        layout = self._layout
+        shape = self._shape
+        if should_shard_at_init(layout, shape):
+            jitted_initializer = jax.jit(
+                initializer.__call__,
+                out_shardings=layout,
+                static_argnames=["shape", "dtype"],
+            )
+            value = jitted_initializer(shape=self._shape, dtype=self._dtype)
+            self._value = value
+        else:
+            super()._initialize_with_initializer(initializer)
+
     def _direct_assign(self, value):
         if self._layout is not None:
             value = distribution_lib.distribute_variable(value, self._layout)

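The should_shard_at_init helper referenced here (added further down in this file) only shards a variable at initialization when its estimated size, assuming 4 bytes per element, reaches 250 MiB and the layout actually names a sharded axis. A small worked example of that size arithmetic:

import numpy as np

SIZE_THRESHOLD = 250 * 1024 * 1024  # 250 MiB, matching the helper below

def estimated_bytes(shape):
    # The heuristic assumes 4 bytes per element (e.g. float32).
    return int(np.prod(shape)) * 4

print(estimated_bytes((8192, 8192)) >= SIZE_THRESHOLD)  # True  (~256 MiB)
print(estimated_bytes((1024, 1024)) >= SIZE_THRESHOLD)  # False (~4 MiB)
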
@@ -111,6 +130,12 @@ if config.is_nnx_enabled():
             # The real value is now set in self._value, sync it to raw_value
             object.__setattr__(self, "raw_value", self._value)

+        def _initialize_with_initializer(self, initializer):
+            value = self._convert_to_tensor(
+                initializer(self._shape, dtype=self._dtype)
+            )
+            self._initialize(value)
+
         @property
         def _value(self):
             if hasattr(self, "raw_value"):

@@ -233,6 +258,71 @@ if config.is_nnx_enabled():

     Variable = NnxVariable

+    def _flatten_nnx_variable(variable):
+        children = (variable.raw_value,)
+        # We copy __dict__ to avoid side effects
+        keras_state = variable.__dict__.copy()
+        # Remove elements that might be problematic or redundant if
+        # nnx.Variable's __getstate__
+        keras_state.pop("raw_value", None)
+        aux_data = (
+            variable._var_metadata,
+            getattr(variable, "_trace_state", None),
+            keras_state,
+        )
+        return children, aux_data
+
+    def _unflatten_nnx_variable(aux_data, children):
+        var_metadata, trace_state, keras_state = aux_data
+        raw_value = children[0]
+
+        # Create uninitialized instance
+        variable = NnxVariable.__new__(NnxVariable)
+
+        # Restore state
+        variable._var_metadata = var_metadata
+        if trace_state is not None:
+            variable._trace_state = trace_state
+        variable.__dict__.update(keras_state)
+        variable.raw_value = raw_value
+
+        return variable
+
+    try:
+        jax.tree_util.register_pytree_node(
+            NnxVariable,
+            _flatten_nnx_variable,
+            _unflatten_nnx_variable,
+        )
+    except ValueError:
+        pass
+
+    def __setattr__(self, name, value):
+        # Mirror Keras attributes to _var_metadata to ensure persistence
+        # if the Pytree registration is not respected by NNX.
+        if (
+            name != "_var_metadata"
+            and name not in ("_raw_value", "_trace_state")
+            and hasattr(self, "_var_metadata")
+        ):
+            self._var_metadata[name] = value
+
+        object.__setattr__(self, name, value)
+
+    NnxVariable.__setattr__ = __setattr__
+
+
+def should_shard_at_init(init_layout, shape):
+    if not isinstance(init_layout, jax.sharding.NamedSharding):
+        return False
+
+    if all(dim is None for dim in init_layout.spec):
+        return False
+
+    size_threshold = 250 * 1024 * 1024
+    array_size = np.prod(shape) * 4
+    return array_size >= size_threshold
+


 def convert_to_tensor(x, dtype=None, sparse=None, ragged=None):
     if ragged:
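The registration above follows the standard jax.tree_util.register_pytree_node contract: the flatten function returns (children, aux_data) and the unflatten function receives (aux_data, children). A self-contained toy example of the same pattern, unrelated to NnxVariable itself:

import jax


class Box:
    """Toy container registered as a JAX pytree node."""

    def __init__(self, value, label):
        self.value = value  # traced leaf
        self.label = label  # static metadata

    def __repr__(self):
        return f"Box(value={self.value}, label={self.label!r})"


def _flatten_box(box):
    # children are transformed by JAX; aux_data stays static.
    return (box.value,), box.label


def _unflatten_box(label, children):
    return Box(children[0], label)


jax.tree_util.register_pytree_node(Box, _flatten_box, _unflatten_box)

print(jax.tree_util.tree_map(lambda x: x * 2, Box(3.0, "lr")))
# Box(value=6.0, label='lr')
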
@@ -282,8 +372,6 @@ def is_tensor(x):


 def shape(x):
-    # This will work as long as we disallow
-    # dynamic shapes in JAX.
     return x.shape


@@ -315,31 +403,29 @@ def compute_output_spec(fn, *args, **kwargs):
             else:
                 maybe_symbolic_kwargs[k] = v

-        # …
-        …
-        …
-        …
-        …
-        …
-        …
+        # Create a _DimExpr instance for one dimension by creating a symbolic
+        # shape with one dimension and extracting it.
+        #
+        # We create a single dynamic dimension and reuse it instead of creating
+        # N dynamic dimensions. This is for backwards compatibility. Previously
+        # we would fill all dynamic dimensions with the same concrete value.
+        # This can handle the case where there is an implicit assumption that
+        # two dimensions are the same (e.g. square images).
+        #
+        # We add the constraint "dynamic_dimension>=2" to prevent JAX from
+        # assuming that the dimension can be broadcastable or squeezable. It
+        # removes this ambiguity.
+        dynamic_dimension = jax_export.symbolic_shape(
+            "(dynamic_dimension)",
+            constraints=["dynamic_dimension>=2"],
+        )[0]
+
+        def convert_keras_tensor_to_jax(x):
             if isinstance(x, KerasTensor):
-                shape = …
-                …
-                …
-                …
-                    shape[i] = fill_value
-                jax_tensor = jax.ShapeDtypeStruct(shape, dtype=x.dtype)
-                return jax_tensor
-            if isinstance(x, dict):
-                return {
-                    k: convert_keras_tensor_to_jax(v, fill_value=fill_value)
-                    for k, v in x.items()
-                }
-            if isinstance(x, list):
-                return [
-                    convert_keras_tensor_to_jax(xi, fill_value=fill_value)
-                    for xi in x
-                ]
+                shape = tuple(
+                    [d if d is not None else dynamic_dimension for d in x.shape]
+                )
+                return jax.ShapeDtypeStruct(shape, dtype=x.dtype)
             return x

         def wrapped_fn(*args, **kwargs):

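The single reusable symbolic dimension comes from jax.export.symbolic_shape, which parses a shape-polymorphism spec into dimension expressions that jax.eval_shape can propagate. A minimal standalone sketch (assumes a recent JAX release exposing the jax.export module; shapes are illustrative):

import jax
import jax.numpy as jnp
from jax import export as jax_export

# One symbolic dimension, constrained to be >= 2 so it cannot be mistaken
# for a broadcastable size-1 axis.
(batch,) = jax_export.symbolic_shape("(batch)", constraints=["batch>=2"])

spec = jax.ShapeDtypeStruct((batch, 16), jnp.float32)
out = jax.eval_shape(lambda x: jnp.sum(x, axis=-1), spec)
print(out.shape)  # (batch,) -- the leading dimension stays symbolic
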
@@ -374,63 +460,25 @@ def compute_output_spec(fn, *args, **kwargs):
             with StatelessScope():
                 return fn(*rec_args, **kwargs, **static_kwargs)

-        …
-            ms_args_1, ms_kwargs_1 = tree.map_structure(
-                lambda x: convert_keras_tensor_to_jax(x, fill_value=83),
-                (maybe_symbolic_args, maybe_symbolic_kwargs),
-            )
-            _, jax_out_1 = jax.make_jaxpr(wrapped_fn, return_shape=True)(
-                *ms_args_1, **ms_kwargs_1
-            )
-
-            ms_args_2, ms_kwargs_2 = tree.map_structure(
-                lambda x: convert_keras_tensor_to_jax(x, fill_value=89),
-                (maybe_symbolic_args, maybe_symbolic_kwargs),
-            )
-            _, jax_out_2 = jax.make_jaxpr(wrapped_fn, return_shape=True)(
-                *ms_args_2, **ms_kwargs_2
-            )
-
-            def merge_shapes(shape1, shape2):
-                return tuple(
-                    [d1 if d1 == d2 else None for d1, d2 in zip(shape1, shape2)]
-                )
-
-            def convert_jax_specs_to_keras_tensor(x1, x2):
-                if isinstance(x1, jax.ShapeDtypeStruct):
-                    if not isinstance(x2, jax.ShapeDtypeStruct):
-                        raise ValueError("Indeterministic output ordering.")
-                    return KerasTensor(
-                        merge_shapes(x1.shape, x2.shape), dtype=x1.dtype
-                    )
-                elif isinstance(x1, jax_sparse.BCOO):
-                    if not isinstance(x2, jax_sparse.BCOO):
-                        raise ValueError("Indeterministic output ordering.")
-                    return KerasTensor(
-                        merge_shapes(x1.shape, x2.shape),
-                        dtype=x1.dtype,
-                        sparse=True,
-                    )
-                else:
-                    return x1
-
-            return tree.map_structure(
-                convert_jax_specs_to_keras_tensor, jax_out_1, jax_out_2
-            )
-
-        maybe_symbolic_args, maybe_symbolic_kwargs = tree.map_structure(
+        maybe_symbolic_args_jax, maybe_symbolic_kwargs_jax = tree.map_structure(
             convert_keras_tensor_to_jax,
             (maybe_symbolic_args, maybe_symbolic_kwargs),
         )
-        …
-            *…
+        jax_out = jax.eval_shape(
+            wrapped_fn, *maybe_symbolic_args_jax, **maybe_symbolic_kwargs_jax
         )

         def convert_jax_spec_to_keras_tensor(x):
             if isinstance(x, jax.ShapeDtypeStruct):
-                …
+                shape = tuple(
+                    d if isinstance(d, int) else None for d in x.shape
+                )
+                return KerasTensor(shape, x.dtype)
             elif isinstance(x, jax_sparse.BCOO):
-                …
+                shape = tuple(
+                    d if isinstance(d, int) else None for d in x.shape
+                )
+                return KerasTensor(shape, x.dtype, sparse=True)
             return x

         return tree.map_structure(convert_jax_spec_to_keras_tensor, jax_out)

keras/src/backend/jax/distribution_lib.py
CHANGED

@@ -27,6 +27,20 @@ def list_devices(device_type=None):
     return [f"{device.platform}:{device.id}" for device in jax_devices]


+def get_device_count(device_type=None):
+    """Returns the number of available JAX devices.
+    Args:
+        device_type: Optional device type to count (e.g., "cpu", "gpu", "tpu").
+            If `None`, it defaults to counting "gpu" or "tpu" devices if
+            available, otherwise it counts "cpu" devices. It does not
+            return the sum of all device types.
+    Returns:
+        int: The total number of JAX devices for the specified type.
+    """
+    device_type = device_type.lower() if device_type else None
+    return jax.device_count(device_type)
+
+
 def distribute_variable(value, layout):
     """Create a distributed variable for JAX.

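A hedged usage sketch for the new helper; device counts depend on the host, so the printed values are purely illustrative (assumes the JAX backend dependencies are installed):

from keras.src.backend.jax import distribution_lib

# Count the default accelerator type (gpu/tpu if present, otherwise cpu).
print(distribution_lib.get_device_count())       # e.g. 8
# Or count a specific device type; the argument is lower-cased internally.
print(distribution_lib.get_device_count("CPU"))  # e.g. 1
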
@@ -146,13 +160,13 @@ def initialize_rng():
     # Check if the global seed generator is set and ensure it has an initialized
     # seed. Otherwise, reset the seed to the global seed.
     global_seed_generator = global_state.get_global_attribute(
-        …
+        seed_generator.GLOBAL_SEED_GENERATOR
     )
     if global_seed_generator is not None:
         seed = global_seed_generator.get_config()["seed"]
         if seed is None:
             global_state.set_global_attribute(
-                …
+                seed_generator.GLOBAL_SEED_GENERATOR,
                 seed_generator.SeedGenerator(
                     seed=global_seed,
                     name=global_seed_generator.name,
keras/src/backend/jax/layer.py
CHANGED
@@ -3,7 +3,9 @@ from keras.src.backend.config import is_nnx_enabled
 if is_nnx_enabled():
     from flax import nnx

-    BaseLayer…
+    class BaseLayer(nnx.Module):
+        def __init_subclass__(cls, **kwargs):
+            super().__init_subclass__(pytree=False, **kwargs)
 else:
     BaseLayer = object

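The new BaseLayer forwards a fixed pytree=False class-creation keyword to nnx.Module.__init_subclass__ for every subclass. A generic Python illustration of that mechanism, independent of flax/nnx (all names hypothetical):

class Base:
    def __init_subclass__(cls, pytree=True, **kwargs):
        super().__init_subclass__(**kwargs)
        cls.pytree = pytree


class Fixed(Base):
    # Forces the keyword for every further subclass, like BaseLayer above.
    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(pytree=False, **kwargs)


class Child(Fixed):
    pass


print(Child.pytree)  # False
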
keras/src/backend/jax/linalg.py
CHANGED