keras-nightly 3.12.0.dev2025083103__py3-none-any.whl → 3.14.0.dev2026011604__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- keras/__init__.py +1 -0
- keras/_tf_keras/keras/__init__.py +1 -0
- keras/_tf_keras/keras/callbacks/__init__.py +3 -0
- keras/_tf_keras/keras/distillation/__init__.py +16 -0
- keras/_tf_keras/keras/distribution/__init__.py +3 -0
- keras/_tf_keras/keras/dtype_policies/__init__.py +6 -0
- keras/_tf_keras/keras/layers/__init__.py +21 -0
- keras/_tf_keras/keras/ops/__init__.py +16 -0
- keras/_tf_keras/keras/ops/image/__init__.py +1 -0
- keras/_tf_keras/keras/ops/linalg/__init__.py +1 -0
- keras/_tf_keras/keras/ops/nn/__init__.py +3 -0
- keras/_tf_keras/keras/ops/numpy/__init__.py +12 -0
- keras/_tf_keras/keras/quantizers/__init__.py +13 -0
- keras/callbacks/__init__.py +3 -0
- keras/distillation/__init__.py +16 -0
- keras/distribution/__init__.py +3 -0
- keras/dtype_policies/__init__.py +6 -0
- keras/layers/__init__.py +21 -0
- keras/ops/__init__.py +16 -0
- keras/ops/image/__init__.py +1 -0
- keras/ops/linalg/__init__.py +1 -0
- keras/ops/nn/__init__.py +3 -0
- keras/ops/numpy/__init__.py +12 -0
- keras/quantizers/__init__.py +13 -0
- keras/src/applications/imagenet_utils.py +4 -1
- keras/src/backend/common/backend_utils.py +30 -6
- keras/src/backend/common/dtypes.py +6 -12
- keras/src/backend/common/name_scope.py +2 -1
- keras/src/backend/common/variables.py +38 -20
- keras/src/backend/jax/core.py +126 -78
- keras/src/backend/jax/distribution_lib.py +16 -2
- keras/src/backend/jax/layer.py +3 -1
- keras/src/backend/jax/linalg.py +4 -0
- keras/src/backend/jax/nn.py +511 -29
- keras/src/backend/jax/numpy.py +109 -23
- keras/src/backend/jax/optimizer.py +3 -2
- keras/src/backend/jax/trainer.py +18 -3
- keras/src/backend/numpy/linalg.py +4 -0
- keras/src/backend/numpy/nn.py +313 -2
- keras/src/backend/numpy/numpy.py +97 -8
- keras/src/backend/openvino/__init__.py +1 -0
- keras/src/backend/openvino/core.py +6 -23
- keras/src/backend/openvino/linalg.py +4 -0
- keras/src/backend/openvino/nn.py +271 -20
- keras/src/backend/openvino/numpy.py +1369 -195
- keras/src/backend/openvino/random.py +7 -14
- keras/src/backend/tensorflow/layer.py +43 -9
- keras/src/backend/tensorflow/linalg.py +24 -0
- keras/src/backend/tensorflow/nn.py +545 -1
- keras/src/backend/tensorflow/numpy.py +351 -56
- keras/src/backend/tensorflow/trainer.py +6 -2
- keras/src/backend/torch/core.py +3 -1
- keras/src/backend/torch/linalg.py +4 -0
- keras/src/backend/torch/nn.py +125 -0
- keras/src/backend/torch/numpy.py +109 -9
- keras/src/backend/torch/trainer.py +8 -2
- keras/src/callbacks/__init__.py +1 -0
- keras/src/callbacks/callback_list.py +45 -11
- keras/src/callbacks/model_checkpoint.py +5 -0
- keras/src/callbacks/orbax_checkpoint.py +332 -0
- keras/src/callbacks/terminate_on_nan.py +54 -5
- keras/src/datasets/cifar10.py +5 -0
- keras/src/distillation/__init__.py +1 -0
- keras/src/distillation/distillation_loss.py +390 -0
- keras/src/distillation/distiller.py +598 -0
- keras/src/distribution/distribution_lib.py +14 -0
- keras/src/dtype_policies/__init__.py +4 -0
- keras/src/dtype_policies/dtype_policy.py +180 -1
- keras/src/export/__init__.py +2 -0
- keras/src/export/export_utils.py +39 -2
- keras/src/export/litert.py +248 -0
- keras/src/export/onnx.py +6 -0
- keras/src/export/openvino.py +1 -1
- keras/src/export/tf2onnx_lib.py +3 -0
- keras/src/layers/__init__.py +13 -0
- keras/src/layers/activations/softmax.py +9 -4
- keras/src/layers/attention/attention.py +1 -1
- keras/src/layers/attention/multi_head_attention.py +4 -1
- keras/src/layers/core/dense.py +406 -102
- keras/src/layers/core/einsum_dense.py +521 -116
- keras/src/layers/core/embedding.py +257 -99
- keras/src/layers/core/input_layer.py +1 -0
- keras/src/layers/core/reversible_embedding.py +399 -0
- keras/src/layers/input_spec.py +17 -17
- keras/src/layers/layer.py +50 -15
- keras/src/layers/merging/concatenate.py +6 -5
- keras/src/layers/merging/dot.py +4 -1
- keras/src/layers/pooling/adaptive_average_pooling1d.py +65 -0
- keras/src/layers/pooling/adaptive_average_pooling2d.py +62 -0
- keras/src/layers/pooling/adaptive_average_pooling3d.py +63 -0
- keras/src/layers/pooling/adaptive_max_pooling1d.py +65 -0
- keras/src/layers/pooling/adaptive_max_pooling2d.py +62 -0
- keras/src/layers/pooling/adaptive_max_pooling3d.py +63 -0
- keras/src/layers/pooling/base_adaptive_pooling.py +63 -0
- keras/src/layers/preprocessing/discretization.py +6 -5
- keras/src/layers/preprocessing/feature_space.py +8 -4
- keras/src/layers/preprocessing/image_preprocessing/aug_mix.py +2 -2
- keras/src/layers/preprocessing/image_preprocessing/bounding_boxes/validation.py +5 -5
- keras/src/layers/preprocessing/image_preprocessing/random_contrast.py +3 -3
- keras/src/layers/preprocessing/image_preprocessing/resizing.py +10 -0
- keras/src/layers/preprocessing/index_lookup.py +19 -1
- keras/src/layers/preprocessing/normalization.py +16 -1
- keras/src/layers/preprocessing/string_lookup.py +26 -28
- keras/src/layers/regularization/dropout.py +43 -1
- keras/src/layers/rnn/gru.py +1 -1
- keras/src/layers/rnn/lstm.py +2 -2
- keras/src/layers/rnn/rnn.py +19 -0
- keras/src/layers/rnn/simple_rnn.py +1 -1
- keras/src/legacy/preprocessing/image.py +4 -1
- keras/src/legacy/preprocessing/sequence.py +20 -12
- keras/src/losses/loss.py +1 -1
- keras/src/losses/losses.py +24 -0
- keras/src/metrics/confusion_metrics.py +7 -6
- keras/src/models/cloning.py +4 -0
- keras/src/models/functional.py +11 -3
- keras/src/models/model.py +195 -44
- keras/src/ops/image.py +257 -20
- keras/src/ops/linalg.py +93 -0
- keras/src/ops/nn.py +268 -2
- keras/src/ops/numpy.py +701 -44
- keras/src/ops/operation.py +90 -29
- keras/src/ops/operation_utils.py +2 -0
- keras/src/optimizers/adafactor.py +29 -10
- keras/src/optimizers/base_optimizer.py +22 -3
- keras/src/optimizers/loss_scale_optimizer.py +51 -18
- keras/src/optimizers/muon.py +65 -31
- keras/src/optimizers/schedules/learning_rate_schedule.py +4 -3
- keras/src/quantizers/__init__.py +14 -1
- keras/src/quantizers/awq.py +361 -0
- keras/src/quantizers/awq_config.py +140 -0
- keras/src/quantizers/awq_core.py +217 -0
- keras/src/quantizers/gptq.py +346 -207
- keras/src/quantizers/gptq_config.py +63 -13
- keras/src/quantizers/gptq_core.py +328 -215
- keras/src/quantizers/quantization_config.py +246 -0
- keras/src/quantizers/quantizers.py +407 -38
- keras/src/quantizers/utils.py +23 -0
- keras/src/random/seed_generator.py +6 -4
- keras/src/saving/file_editor.py +81 -6
- keras/src/saving/orbax_util.py +26 -0
- keras/src/saving/saving_api.py +37 -14
- keras/src/saving/saving_lib.py +1 -1
- keras/src/testing/__init__.py +1 -0
- keras/src/testing/test_case.py +45 -5
- keras/src/trainers/compile_utils.py +38 -17
- keras/src/trainers/data_adapters/grain_dataset_adapter.py +1 -5
- keras/src/tree/torchtree_impl.py +215 -0
- keras/src/tree/tree_api.py +6 -1
- keras/src/utils/backend_utils.py +31 -4
- keras/src/utils/dataset_utils.py +234 -35
- keras/src/utils/file_utils.py +49 -11
- keras/src/utils/image_utils.py +14 -2
- keras/src/utils/jax_layer.py +244 -55
- keras/src/utils/module_utils.py +29 -0
- keras/src/utils/progbar.py +10 -12
- keras/src/utils/python_utils.py +5 -0
- keras/src/utils/rng_utils.py +9 -1
- keras/src/utils/tracking.py +70 -5
- keras/src/version.py +1 -1
- {keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/METADATA +16 -6
- {keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/RECORD +163 -142
- keras/src/quantizers/gptq_quant.py +0 -133
- {keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/WHEEL +0 -0
- {keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/top_level.txt +0 -0
keras/src/backend/jax/numpy.py
CHANGED
@@ -3,6 +3,7 @@ import math
 import jax
 import jax.experimental.sparse as jax_sparse
 import jax.numpy as jnp
+from jax import export as jax_export

 from keras.src.backend import config
 from keras.src.backend.common import dtypes
@@ -306,14 +307,20 @@ def append(x1, x2, axis=None):
     return jnp.append(x1, x2, axis=axis)


-def arange(start, stop=None, step=1, dtype=None):
+def arange(start, stop=None, step=None, dtype=None):
+    def get_dtype(x):
+        if hasattr(x, "dtype"):
+            return x.dtype
+        if jax_export.is_symbolic_dim(x):
+            return int
+        return type(x)
+
     if dtype is None:
-        dtypes_to_resolve = [
-            getattr(start, "dtype", type(start)),
-            getattr(step, "dtype", type(step)),
-        ]
+        dtypes_to_resolve = [get_dtype(start)]
         if stop is not None:
-            dtypes_to_resolve.append(getattr(stop, "dtype", type(stop)))
+            dtypes_to_resolve.append(get_dtype(stop))
+        if step is not None:
+            dtypes_to_resolve.append(get_dtype(step))
         dtype = dtypes.result_type(*dtypes_to_resolve)
     dtype = standardize_dtype(dtype)
     return jnp.arange(start, stop, step=step, dtype=dtype)
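The `get_dtype` helper is what lets `arange` accept symbolic dimensions under JAX shape polymorphism: a symbolic batch size has no `dtype` attribute and is not a plain Python scalar. A minimal sketch of the resolution logic, with illustrative values (the helper body mirrors the diff):

import jax.numpy as jnp
from jax import export as jax_export

def get_dtype(x):
    if hasattr(x, "dtype"):
        return x.dtype
    if jax_export.is_symbolic_dim(x):
        return int
    return type(x)

print(get_dtype(3))                 # <class 'int'>
print(get_dtype(jnp.float32(0.5)))  # float32
b, _ = jax_export.symbolic_shape("b, 8")
print(get_dtype(b))                 # <class 'int'>: symbolic dims count as int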
@@ -439,6 +446,11 @@ def array(x, dtype=None):
     return jnp.array(x, dtype=dtype)


+def view(x, dtype=None):
+    x = convert_to_tensor(x)
+    return x.view(dtype=dtype)
+
+
 def average(x, axis=None, weights=None):
     x = convert_to_tensor(x)
     dtypes_to_resolve = [x.dtype, float]
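For context, `view` reinterprets the tensor's underlying bytes as another dtype, following `ndarray.view` semantics rather than casting values. A quick illustration (the printed integers are the IEEE-754 bit patterns of 1.0 and 2.0):

import jax.numpy as jnp

x = jnp.array([1.0, 2.0], dtype="float32")
print(x.view(jnp.int32))  # [1065353216 1073741824]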
@@ -536,15 +548,18 @@ def clip(x, x_min, x_max):

 def concatenate(xs, axis=0):
     bcoo_count = builtins.sum(isinstance(x, jax_sparse.BCOO) for x in xs)
-    if bcoo_count:
-        if bcoo_count == len(xs):
-            axis = canonicalize_axis(axis, len(xs[0].shape))
-            return jax_sparse.bcoo_concatenate(xs, dimension=axis)
-        else:
-            xs = [
-                x.todense() if isinstance(x, jax_sparse.JAXSparse) else x
-                for x in xs
-            ]
+    if bcoo_count == len(xs):
+        axis = canonicalize_axis(axis, len(xs[0].shape))
+        return jax_sparse.bcoo_concatenate(xs, dimension=axis)
+    elif bcoo_count:
+        xs = [
+            x.todense()
+            if isinstance(x, jax_sparse.JAXSparse)
+            else convert_to_tensor(x)
+            for x in xs
+        ]
+    else:
+        xs = [convert_to_tensor(x) for x in xs]
     return jnp.concatenate(xs, axis=axis)


@@ -663,6 +678,10 @@ def empty(shape, dtype=None):
     return jnp.empty(shape, dtype=dtype)


+def empty_like(x, dtype=None):
+    return jnp.empty_like(x, dtype=dtype)
+
+
 def equal(x1, x2):
     x1 = convert_to_tensor(x1)
     x2 = convert_to_tensor(x2)
@@ -809,6 +828,36 @@ def isposinf(x):
     return jnp.isposinf(x)


+def isreal(x):
+    x = convert_to_tensor(x)
+    return jnp.isreal(x)
+
+
+def kron(x1, x2):
+    x1 = convert_to_tensor(x1)
+    x2 = convert_to_tensor(x2)
+    return jnp.kron(x1, x2)
+
+
+def lcm(x1, x2):
+    x1 = convert_to_tensor(x1)
+    x2 = convert_to_tensor(x2)
+    return jnp.lcm(x1, x2)
+
+
+def ldexp(x1, x2):
+    x1 = convert_to_tensor(x1)
+    x2 = convert_to_tensor(x2)
+
+    if standardize_dtype(x2.dtype) not in dtypes.INT_TYPES:
+        raise TypeError(
+            f"ldexp exponent must be an integer type. "
+            f"Received: x2 dtype={x2.dtype}"
+        )
+
+    return jnp.ldexp(x1, x2)
+
+
 def less(x1, x2):
     x1 = convert_to_tensor(x1)
     x2 = convert_to_tensor(x2)
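The integer-dtype check reflects what `ldexp` computes: `x1 * 2**x2` with `x2` as an exponent, so a float exponent would be ill-defined. A one-line sanity check of the underlying op:

import jax.numpy as jnp

print(jnp.ldexp(jnp.float32(1.5), 3))  # 12.0 == 1.5 * 2**3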
@@ -876,6 +925,15 @@ def logaddexp(x1, x2):
     return jnp.logaddexp(x1, x2)


+def logaddexp2(x1, x2):
+    x1 = convert_to_tensor(x1)
+    x2 = convert_to_tensor(x2)
+    dtype = dtypes.result_type(x1.dtype, x2.dtype, float)
+    x1 = cast(x1, dtype)
+    x2 = cast(x2, dtype)
+    return jnp.logaddexp2(x1, x2)
+
+
 def logical_and(x1, x2):
     x1 = convert_to_tensor(x1)
     x2 = convert_to_tensor(x2)
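`logaddexp2` is the base-2 analogue of `logaddexp`, computing `log2(2**x1 + 2**x2)`; the explicit cast to a float result type keeps integer inputs from truncating. For example:

import jax.numpy as jnp

print(jnp.logaddexp2(3.0, 3.0))  # 4.0, since log2(2**3 + 2**3) = log2(16)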
@@ -1005,6 +1063,11 @@ def prod(x, axis=None, keepdims=False, dtype=None):
     return jnp.prod(x, axis=axis, keepdims=keepdims, dtype=dtype)


+def ptp(x, axis=None, keepdims=False):
+    x = convert_to_tensor(x)
+    return jnp.ptp(x, axis=axis, keepdims=keepdims)
+
+
 def quantile(x, q, axis=None, method="linear", keepdims=False):
     x = convert_to_tensor(x)
     q = convert_to_tensor(q)
@@ -1059,6 +1122,7 @@ def reshape(x, newshape):
         if None not in output_shape:
             newshape = output_shape
         return jax_sparse.bcoo_reshape(x, new_sizes=newshape)
+    x = convert_to_tensor(x)
     return jnp.reshape(x, newshape)


@@ -1121,10 +1185,17 @@ def sort(x, axis=-1):


 def split(x, indices_or_sections, axis=0):
+    x = convert_to_tensor(x)
     return jnp.split(x, indices_or_sections, axis=axis)


+def array_split(x, indices_or_sections, axis=0):
+    x = convert_to_tensor(x)
+    return jnp.array_split(x, indices_or_sections, axis=axis)
+
+
 def stack(x, axis=0):
+    x = [convert_to_tensor(t) for t in x]
     return jnp.stack(x, axis=axis)


@@ -1147,6 +1218,8 @@ def take(x, indices, axis=None):


 def take_along_axis(x, indices, axis=None):
+    x = convert_to_tensor(x)
+    indices = convert_to_tensor(indices, sparse=False)
     return jnp.take_along_axis(x, indices, axis=axis)


@@ -1201,14 +1274,7 @@ def tile(x, repeats):

 def trace(x, offset=0, axis1=0, axis2=1):
     x = convert_to_tensor(x)
-    dtype = None
-    # TODO: Remove the condition of uint8 and uint16 once we have jax>=0.4.27
-    # for both CPU & GPU environments.
-    # uint8 and uint16 will be casted to uint32 when jax>=0.4.27 but to int32
-    # otherwise.
-    if standardize_dtype(x.dtype) in ("bool", "uint8", "uint16"):
-        dtype = "int32"
-    return jnp.trace(x, offset=offset, axis1=axis1, axis2=axis2, dtype=dtype)
+    return jnp.trace(x, offset=offset, axis1=axis1, axis2=axis2)


 def tri(N, M=None, k=0, dtype=None):
@@ -1290,6 +1356,12 @@ def negative(x):
     return jnp.negative(x)


+def nextafter(x1, x2):
+    x1 = convert_to_tensor(x1)
+    x2 = convert_to_tensor(x2)
+    return jnp.nextafter(x1, x2)
+
+
 @sparse.elementwise_unary(linear=False)
 def square(x):
     x = convert_to_tensor(x)
@@ -1310,6 +1382,7 @@ def squeeze(x, axis=None):
             axis = tuple(i for i, d in enumerate(x.shape) if d == 1)
         axis = to_tuple_or_list(axis)
         return jax_sparse.bcoo_squeeze(x, dimensions=axis)
+    x = convert_to_tensor(x)
     return jnp.squeeze(x, axis=axis)


@@ -1328,6 +1401,19 @@ def transpose(x, axes=None):
     return jnp.transpose(x, axes=axes)


+def trapezoid(y, x=None, dx=1.0, axis=-1):
+    y = convert_to_tensor(y)
+    if x is not None:
+        x = convert_to_tensor(x)
+    dx = convert_to_tensor(dx)
+    return jnp.trapezoid(y, x, dx=dx, axis=axis)
+
+
+def vander(x, N=None, increasing=False):
+    x = convert_to_tensor(x)
+    return jnp.vander(x, N=N, increasing=increasing)
+
+
 def var(x, axis=None, keepdims=False):
     x = convert_to_tensor(x)
     # `jnp.var` does not handle low precision (e.g., float16) overflow
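`trapezoid` integrates sampled values with the trapezoidal rule (the NumPy 2.0 name for `trapz`). A minimal check:

import jax.numpy as jnp

y = jnp.array([0.0, 1.0, 2.0])
print(jnp.trapezoid(y, dx=1.0))  # 2.0 = (0 + 1)/2 + (1 + 2)/2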
keras/src/backend/jax/optimizer.py
CHANGED
@@ -36,13 +36,14 @@ class JaxOptimizer(base_optimizer.BaseOptimizer):
             new_g_accs = jax.lax.cond(
                 is_update_step,
                 lambda: [jnp.zeros(g.shape, dtype=g.dtype) for g in acc_grads],
-                lambda: [g + acc_g for g, acc_g in zip(grads, acc_grads)],
+                lambda: [g + acc_g.value for g, acc_g in zip(grads, acc_grads)],
             )

             grads = jax.lax.cond(
                 is_update_step,
                 lambda: [
-                    (g + acc_g) / steps for g, acc_g in zip(grads, acc_grads)
+                    (g + acc_g.value) / steps
+                    for g, acc_g in zip(grads, acc_grads)
                 ],
                 lambda: list(grads),
             )
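The new `.value` reads suggest the gradient accumulators are now Keras variables rather than raw arrays, so the backing tensor must be fetched before arithmetic with plain JAX values. A hypothetical illustration of that pattern (not the optimizer's actual code):

import keras

acc_g = keras.Variable(initializer="zeros", shape=(2,), dtype="float32")
g = keras.ops.ones((2,))
new_acc = g + acc_g.value  # operate on the backing tensor, not the Variable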
keras/src/backend/jax/trainer.py
CHANGED
@@ -105,7 +105,10 @@ class JAXTrainer(base_trainer.Trainer):
             ]
         ) as scope:
             self._loss_tracker.update_state(
-                unscaled_loss,
+                unscaled_loss,
+                sample_weight=next(
+                    i for i in tree.flatten(x) if i is not None
+                ).shape[0],
             )
             logs = self.compute_metrics(x, y, y_pred, sample_weight)

@@ -263,8 +266,14 @@ class JAXTrainer(base_trainer.Trainer):
         if distribution_lib.distribution() is not None:
             state_shardings = self._get_state_sharding_spec()
             out_shardings = (None, state_shardings)
+            if is_nnx_enabled():
+                step_fn = lambda state, data: type(self).train_step(
+                    self, state, data
+                )
+            else:
+                step_fn = self.train_step
             train_step = jit(
-                self.train_step,
+                step_fn,
                 donate_argnums=0,
                 out_shardings=out_shardings,
             )
@@ -293,8 +302,14 @@ class JAXTrainer(base_trainer.Trainer):
                 metrics_shardings,
             )
             out_shardings = (None, state_shardings)
+            if is_nnx_enabled():
+                step_fn = lambda state, data: type(self).test_step(
+                    self, state, data
+                )
+            else:
+                step_fn = self.test_step
             test_step = jit(
-                self.test_step,
+                step_fn,
                 donate_argnums=0,
                 out_shardings=out_shardings,
             )
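Both hunks above route jitting through a lambda that calls the step method via the class when NNX is enabled, presumably so `jax.jit` traces an ordinary function with `self` passed explicitly rather than an NNX-wrapped bound method. The shape of the pattern in isolation, on a toy class rather than the Keras API:

import jax

class Toy:
    def train_step(self, state, data):
        return state + data

t = Toy()
step_fn = lambda state, data: type(t).train_step(t, state, data)
print(jax.jit(step_fn)(1.0, 2.0))  # 3.0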
keras/src/backend/numpy/linalg.py
CHANGED
@@ -96,3 +96,7 @@ def lstsq(a, b, rcond=None):
     a = convert_to_tensor(a)
     b = convert_to_tensor(b)
     return np.linalg.lstsq(a, b, rcond=rcond)[0]
+
+
+def jvp(fun, primals, tangents, has_aux=False):
+    raise NotImplementedError("JVP is not supported by the Numpy backend.")
keras/src/backend/numpy/nn.py
CHANGED
@@ -3,6 +3,9 @@ import numpy as np
 from jax import lax

 from keras.src import backend
+from keras.src.backend.common.backend_utils import (
+    compute_adaptive_pooling_window_sizes,
+)
 from keras.src.backend.common.backend_utils import (
     compute_conv_transpose_padding_args_for_jax,
 )
@@ -164,13 +167,14 @@ def celu(x, alpha=1.0):

 def glu(x, axis=-1):
     x = convert_to_tensor(x)
+    dtype = x.dtype
     if x.shape[axis] % 2 != 0:
         raise ValueError(
             "axis size must be divisible by 2. "
             f"Received: x.shape={x.shape} with axis={axis}"
         )
     x1, x2 = np.split(x, 2, axis)
-    return x1 * (1 / (1 + np.exp(-x2)))
+    return (x1 * sigmoid(x2)).astype(dtype)


 def hard_tanh(x):
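GLU splits the input in two along `axis` and gates the first half with the sigmoid of the second, `glu(x) = x1 * sigmoid(x2)`; capturing `dtype` up front keeps lower-precision inputs at their original dtype. An illustrative computation with plain NumPy:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

x = np.array([[1.0, 2.0, 0.0, 0.0]], dtype="float32")
x1, x2 = np.split(x, 2, axis=-1)
print(x1 * sigmoid(x2))  # [[0.5 1. ]] since sigmoid(0) = 0.5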
@@ -339,6 +343,252 @@ def average_pool(
     return pooled / window_counts


+def _compute_adaptive_pooling_gather_indices(
+    input_dim, output_size, big_window
+):
+    window_starts = np.floor(
+        (np.arange(output_size) * input_dim) / output_size
+    ).astype(np.int32)
+
+    window_ends = np.ceil(
+        (np.arange(1, output_size + 1) * input_dim) / output_size
+    ).astype(np.int32)
+
+    window_sizes = window_ends - window_starts
+    is_big = window_sizes == big_window
+
+    small_window = big_window - 1
+    small_pool_len = input_dim - small_window + 1
+
+    small_indices = window_starts
+    big_indices = window_starts + small_pool_len
+
+    gather = np.where(is_big, big_indices, small_indices)
+    return gather.astype(np.int32)
+
+
+def _strided_view_1d(x, window_size):
+    n, l, c = x.shape
+    out = l - window_size + 1
+
+    strides = x.strides
+    shape = (n, out, window_size, c)
+    new_strides = (strides[0], strides[1], strides[1], strides[2])
+
+    return np.lib.stride_tricks.as_strided(x, shape=shape, strides=new_strides)
+
+
+def _adaptive_pool1d_impl(inputs, output_size, mode, data_format):
+    if isinstance(output_size, int):
+        output_size = (output_size,)
+
+    if data_format == "channels_first":
+        inputs = np.transpose(inputs, (0, 2, 1))
+
+    n, l, c = inputs.shape
+    out_l = output_size[0]
+
+    small, big = compute_adaptive_pooling_window_sizes(l, out_l)
+    gather = _compute_adaptive_pooling_gather_indices(l, out_l, big)
+
+    sv_small = _strided_view_1d(inputs, small)
+    small_pool = (
+        np.mean(sv_small, axis=2)
+        if mode == "average"
+        else np.max(sv_small, axis=2)
+    )
+
+    sv_big = _strided_view_1d(inputs, big)
+    big_pool = (
+        np.mean(sv_big, axis=2) if mode == "average" else np.max(sv_big, axis=2)
+    )
+
+    combined = np.concatenate([small_pool, big_pool], axis=1)
+    out = combined[:, gather, :]
+
+    if data_format == "channels_first":
+        out = np.transpose(out, (0, 2, 1))
+
+    return out
+
+
+def _adaptive_pool2d_impl(inputs, output_size, mode, data_format):
+    if isinstance(output_size, int):
+        output_size = (output_size, output_size)
+
+    if data_format == "channels_first":
+        inputs = np.transpose(inputs, (0, 2, 3, 1))
+
+    n, h, w, c = inputs.shape
+    out_h, out_w = output_size
+
+    small_h, big_h = compute_adaptive_pooling_window_sizes(h, out_h)
+    gather_h = _compute_adaptive_pooling_gather_indices(h, out_h, big_h)
+
+    x_h = np.transpose(inputs, (0, 2, 1, 3)).reshape(n * w, h, c)
+
+    sv_small_h = _strided_view_1d(x_h, small_h)
+    small_pool_h = (
+        np.mean(sv_small_h, axis=2)
+        if mode == "average"
+        else np.max(sv_small_h, axis=2)
+    )
+
+    sv_big_h = _strided_view_1d(x_h, big_h)
+    big_pool_h = (
+        np.mean(sv_big_h, axis=2)
+        if mode == "average"
+        else np.max(sv_big_h, axis=2)
+    )
+
+    combined_h = np.concatenate([small_pool_h, big_pool_h], axis=1)
+    pooled_h = combined_h[:, gather_h, :]
+
+    pooled_h = pooled_h.reshape(n, w, out_h, c)
+    pooled_h = np.transpose(pooled_h, (0, 2, 1, 3))
+
+    small_w, big_w = compute_adaptive_pooling_window_sizes(w, out_w)
+    gather_w = _compute_adaptive_pooling_gather_indices(w, out_w, big_w)
+
+    x_w = pooled_h.reshape(n * out_h, w, c)
+
+    sv_small_w = _strided_view_1d(x_w, small_w)
+    small_pool_w = (
+        np.mean(sv_small_w, axis=2)
+        if mode == "average"
+        else np.max(sv_small_w, axis=2)
+    )
+
+    sv_big_w = _strided_view_1d(x_w, big_w)
+    big_pool_w = (
+        np.mean(sv_big_w, axis=2)
+        if mode == "average"
+        else np.max(sv_big_w, axis=2)
+    )
+
+    combined_w = np.concatenate([small_pool_w, big_pool_w], axis=1)
+    out = combined_w[:, gather_w, :].reshape(n, out_h, out_w, c)
+
+    if data_format == "channels_first":
+        out = np.transpose(out, (0, 3, 1, 2))
+
+    return out
+
+
+def _adaptive_pool3d_impl(inputs, output_size, mode, data_format):
+    if isinstance(output_size, int):
+        output_size = (output_size, output_size, output_size)
+
+    if data_format == "channels_first":
+        inputs = np.transpose(inputs, (0, 2, 3, 4, 1))
+
+    n, d, h, w, c = inputs.shape
+    out_d, out_h, out_w = output_size
+
+    small_d, big_d = compute_adaptive_pooling_window_sizes(d, out_d)
+    gather_d = _compute_adaptive_pooling_gather_indices(d, out_d, big_d)
+
+    x_d = np.transpose(inputs, (0, 2, 3, 1, 4)).reshape(n * h * w, d, c)
+
+    sv_small_d = _strided_view_1d(x_d, small_d)
+    small_pool_d = (
+        np.mean(sv_small_d, axis=2)
+        if mode == "average"
+        else np.max(sv_small_d, axis=2)
+    )
+
+    sv_big_d = _strided_view_1d(x_d, big_d)
+    big_pool_d = (
+        np.mean(sv_big_d, axis=2)
+        if mode == "average"
+        else np.max(sv_big_d, axis=2)
+    )
+
+    combined_d = np.concatenate([small_pool_d, big_pool_d], axis=1)
+    pooled_d = combined_d[:, gather_d, :].reshape(n, h, w, out_d, c)
+    pooled_d = np.transpose(pooled_d, (0, 3, 1, 2, 4))
+
+    small_h, big_h = compute_adaptive_pooling_window_sizes(h, out_h)
+    gather_h = _compute_adaptive_pooling_gather_indices(h, out_h, big_h)
+
+    x_h = np.transpose(pooled_d, (0, 1, 3, 2, 4)).reshape(n * out_d * w, h, c)
+
+    sv_small_h = _strided_view_1d(x_h, small_h)
+    small_pool_h = (
+        np.mean(sv_small_h, axis=2)
+        if mode == "average"
+        else np.max(sv_small_h, axis=2)
+    )
+
+    sv_big_h = _strided_view_1d(x_h, big_h)
+    big_pool_h = (
+        np.mean(sv_big_h, axis=2)
+        if mode == "average"
+        else np.max(sv_big_h, axis=2)
+    )
+
+    combined_h = np.concatenate([small_pool_h, big_pool_h], axis=1)
+    pooled_h = combined_h[:, gather_h, :].reshape(n, out_d, w, out_h, c)
+    pooled_h = np.transpose(pooled_h, (0, 1, 3, 2, 4))
+
+    small_w, big_w = compute_adaptive_pooling_window_sizes(w, out_w)
+    gather_w = _compute_adaptive_pooling_gather_indices(w, out_w, big_w)
+
+    x_w = pooled_h.reshape(n * out_d * out_h, w, c)
+
+    sv_small_w = _strided_view_1d(x_w, small_w)
+    small_pool_w = (
+        np.mean(sv_small_w, axis=2)
+        if mode == "average"
+        else np.max(sv_small_w, axis=2)
+    )
+
+    sv_big_w = _strided_view_1d(x_w, big_w)
+    big_pool_w = (
+        np.mean(sv_big_w, axis=2)
+        if mode == "average"
+        else np.max(sv_big_w, axis=2)
+    )
+
+    combined_w = np.concatenate([small_pool_w, big_pool_w], axis=1)
+    out = combined_w[:, gather_w, :].reshape(n, out_d, out_h, out_w, c)
+
+    if data_format == "channels_first":
+        out = np.transpose(out, (0, 4, 1, 2, 3))
+
+    return out
+
+
+def adaptive_average_pool(inputs, output_size, data_format=None):
+    data_format = backend.standardize_data_format(data_format)
+    dims = inputs.ndim - 2
+    if dims == 1:
+        return _adaptive_pool1d_impl(
+            inputs, output_size, "average", data_format
+        )
+    if dims == 2:
+        return _adaptive_pool2d_impl(
+            inputs, output_size, "average", data_format
+        )
+    if dims == 3:
+        return _adaptive_pool3d_impl(
+            inputs, output_size, "average", data_format
+        )
+    raise ValueError("adaptive_average_pool supports only 1D/2D/3D")
+
+
+def adaptive_max_pool(inputs, output_size, data_format=None):
+    data_format = backend.standardize_data_format(data_format)
+    dims = inputs.ndim - 2
+    if dims == 1:
+        return _adaptive_pool1d_impl(inputs, output_size, "max", data_format)
+    if dims == 2:
+        return _adaptive_pool2d_impl(inputs, output_size, "max", data_format)
+    if dims == 3:
+        return _adaptive_pool3d_impl(inputs, output_size, "max", data_format)
+    raise ValueError("adaptive_max_pool supports only 1D/2D/3D")
+
+
 def _convert_to_lax_conv_dimension_numbers(
     num_spatial_dims,
     data_format="channels_last",
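The implementation relies on a standard property of adaptive pooling: the windows take at most two sizes, differing by one, so every position can be pooled once at the "small" size and once at the "big" size via strided views, with a gather picking the right row per output index (`compute_adaptive_pooling_window_sizes` is assumed to return exactly that pair). A small check of the property:

import numpy as np

input_dim, output_size = 8, 3
starts = np.floor(np.arange(output_size) * input_dim / output_size).astype(int)
ends = np.ceil(np.arange(1, output_size + 1) * input_dim / output_size).astype(int)
print([(int(a), int(b)) for a, b in zip(starts, ends)])  # [(0, 3), (2, 6), (5, 8)]
print(sorted({int(s) for s in ends - starts}))           # [3, 4]: small and big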
@@ -403,7 +653,7 @@ def conv(
             f"kernel in_channels {kernel_in_channels}. "
         )
     feature_group_count = channels // kernel_in_channels
-    return np.array(
+    result = np.array(
         jax.lax.conv_general_dilated(
             inputs,
             kernel if is_tensor(kernel) else kernel.numpy(),
@@ -414,6 +664,14 @@ def conv(
             feature_group_count=feature_group_count,
         )
     )
+    if result.size == 0:
+        raise ValueError(
+            "The convolution operation resulted in an empty output. "
+            "This can happen if the input is too small for the given "
+            "kernel size, strides, dilation rate, and padding mode. "
+            "Please check the input shape and convolution parameters."
+        )
+    return result


 def depthwise_conv(
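A sketch of the arithmetic this check guards against: with "valid" padding the spatial output length is floor((in - dilation*(k-1) - 1) / stride) + 1, which reaches zero when the effective kernel outgrows the input (illustrative numbers):

in_len, kernel, dilation, stride = 4, 3, 2, 1
out_len = (in_len - dilation * (kernel - 1) - 1) // stride + 1
print(out_len)  # 0 -> the conv output would be empty, hence the new error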
@@ -1175,3 +1433,56 @@ def dot_product_attention(
     return _dot_product_attention_xla(
         query, key, value, bias, mask, is_causal, scale
     )
+
+
+def unfold(input, kernel_size, dilation=1, padding=0, stride=1):
+    """NumPy implementation of Unfold.
+    Extract sliding local blocks from a **NCHW** batched image tensor.
+
+    Args:
+        input: 4-D tensor, shape (N, C, H, W) **required**.
+        kernel_size: int or (kH, kW)
+        dilation: int or (dH, dW), default 1
+        padding: int or (pH, pW), default 0
+        stride: int or (sH, sW), default 1
+
+    Returns:
+        3-D tensor, shape (N, C*kH*kW, L)
+    """
+
+    def _pair(x):
+        return (x, x) if isinstance(x, int) else x
+
+    k = _pair(kernel_size)
+    d = _pair(dilation)
+    p = _pair(padding)
+    s = _pair(stride)
+
+    N, C, H, W = input.shape
+
+    # ---- padding ----
+    if any(_ > 0 for _ in p):
+        input = np.pad(
+            input, ((0, 0), (0, 0), (p[0], p[0]), (p[1], p[1])), mode="constant"
+        )
+
+    # ---- spatial size ----
+    oH = (input.shape[2] - (k[0] - 1) * d[0] - 1) // s[0] + 1
+    oW = (input.shape[3] - (k[1] - 1) * d[1] - 1) // s[1] + 1
+
+    i0 = np.arange(0, oH) * s[0]
+    j0 = np.arange(0, oW) * s[1]
+    i, j = np.meshgrid(i0, j0, indexing="ij")  # shape (oH, oW)
+    i = i.reshape(-1)
+    j = j.reshape(-1)
+
+    # ---- flatten patches ----
+    patches = np.empty((N, C, k[0], k[1], oH * oW), dtype=input.dtype)
+    for idx in range(k[0]):
+        for jdx in range(k[1]):
+            patches[:, :, idx, jdx, :] = input[
+                :, :, i + idx * d[0], j + jdx * d[1]
+            ]
+
+    # ---- reshape -> (N, C*kH*kW, L) ----
+    return patches.reshape(N, C * k[0] * k[1], -1)
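Example usage of the `unfold` defined above, checking the documented output shape (N, C*kH*kW, L) on a toy tensor:

import numpy as np

x = np.arange(2 * 3 * 4 * 4, dtype=np.float32).reshape(2, 3, 4, 4)
cols = unfold(x, kernel_size=2, stride=2)
print(cols.shape)  # (2, 12, 4): C*kH*kW = 3*2*2 = 12, L = oH*oW = 2*2 = 4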