keras-nightly 3.14.0.dev2025122704__py3-none-any.whl → 3.14.0.dev2026012204__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras/_tf_keras/keras/dtype_policies/__init__.py +3 -0
- keras/_tf_keras/keras/ops/__init__.py +3 -0
- keras/_tf_keras/keras/ops/numpy/__init__.py +3 -0
- keras/_tf_keras/keras/quantizers/__init__.py +1 -0
- keras/dtype_policies/__init__.py +3 -0
- keras/ops/__init__.py +3 -0
- keras/ops/numpy/__init__.py +3 -0
- keras/quantizers/__init__.py +1 -0
- keras/src/backend/jax/nn.py +26 -9
- keras/src/backend/jax/numpy.py +16 -0
- keras/src/backend/numpy/numpy.py +23 -0
- keras/src/backend/openvino/numpy.py +369 -16
- keras/src/backend/tensorflow/numpy.py +34 -1
- keras/src/backend/tensorflow/rnn.py +17 -7
- keras/src/backend/torch/numpy.py +36 -0
- keras/src/backend/torch/rnn.py +28 -11
- keras/src/callbacks/orbax_checkpoint.py +75 -42
- keras/src/dtype_policies/__init__.py +2 -0
- keras/src/dtype_policies/dtype_policy.py +90 -1
- keras/src/layers/core/dense.py +122 -6
- keras/src/layers/core/einsum_dense.py +151 -7
- keras/src/layers/core/embedding.py +1 -1
- keras/src/layers/core/reversible_embedding.py +10 -1
- keras/src/layers/layer.py +5 -0
- keras/src/layers/preprocessing/feature_space.py +8 -4
- keras/src/layers/preprocessing/image_preprocessing/aug_mix.py +2 -2
- keras/src/layers/preprocessing/image_preprocessing/center_crop.py +13 -15
- keras/src/layers/preprocessing/image_preprocessing/random_contrast.py +3 -3
- keras/src/layers/preprocessing/image_preprocessing/resizing.py +10 -0
- keras/src/losses/losses.py +24 -0
- keras/src/models/model.py +18 -9
- keras/src/ops/image.py +109 -96
- keras/src/ops/numpy.py +181 -0
- keras/src/quantizers/__init__.py +2 -0
- keras/src/quantizers/awq.py +361 -0
- keras/src/quantizers/awq_config.py +140 -0
- keras/src/quantizers/awq_core.py +217 -0
- keras/src/quantizers/gptq.py +1 -2
- keras/src/quantizers/gptq_core.py +1 -1
- keras/src/quantizers/quantization_config.py +14 -0
- keras/src/quantizers/quantizers.py +61 -52
- keras/src/random/seed_generator.py +2 -2
- keras/src/saving/file_editor.py +81 -6
- keras/src/saving/orbax_util.py +50 -0
- keras/src/saving/saving_api.py +37 -14
- keras/src/utils/jax_layer.py +69 -31
- keras/src/utils/module_utils.py +11 -0
- keras/src/utils/tracking.py +5 -5
- keras/src/version.py +1 -1
- {keras_nightly-3.14.0.dev2025122704.dist-info → keras_nightly-3.14.0.dev2026012204.dist-info}/METADATA +1 -1
- {keras_nightly-3.14.0.dev2025122704.dist-info → keras_nightly-3.14.0.dev2026012204.dist-info}/RECORD +53 -49
- {keras_nightly-3.14.0.dev2025122704.dist-info → keras_nightly-3.14.0.dev2026012204.dist-info}/WHEEL +1 -1
- {keras_nightly-3.14.0.dev2025122704.dist-info → keras_nightly-3.14.0.dev2026012204.dist-info}/top_level.txt +0 -0
keras/src/backend/openvino/numpy.py
CHANGED
```diff
@@ -4,6 +4,7 @@ from openvino import Type
 
 from keras.src.backend import config
 from keras.src.backend.common import dtypes
+from keras.src.backend.common.backend_utils import canonicalize_axis
 from keras.src.backend.common.variables import standardize_dtype
 from keras.src.backend.openvino.core import DTYPES_MAX
 from keras.src.backend.openvino.core import DTYPES_MIN
```
```diff
@@ -705,7 +706,16 @@ def broadcast_to(x, shape):
 
 
 def cbrt(x):
-    raise NotImplementedError("`cbrt` is not supported with openvino backend")
+    x = get_ov_output(x)
+    x_type = x.get_element_type()
+    if x_type.is_integral() or x_type == Type.boolean:
+        x = ov_opset.convert(x, OPENVINO_DTYPES[config.floatx()]).output(0)
+    sign_x = ov_opset.sign(x)
+    abs_x = ov_opset.absolute(x)
+    one_third = ov_opset.constant(1.0 / 3.0, x.get_element_type())
+    root_abs = ov_opset.power(abs_x, one_third)
+    res = ov_opset.multiply(sign_x, root_abs)
+    return OpenVINOKerasTensor(res.output(0))
 
 
 def ceil(x):
```
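The new `cbrt` keeps the result real for negative inputs by computing sign(x) · |x|^(1/3); a bare `power(x, 1/3)` would yield NaN for x < 0. A minimal NumPy sketch of the same identity (NumPy standing in for the OpenVINO ops):

```python
import numpy as np

x = np.array([-8.0, -1.0, 0.0, 27.0])

# sign(x) * |x| ** (1/3) stays real for negative inputs...
real_cbrt = np.sign(x) * np.abs(x) ** (1.0 / 3.0)

# ...while a naive fractional power yields NaN for x < 0
# (RuntimeWarning: invalid value encountered in power).
naive = x ** (1.0 / 3.0)

print(real_cbrt)                           # [-2. -1.  0.  3.]
print(np.allclose(real_cbrt, np.cbrt(x)))  # True
```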
```diff
@@ -893,9 +903,53 @@ def diag(x, k=0):
 
 
 def diagonal(x, offset=0, axis1=0, axis2=1):
-    raise NotImplementedError(
-        "`diagonal` is not supported with openvino backend"
-    )
+    x = get_ov_output(x)
+    shape = x.get_partial_shape()
+    rank = x.get_partial_shape().rank.get_length()
+    if rank is None:
+        raise ValueError("`diagonal` requires input tensor with static rank.")
+    if rank < 2:
+        raise ValueError(
+            f"diagonal requires input tensor with rank >= 2.Given rank: {rank}"
+        )
+    axis1 = canonicalize_axis(axis1, rank)
+    axis2 = canonicalize_axis(axis2, rank)
+    if axis1 == axis2:
+        raise ValueError("`axis1` and `axis2` cannot be the same.")
+
+    perm_order = [axis1, axis2] + [
+        i for i in range(rank) if i != axis1 and i != axis2
+    ]
+    perm_const = ov_opset.constant(perm_order, dtype=Type.i32).output(0)
+    x_transposed = ov_opset.transpose(x, perm_const)
+
+    N_dim = shape[axis1]
+    M_dim = shape[axis2]
+    if not N_dim.is_static or not M_dim.is_static:
+        raise ValueError(
+            "`diagonal` requires input tensor with static shape for axes "
+            f"`axis1` ({axis1}) and `axis2` ({axis2})."
+        )
+    N = N_dim.get_length()
+    M = M_dim.get_length()
+    if offset >= 0:
+        L = np.minimum(N, M - offset) if (M - offset) > 0 else 0
+        indices = [[i, i + offset] for i in range(L)]
+    else:
+        L = np.minimum(N + offset, M) if (N + offset) > 0 else 0
+        indices = [[i - offset, i] for i in range(L)]
+
+    indices = np.array(indices, dtype=np.int32).reshape(L, 2)
+    indices_const = ov_opset.constant(indices, dtype=Type.i32).output(0)
+
+    diag_gathered = ov_opset.gather_nd(x_transposed, indices_const)
+
+    out_rank = rank - 1
+    out_perm_order = list(range(1, out_rank)) + [0]
+    out_perm_const = ov_opset.constant(out_perm_order, dtype=Type.i32).output(0)
+
+    final_output = ov_opset.transpose(diag_gathered, out_perm_const)
+    return OpenVINOKerasTensor(final_output.output(0))
 
 
 def diff(a, n=1, axis=-1):
```
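The `diagonal` lowering transposes the two target axes to the front, gathers the (i, i + offset) index pairs with `gather_nd`, then rotates the gathered axis to the back so the diagonal lands on the last output axis, matching NumPy's convention. A NumPy sketch of the same index construction (illustrative only, not the OpenVINO code path):

```python
import numpy as np

x = np.arange(2 * 3 * 4).reshape(2, 3, 4)
offset, axis1, axis2 = 1, 1, 2

# Move the two diagonal axes to the front, as the kernel does.
moved = np.transpose(x, (axis1, axis2, 0))
N, M = moved.shape[0], moved.shape[1]
L = min(N, M - offset)
pairs = [(i, i + offset) for i in range(L)]

# Gather the (i, i + offset) pairs, then put the diagonal axis last.
diag = np.stack([moved[i, j] for i, j in pairs]).transpose(1, 0)

print(np.array_equal(diag, np.diagonal(x, offset, axis1, axis2)))  # True
```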
```diff
@@ -1071,7 +1125,94 @@ def expm1(x):
 
 
 def flip(x, axis=None):
-    raise NotImplementedError("`flip` is not supported with openvino backend")
+    x_node = get_ov_output(x)
+
+    # Using OpenVINO tensor shape
+    ndim = len(x_node.get_partial_shape())
+    if ndim is None:
+        raise ValueError(
+            "The `flip` operation does not support tensors with dynamic rank "
+            "for the OpenVINO backend."
+        )
+
+    if axis is None:
+        axis = list(range(ndim))
+    elif isinstance(axis, int):
+        axis = [axis]
+
+    axis = [a + ndim if a < 0 else a for a in axis]
+
+    begin = [0] * ndim
+    end = [0] * ndim
+    strides = [1] * ndim
+    for a in axis:
+        strides[a] = -1
+
+    all_ones_mask = [1] * ndim
+    result = ov_opset.strided_slice(
+        data=x_node,
+        begin=begin,
+        end=end,
+        strides=strides,
+        begin_mask=all_ones_mask,
+        end_mask=all_ones_mask,
+    )
+    return OpenVINOKerasTensor(result.output(0))
+
+
+def rot90(array, k=1, axes=(0, 1)):
+    """Rotate an array by 90 degrees in the plane specified by axes."""
+    array = get_ov_output(array)
+
+    if not isinstance(axes, (tuple, list)) or len(axes) != 2:
+        raise ValueError("axes must be a tuple of length 2")
+
+    shape = array.get_partial_shape()
+    ndim = shape.rank.get_length()
+    if ndim is None:
+        raise ValueError(
+            "`rot90` does not support tensors with dynamic rank "
+            "for the OpenVINO backend."
+        )
+
+    axis1 = canonicalize_axis(axes[0], ndim)
+    axis2 = canonicalize_axis(axes[1], ndim)
+
+    if axis1 == axis2:
+        raise ValueError("axes must be different")
+
+    k = k % 4
+    if k == 0:
+        return OpenVINOKerasTensor(array)
+
+    result = array
+
+    for _ in range(k):
+        # 1. Transpose axis1 <-> axis2
+        perm = list(range(ndim))
+        perm[axis1], perm[axis2] = perm[axis2], perm[axis1]
+        perm_const = ov_opset.constant(perm, Type.i32).output(0)
+        result = ov_opset.transpose(result, perm_const).output(0)
+
+        # 2. Reverse along axis1 using StridedSlice
+        begin = [0] * ndim
+        end = [0] * ndim
+        strides = [1] * ndim
+        strides[axis1] = -1
+
+        begin_mask = [1] * ndim
+        end_mask = [1] * ndim
+
+        result = ov_opset.strided_slice(
+            data=result,
+            begin=begin,
+            end=end,
+            strides=strides,
+            begin_mask=begin_mask,
+            end_mask=end_mask,
+        ).output(0)
+
+    return OpenVINOKerasTensor(result)
 
 
 def floor(x):
```
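Both `flip` and `rot90` reverse an axis via `strided_slice` with stride -1 and the begin/end masks set, the graph-level analogue of Python's `a[::-1]`; a 90° rotation is then "swap the two plane axes, reverse one of them", repeated `k` times. The NumPy equivalent:

```python
import numpy as np

a = np.arange(6).reshape(2, 3)

# Reversing an axis is a stride -1 slice, here along axis 0.
flipped = a[::-1, :]
print(np.array_equal(flipped, np.flip(a, axis=0)))  # True

# rot90 = swap the two plane axes, then reverse the first of them.
rotated = a.transpose(1, 0)[::-1, :]
print(np.array_equal(rotated, np.rot90(a)))  # True
```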
```diff
@@ -1150,7 +1291,34 @@ def hstack(xs):
 
 
 def hypot(x1, x2):
-    raise NotImplementedError("`hypot` is not supported with openvino backend")
+    element_type = None
+    if isinstance(x1, OpenVINOKerasTensor):
+        element_type = x1.output.get_element_type()
+    if isinstance(x2, OpenVINOKerasTensor):
+        element_type = x2.output.get_element_type()
+    x1 = get_ov_output(x1, element_type)
+    x2 = get_ov_output(x2, element_type)
+    x1, x2 = _align_operand_types(x1, x2, "hypot()")
+    x_type = x1.get_element_type()
+    if x_type.is_integral() or x_type == Type.boolean:
+        ov_type = OPENVINO_DTYPES[config.floatx()]
+        x1 = ov_opset.convert(x1, ov_type)
+        x2 = ov_opset.convert(x2, ov_type)
+    x1_abs = ov_opset.absolute(x1)
+    x2_abs = ov_opset.absolute(x2)
+    max_val = ov_opset.maximum(x1_abs, x2_abs)
+    min_val = ov_opset.minimum(x1_abs, x2_abs)
+    one = ov_opset.constant(1, max_val.get_element_type())
+    is_zero_mask = ov_opset.equal(
+        max_val, ov_opset.constant(0, max_val.get_element_type())
+    )
+    safe_divisor = ov_opset.select(is_zero_mask, one, max_val)
+    ratio = ov_opset.divide(min_val, safe_divisor)
+    result = ov_opset.multiply(
+        max_val,
+        ov_opset.sqrt(ov_opset.add(one, ov_opset.multiply(ratio, ratio))),
+    )
+    return OpenVINOKerasTensor(result.output(0))
 
 
 def identity(n, dtype=None):
```
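Instead of sqrt(x1² + x2²), which overflows when either operand is large, `hypot` uses the scaled form max · sqrt(1 + (min/max)²), with a `select` guarding the division when both inputs are zero. Illustrated with NumPy in float32:

```python
import numpy as np

x1, x2 = np.float32(3e38), np.float32(1e38)

# Naive form overflows in float32: x1 * x1 is already inf.
naive = np.sqrt(x1 * x1 + x2 * x2)

# Scaled form stays finite: max * sqrt(1 + (min/max)**2).
hi, lo = np.maximum(x1, x2), np.minimum(x1, x2)
scaled = hi * np.sqrt(np.float32(1) + (lo / hi) ** 2)

print(np.isinf(naive), np.isclose(scaled, np.hypot(x1, x2)))  # True True
```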
```diff
@@ -1287,7 +1455,66 @@ def isreal(x):
 
 
 def kron(x1, x2):
-    raise NotImplementedError("`kron` is not supported with openvino backend")
+    x1 = get_ov_output(x1)
+    x2 = get_ov_output(x2)
+    x1, x2 = _align_operand_types(x1, x2, "kron()")
+    x1_shape = x1.get_partial_shape()
+    x2_shape = x2.get_partial_shape()
+    if x1_shape.rank.is_dynamic or x2_shape.rank.is_dynamic:
+        raise ValueError(
+            "`kron` does not support tensors with dynamic rank for "
+            "the OpenVINO backend."
+        )
+    ndim1 = x1_shape.rank.get_length()
+    ndim2 = x2_shape.rank.get_length()
+    if ndim1 < ndim2:
+        axes = ov_opset.range(
+            ov_opset.constant(0, Type.i32),
+            ov_opset.constant(ndim2 - ndim1, Type.i32),
+            ov_opset.constant(1, Type.i32),
+        )
+        x1 = ov_opset.unsqueeze(x1, axes)
+        ndim1 = ndim2
+    elif ndim2 < ndim1:
+        axes = ov_opset.range(
+            ov_opset.constant(0, Type.i32),
+            ov_opset.constant(ndim1 - ndim2, Type.i32),
+            ov_opset.constant(1, Type.i32),
+        )
+        x2 = ov_opset.unsqueeze(x2, axes)
+        ndim2 = ndim1
+    shape1 = ov_opset.shape_of(x1, Type.i32)
+    shape2 = ov_opset.shape_of(x2, Type.i32)
+    ones = ov_opset.broadcast(
+        ov_opset.constant(1, Type.i32), ov_opset.constant([ndim1], Type.i32)
+    )
+    axis = ov_opset.constant(1, Type.i32)
+    flatten = ov_opset.constant([-1], Type.i32)
+    unsqueezed_ones = ov_opset.unsqueeze(ones, axis)
+    x1_new_shape = ov_opset.reshape(
+        ov_opset.concat(
+            [ov_opset.unsqueeze(shape1, axis), unsqueezed_ones],
+            axis=1,
+        ),
+        flatten,
+        False,
+    )
+    x2_new_shape = ov_opset.reshape(
+        ov_opset.concat(
+            [unsqueezed_ones, ov_opset.unsqueeze(shape2, axis)],
+            axis=1,
+        ),
+        flatten,
+        False,
+    )
+    result = ov_opset.multiply(
+        ov_opset.reshape(x1, x1_new_shape, False),
+        ov_opset.reshape(x2, x2_new_shape, False),
+    )
+    result = ov_opset.reshape(
+        result, ov_opset.multiply(shape1, shape2), False
+    ).output(0)
+    return OpenVINOKerasTensor(result)
 
 
 def lcm(x1, x2):
```
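`kron` is assembled from reshapes alone: x1 is reshaped to interleave its dimensions with ones ([d1, 1, d2, 1, ...]) and x2 to the complementary pattern ([1, e1, 1, e2, ...]), so a broadcast multiply produces every pairwise product and a final reshape to shape1 * shape2 lays the blocks out in order. The same trick in NumPy:

```python
import numpy as np

a = np.array([[1, 2], [3, 4]])
b = np.array([[0, 10], [20, 30]])

# Interleave the axes: a -> (2, 1, 2, 1), b -> (1, 2, 1, 2).
a4 = a.reshape(2, 1, 2, 1)
b4 = b.reshape(1, 2, 1, 2)

# Broadcasting gives every pairwise product; reshape flattens the blocks.
kron = (a4 * b4).reshape(4, 4)

print(np.array_equal(kron, np.kron(a, b)))  # True
```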
```diff
@@ -1552,9 +1779,42 @@ def logaddexp(x1, x2):
 
 
 def logaddexp2(x1, x2):
-    raise NotImplementedError(
-        "`logaddexp2` is not supported with openvino backend"
+    element_type = None
+    if isinstance(x1, OpenVINOKerasTensor):
+        element_type = x1.output.get_element_type()
+    if isinstance(x2, OpenVINOKerasTensor):
+        element_type = x2.output.get_element_type()
+    x1 = get_ov_output(x1, element_type)
+    x2 = get_ov_output(x2, element_type)
+    x1, x2 = _align_operand_types(x1, x2, "logaddexp2()")
+
+    if x1.element_type.is_integral() or x2.element_type.is_integral():
+        float_dtype = OPENVINO_DTYPES[config.floatx()]
+        if x1.get_element_type().is_integral():
+            x1 = ov_opset.convert(x1, float_dtype)
+        if x2.get_element_type().is_integral():
+            x2 = ov_opset.convert(x2, float_dtype)
+
+    max_val = ov_opset.maximum(x1, x2)
+
+    sub = ov_opset.subtract(x1, x2)
+    abs_diff = ov_opset.abs(sub)
+
+    neg_abs_diff = ov_opset.negative(abs_diff)
+
+    element_type = neg_abs_diff.get_element_type()
+
+    two = ov_opset.constant(2, dtype=element_type)
+
+    power_of_2 = ov_opset.power(two, neg_abs_diff)
+
+    one_plus_power = ov_opset.add(
+        ov_opset.constant(1, dtype=element_type), power_of_2
     )
+    log2_term = ov_opset.divide(ov_opset.log(one_plus_power), ov_opset.log(two))
+    result = ov_opset.add(max_val, log2_term).output(0)
+
+    return OpenVINOKerasTensor(result)
 
 
 def logical_and(x1, x2):
```
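This is the standard stable decomposition logaddexp2(a, b) = max(a, b) + log2(1 + 2^-|a - b|): the max term carries the magnitude, so neither 2^a nor 2^b is ever materialized. A NumPy check:

```python
import numpy as np

a, b = np.float64(1020.0), np.float64(1030.0)

# Naive form overflows: 2.0**1030 is inf in float64.
naive = np.log2(2.0**a + 2.0**b)

stable = np.maximum(a, b) + np.log2(1.0 + 2.0 ** (-np.abs(a - b)))
print(np.isinf(naive), np.allclose(stable, np.logaddexp2(a, b)))  # True True
```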
```diff
@@ -1829,6 +2089,10 @@ def moveaxis(x, source, destination):
     return OpenVINOKerasTensor(ov_opset.transpose(x, axes_const).output(0))
 
 
+def nansum(x, axis=None, keepdims=False):
+    raise NotImplementedError("`nansum` is not supported with openvino backend")
+
+
 def nan_to_num(x, nan=0.0, posinf=None, neginf=None):
     x = get_ov_output(x)
     dtype = x.get_element_type()
```
```diff
@@ -1979,6 +2243,10 @@ def prod(x, axis=None, keepdims=False, dtype=None):
     return OpenVINOKerasTensor(result)
 
 
+def ptp(x, axis=None, keepdims=False):
+    raise NotImplementedError("`ptp` is not supported with openvino backend")
+
+
 def quantile(x, q, axis=None, method="linear", keepdims=False):
     raise NotImplementedError(
         "`quantile` is not supported with openvino backend"
```
```diff
@@ -2115,7 +2383,14 @@ def sinh(x):
 
 
 def size(x):
-    raise NotImplementedError("`size` is not supported with openvino backend")
+    x = get_ov_output(x)
+    shape_tensor = ov_opset.shape_of(x, output_type=Type.i64)
+    final_size = ov_opset.reduce_prod(
+        shape_tensor,
+        ov_opset.constant([0], Type.i64),
+        keep_dims=False,
+    )
+    return OpenVINOKerasTensor(final_size.output(0))
 
 
 def sort(x, axis=-1):
```
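`size` is computed in-graph as the product of the shape vector (`ShapeOf` followed by `ReduceProd`), so it also works for inputs whose dimensions are only known at runtime. The NumPy identity it implements:

```python
import numpy as np

x = np.zeros((2, 3, 4))

# Number of elements == product over the shape vector.
print(int(np.prod(x.shape)), x.size)  # 24 24
```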
```diff
@@ -2257,9 +2532,20 @@ def std(x, axis=None, keepdims=False):
 
 
 def swapaxes(x, axis1, axis2):
-    raise NotImplementedError(
-        "`swapaxes` is not supported with openvino backend"
-    )
+    x = get_ov_output(x)
+    x_shape = x.get_partial_shape()
+    if x_shape.rank.is_dynamic:
+        raise ValueError(
+            "`swapaxes` does not support tensors with dynamic rank for the "
+            "OpenVINO backend."
+        )
+    rank = x_shape.rank.get_length()
+    axis1 = canonicalize_axis(axis1, rank)
+    axis2 = canonicalize_axis(axis2, rank)
+    axes = list(range(rank))
+    axes[axis1], axes[axis2] = axes[axis2], axes[axis1]
+    result = ov_opset.transpose(x, ov_opset.constant(axes, Type.i32))
+    return OpenVINOKerasTensor(result.output(0))
 
 
 def take(x, indices, axis=None):
```
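`swapaxes` lowers to a single `Transpose` whose permutation is the identity with the two requested entries exchanged:

```python
import numpy as np

x = np.zeros((2, 3, 4))
axis1, axis2 = 0, 2

# Identity permutation with the two axes exchanged.
perm = list(range(x.ndim))
perm[axis1], perm[axis2] = perm[axis2], perm[axis1]

print(np.transpose(x, perm).shape)         # (4, 3, 2)
print(np.swapaxes(x, axis1, axis2).shape)  # (4, 3, 2)
```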
```diff
@@ -2378,7 +2664,8 @@ def tile(x, repeats):
 
 
 def trace(x, offset=0, axis1=0, axis2=1):
-    raise NotImplementedError("`trace` is not supported with openvino backend")
+    x = diagonal(x, offset=offset, axis1=axis1, axis2=axis2)
+    return sum(x, axis=-1)
 
 
 def tri(N, M=None, k=0, dtype=None):
```
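With `diagonal` available, `trace` reduces to summing the extracted diagonal over the last axis (`sum` here is the backend's reduction, not the Python builtin). NumPy obeys the same identity:

```python
import numpy as np

x = np.arange(2 * 3 * 3).reshape(2, 3, 3)

# trace == sum of the diagonal along the last output axis.
via_diag = np.diagonal(x, offset=0, axis1=1, axis2=2).sum(axis=-1)
print(np.array_equal(via_diag, np.trace(x, axis1=1, axis2=2)))  # True
```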
```diff
@@ -2572,6 +2859,12 @@ def negative(x):
     return OpenVINOKerasTensor(ov_opset.negative(x).output(0))
 
 
+def nextafter(x1, x2):
+    raise NotImplementedError(
+        "`nextafter` is not supported with openvino backend"
+    )
+
+
 def square(x):
     x = get_ov_output(x)
     x_type = x.get_element_type()
```
```diff
@@ -2905,6 +3198,66 @@ def slogdet(x):
 
 
 def argpartition(x, kth, axis=-1):
-    raise NotImplementedError(
-        "`argpartition` is not supported with openvino backend"
+    x = get_ov_output(x)
+    x_shape = x.get_partial_shape()
+    rank = x_shape.rank.get_length()
+    axis = canonicalize_axis(axis, rank)
+    axes = list(range(rank))
+    axes[axis], axes[-1] = axes[-1], axes[axis]
+    x = ov_opset.transpose(x, ov_opset.constant(axes))
+    x_shape_tensor = ov_opset.shape_of(x)
+    n = ov_opset.gather(
+        x_shape_tensor,
+        ov_opset.constant(-1),
+        ov_opset.constant(0),
+    )
+    if isinstance(kth, int) and kth < 0:
+        kth_tensor = ov_opset.add(
+            n,
+            ov_opset.constant(kth, n.get_element_type()),
+        )
+    else:
+        kth_tensor = ov_opset.constant(kth, n.get_element_type())
+    one = ov_opset.constant(1, kth_tensor.get_element_type())
+    k_val = ov_opset.add(kth_tensor, one)
+    bottom_ind = ov_opset.topk(
+        ov_opset.negative(x),
+        k=k_val,
+        axis=-1,
+        mode="max",
+        sort="value",
+    ).output(1)
+    one_hot_mask = ov_opset.one_hot(
+        bottom_ind,
+        n,
+        ov_opset.constant(1),
+        ov_opset.constant(0),
+        axis=-1,
+    )
+    mask = ov_opset.reduce_sum(
+        one_hot_mask,
+        ov_opset.constant([-2]),
+        keep_dims=False,
     )
+    ones = ov_opset.broadcast(
+        ov_opset.constant(1),
+        x_shape_tensor,
+    )
+    proxy = ov_opset.subtract(ones, mask)
+    remaining_k = ov_opset.subtract(n, k_val)
+    top_ind = ov_opset.topk(
+        proxy,
+        k=remaining_k,
+        axis=-1,
+        mode="max",
+        sort="value",
+    ).output(1)
+    result = ov_opset.concat([bottom_ind, top_ind], axis=-1)
+    inv_axes = [0] * rank
+    for i, a in enumerate(axes):
+        inv_axes[a] = i
+    result = ov_opset.transpose(
+        result,
+        ov_opset.constant(inv_axes),
+    ).output(0)
+    return OpenVINOKerasTensor(result)
```
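The construction uses two TopK passes: the first, over -x, returns the indices of the kth + 1 smallest values in sorted order (so position kth holds the kth-smallest element, which is the `argpartition` contract); a one-hot mask then zeroes those positions out of a proxy tensor, and a second TopK collects the remaining indices in arbitrary but valid order. What `argpartition` guarantees, checked with NumPy:

```python
import numpy as np

x = np.array([9.0, 1.0, 7.0, 3.0, 5.0])
kth = 2

idx = np.argpartition(x, kth)
part = x[idx]

# Position kth holds the kth-smallest element; everything to its left
# is <= it, everything to its right is >= it. Order within is arbitrary.
print(part[kth] == np.sort(x)[kth])         # True
print((part[:kth] <= part[kth]).all())      # True
print((part[kth + 1:] >= part[kth]).all())  # True
```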
keras/src/backend/tensorflow/numpy.py
CHANGED
```diff
@@ -2125,6 +2125,22 @@ def moveaxis(x, source, destination):
     return tf.transpose(x, perm)
 
 
+def nansum(x, axis=None, keepdims=False):
+    x = convert_to_tensor(x)
+    dtype = standardize_dtype(x.dtype)
+    x_clean = tf.where(
+        tf.math.is_nan(cast(x, config.floatx())), tf.zeros((), dtype=dtype), x
+    )
+
+    if dtype in ("bool", "int8", "int16"):
+        dtype = "int32"
+    elif dtype in ("uint8", "uint16"):
+        dtype = "uint32"
+    x_clean = cast(x_clean, dtype)
+
+    return tf.reduce_sum(x_clean, axis=axis, keepdims=keepdims)
+
+
 def nan_to_num(x, nan=0.0, posinf=None, neginf=None):
     x = convert_to_tensor(x)
```
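`nansum` zeroes out NaN entries with `tf.where` before reducing; the dtype widening mirrors NumPy, which accumulates small integer inputs in a wider integer type so the sum does not overflow. The two steps, sketched with NumPy:

```python
import numpy as np

x = np.array([1.0, np.nan, 2.0, np.nan])

# Zero the NaNs, then reduce: the same two steps the TF kernel performs.
print(np.where(np.isnan(x), 0.0, x).sum())  # 3.0
print(np.nansum(x))                         # 3.0
```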
```diff
@@ -2151,7 +2167,7 @@ def nan_to_num(x, nan=0.0, posinf=None, neginf=None):
 
 def ndim(x):
     x = convert_to_tensor(x)
-    return x.ndim
+    return x.shape.rank
 
 
 def nonzero(x):
```
```diff
@@ -2215,6 +2231,13 @@ def prod(x, axis=None, keepdims=False, dtype=None):
     return tf.reduce_prod(x, axis=axis, keepdims=keepdims)
 
 
+def ptp(x, axis=None, keepdims=False):
+    x = convert_to_tensor(x)
+    return tf.reduce_max(x, axis=axis, keepdims=keepdims) - tf.reduce_min(
+        x, axis=axis, keepdims=keepdims
+    )
+
+
 def _quantile(x, q, axis=None, method="linear", keepdims=False):
     # ref: tfp.stats.percentile
     # float64 is needed here and below, else we get the wrong index if the array
```
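`ptp` ("peak to peak") is just the spread max - min along the reduction axes:

```python
import numpy as np

x = np.array([[2.0, 9.0, 4.0], [7.0, 1.0, 6.0]])

# Peak-to-peak: the spread between max and min along an axis.
print(x.max(axis=1) - x.min(axis=1))  # [7. 6.]
print(np.ptp(x, axis=1))              # [7. 6.]
```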
```diff
@@ -3017,6 +3040,16 @@ def negative(x):
     return tf.negative(x)
 
 
+def nextafter(x1, x2):
+    x1 = convert_to_tensor(x1)
+    x2 = convert_to_tensor(x2)
+
+    dtype = dtypes.result_type(x1.dtype, x2.dtype, float)
+    x1 = tf.cast(x1, tf.float64)
+    x2 = tf.cast(x2, tf.float64)
+    return tf.cast(tf.math.nextafter(x1, x2), dtype)
+
+
 @sparse.elementwise_unary
 def square(x):
     x = convert_to_tensor(x)
```
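`nextafter(x1, x2)` returns the next representable float after x1 in the direction of x2, i.e. a step of exactly one ulp; the kernel above performs the step in float64 and casts to the promoted result dtype afterwards. The one-ulp behavior in NumPy:

```python
import numpy as np

one = np.float64(1.0)
up = np.nextafter(one, np.float64(2.0))

# The step equals the machine epsilon at 1.0: one ulp.
print(up - one == np.finfo(np.float64).eps)      # True
print(np.nextafter(one, np.float64(0.0)) < one)  # True
```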
keras/src/backend/tensorflow/rnn.py
CHANGED
```diff
@@ -539,11 +539,21 @@ def _do_lstm_arguments_support_cudnn(
 
 
 def _has_fully_masked_sequence(mask):
-
-
-
-
-
+    """Check if input sequence contains any fully masked data.
+
+    cuDNN kernel will error out if the input sequence contains any fully masked
+    data. We work around this issue by rerouting the computation to the
+    standard kernel until the issue on the cuDNN side has been fixed. For a
+    fully masked sequence, it will contain all `False` values. To make it easy
+    to check, we invert the boolean and check if any of the sequences has all
+    `True` values.
+
+    Args:
+        mask: The mask tensor.
+
+    Returns:
+        A boolean tensor, `True` if the mask contains a fully masked sequence.
+    """
     return tf.reduce_any(
         tf.reduce_all(tf.logical_not(tf.cast(mask, dtype="bool")), axis=1)
     )
```
```diff
@@ -900,8 +910,8 @@ def _cudnn_lstm(
 
     if tf.sysconfig.get_build_info()["is_rocm_build"]:
         # ROCm MIOpen's weight sequence for LSTM is different from both
-        # canonical and
-        # MIOpen: [i, f, o, c]
+        # canonical and cuDNN format
+        # MIOpen: [i, f, o, c] cuDNN/Canonical: [i, f, c, o]
         # i is input gate weights.
         # f is forget gate weights.
         # o is output gate weights.
```
keras/src/backend/torch/numpy.py
CHANGED
```diff
@@ -1272,6 +1272,20 @@ def moveaxis(x, source, destination):
     return torch.moveaxis(x, source=source, destination=destination)
 
 
+def nansum(x, axis=None, keepdims=False):
+    if isinstance(x, (list, tuple)):
+        x = stack(x)
+    x = convert_to_tensor(x)
+    dtype = standardize_dtype(x.dtype)
+
+    if dtype in ("bool", "uint8", "int8", "int16"):
+        dtype = "int32"
+
+    if axis == () or axis == []:
+        return cast(torch.nan_to_num(x, nan=0), dtype)
+    return cast(torch.nansum(x, dim=axis, keepdim=keepdims), dtype)
+
+
 def nan_to_num(x, nan=0.0, posinf=None, neginf=None):
     x = convert_to_tensor(x)
     return torch.nan_to_num(x, nan=nan, posinf=posinf, neginf=neginf)
```
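One subtlety in the torch version: an empty `axis` tuple means "reduce over no axes", so the kernel only replaces NaNs with zero and returns a tensor of unchanged shape rather than calling `torch.nansum`. NumPy's `nansum` behaves the same way:

```python
import numpy as np

x = np.array([1.0, np.nan, 2.0])

# axis=() reduces over nothing: NaNs become 0, the shape is preserved.
print(np.nansum(x, axis=()))  # [1. 0. 2.]
print(np.nansum(x))           # 3.0
```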
```diff
@@ -1382,6 +1396,18 @@ def prod(x, axis=None, keepdims=False, dtype=None):
     return x
 
 
+def ptp(x, axis=None, keepdims=False):
+    x = convert_to_tensor(x)
+    if axis is None:
+        return x.max() - x.min()
+    elif axis == ():
+        return torch.zeros_like(x)
+    else:
+        return torch.amax(x, dim=axis, keepdim=keepdims) - torch.amin(
+            x, dim=axis, keepdim=keepdims
+        )
+
+
 def quantile(x, q, axis=None, method="linear", keepdims=False):
     x = convert_to_tensor(x)
     q = convert_to_tensor(q)
```
```diff
@@ -1793,6 +1819,16 @@ def negative(x):
     return torch.negative(x)
 
 
+def nextafter(x1, x2):
+    x1 = convert_to_tensor(x1)
+    x2 = convert_to_tensor(x2)
+
+    dtype = dtypes.result_type(x1.dtype, x2.dtype, float)
+    x1 = cast(x1, torch.float64)
+    x2 = cast(x2, torch.float64)
+    return cast(torch.nextafter(x1, x2), dtype)
+
+
 def square(x):
     x = convert_to_tensor(x)
     if standardize_dtype(x.dtype) == "bool":
```
keras/src/backend/torch/rnn.py
CHANGED
```diff
@@ -413,11 +413,21 @@ def _is_sequence_right_padded(mask):
 
 
 def _has_fully_masked_sequence(mask):
-
-
-
-
-
+    """Check if input sequence contains any fully masked data.
+
+    cuDNN kernel will error out if the input sequence contains any fully masked
+    data. We work around this issue by rerouting the computation to the
+    standard kernel until the issue on the cuDNN side has been fixed. For a
+    fully masked sequence, it will contain all `False` values. To make it easy
+    to check, we invert the boolean and check if any of the sequences has all
+    `True` values.
+
+    Args:
+        mask: The mask tensor.
+
+    Returns:
+        A boolean tensor, `True` if the mask contains a fully masked sequence.
+    """
     return torch.any(torch.all(~mask, dim=1))
```
```diff
@@ -447,8 +457,8 @@ def _compute_sequence_length_from_mask(mask, batch_first):
     The masking tensor is a 2D boolean tensor with shape [batch, timestep]. For
     any timestep that should be masked, the corresponding field will be False.
     Consider the following example:
-
-
+      a = [[True, True, False, False]
+           [True, True, True, False]]
     It is a (2, 4) tensor, and the corresponding sequence length result should
     be 1D tensor with value [2, 3]. Note that the masking tensor must be right
     padded that could be checked by, e.g., `is_sequence_right_padded()`.
```
```diff
@@ -467,12 +477,19 @@
 
 
 def prepare_lstm_weights(lstm, kernel, recurrent_kernel, bias, device):
-    """Copies kernel and recurrent kernel weights
+    """Copies kernel and recurrent kernel weights into the PyTorch format.
+
     We split the kernel and recurrent kernel weights, create associated
-    torch tensors adapted to be in line with the
-    After we have copied the weights, we ensure the
-    the same device and memory layout is optimized for
+    torch tensors adapted to be in line with the cuDNN optimization.
+    After we have copied the weights, we ensure the parameters are on
+    the same device and memory layout is optimized for cuDNN.
 
+    Args:
+        lstm: The PyTorch LSTM layer to prepare weights for.
+        kernel: The kernel weights tensor.
+        recurrent_kernel: The recurrent kernel weights tensor.
+        bias: The bias tensor.
+        device: The device to place the tensors on.
     """
 
     lstm = lstm.to(device)
```