keras-nightly 3.12.0.dev2025100503__py3-none-any.whl → 3.14.0.dev2026011604__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- keras/__init__.py +1 -0
- keras/_tf_keras/keras/__init__.py +1 -0
- keras/_tf_keras/keras/callbacks/__init__.py +3 -0
- keras/_tf_keras/keras/distillation/__init__.py +16 -0
- keras/_tf_keras/keras/distribution/__init__.py +3 -0
- keras/_tf_keras/keras/dtype_policies/__init__.py +3 -0
- keras/_tf_keras/keras/layers/__init__.py +21 -0
- keras/_tf_keras/keras/ops/__init__.py +13 -0
- keras/_tf_keras/keras/ops/image/__init__.py +1 -0
- keras/_tf_keras/keras/ops/linalg/__init__.py +1 -0
- keras/_tf_keras/keras/ops/nn/__init__.py +3 -0
- keras/_tf_keras/keras/ops/numpy/__init__.py +9 -0
- keras/_tf_keras/keras/quantizers/__init__.py +13 -0
- keras/callbacks/__init__.py +3 -0
- keras/distillation/__init__.py +16 -0
- keras/distribution/__init__.py +3 -0
- keras/dtype_policies/__init__.py +3 -0
- keras/layers/__init__.py +21 -0
- keras/ops/__init__.py +13 -0
- keras/ops/image/__init__.py +1 -0
- keras/ops/linalg/__init__.py +1 -0
- keras/ops/nn/__init__.py +3 -0
- keras/ops/numpy/__init__.py +9 -0
- keras/quantizers/__init__.py +13 -0
- keras/src/applications/imagenet_utils.py +4 -1
- keras/src/backend/common/backend_utils.py +30 -6
- keras/src/backend/common/name_scope.py +2 -1
- keras/src/backend/common/variables.py +30 -15
- keras/src/backend/jax/core.py +92 -3
- keras/src/backend/jax/distribution_lib.py +16 -2
- keras/src/backend/jax/linalg.py +4 -0
- keras/src/backend/jax/nn.py +509 -29
- keras/src/backend/jax/numpy.py +59 -8
- keras/src/backend/jax/trainer.py +14 -2
- keras/src/backend/numpy/linalg.py +4 -0
- keras/src/backend/numpy/nn.py +311 -1
- keras/src/backend/numpy/numpy.py +65 -2
- keras/src/backend/openvino/__init__.py +1 -0
- keras/src/backend/openvino/core.py +2 -23
- keras/src/backend/openvino/linalg.py +4 -0
- keras/src/backend/openvino/nn.py +271 -20
- keras/src/backend/openvino/numpy.py +943 -189
- keras/src/backend/tensorflow/layer.py +43 -9
- keras/src/backend/tensorflow/linalg.py +24 -0
- keras/src/backend/tensorflow/nn.py +545 -1
- keras/src/backend/tensorflow/numpy.py +250 -50
- keras/src/backend/torch/core.py +3 -1
- keras/src/backend/torch/linalg.py +4 -0
- keras/src/backend/torch/nn.py +125 -0
- keras/src/backend/torch/numpy.py +80 -2
- keras/src/callbacks/__init__.py +1 -0
- keras/src/callbacks/model_checkpoint.py +5 -0
- keras/src/callbacks/orbax_checkpoint.py +332 -0
- keras/src/callbacks/terminate_on_nan.py +54 -5
- keras/src/datasets/cifar10.py +5 -0
- keras/src/distillation/__init__.py +1 -0
- keras/src/distillation/distillation_loss.py +390 -0
- keras/src/distillation/distiller.py +598 -0
- keras/src/distribution/distribution_lib.py +14 -0
- keras/src/dtype_policies/__init__.py +2 -0
- keras/src/dtype_policies/dtype_policy.py +90 -1
- keras/src/export/__init__.py +2 -0
- keras/src/export/export_utils.py +39 -2
- keras/src/export/litert.py +248 -0
- keras/src/export/openvino.py +1 -1
- keras/src/export/tf2onnx_lib.py +3 -0
- keras/src/layers/__init__.py +13 -0
- keras/src/layers/activations/softmax.py +9 -4
- keras/src/layers/attention/multi_head_attention.py +4 -1
- keras/src/layers/core/dense.py +241 -111
- keras/src/layers/core/einsum_dense.py +316 -131
- keras/src/layers/core/embedding.py +84 -94
- keras/src/layers/core/input_layer.py +1 -0
- keras/src/layers/core/reversible_embedding.py +399 -0
- keras/src/layers/input_spec.py +17 -17
- keras/src/layers/layer.py +45 -15
- keras/src/layers/merging/dot.py +4 -1
- keras/src/layers/pooling/adaptive_average_pooling1d.py +65 -0
- keras/src/layers/pooling/adaptive_average_pooling2d.py +62 -0
- keras/src/layers/pooling/adaptive_average_pooling3d.py +63 -0
- keras/src/layers/pooling/adaptive_max_pooling1d.py +65 -0
- keras/src/layers/pooling/adaptive_max_pooling2d.py +62 -0
- keras/src/layers/pooling/adaptive_max_pooling3d.py +63 -0
- keras/src/layers/pooling/base_adaptive_pooling.py +63 -0
- keras/src/layers/preprocessing/discretization.py +6 -5
- keras/src/layers/preprocessing/feature_space.py +8 -4
- keras/src/layers/preprocessing/image_preprocessing/aug_mix.py +2 -2
- keras/src/layers/preprocessing/image_preprocessing/random_contrast.py +3 -3
- keras/src/layers/preprocessing/image_preprocessing/resizing.py +10 -0
- keras/src/layers/preprocessing/index_lookup.py +19 -1
- keras/src/layers/preprocessing/normalization.py +14 -1
- keras/src/layers/regularization/dropout.py +43 -1
- keras/src/layers/rnn/rnn.py +19 -0
- keras/src/losses/loss.py +1 -1
- keras/src/losses/losses.py +24 -0
- keras/src/metrics/confusion_metrics.py +7 -6
- keras/src/models/cloning.py +4 -0
- keras/src/models/functional.py +11 -3
- keras/src/models/model.py +172 -34
- keras/src/ops/image.py +257 -20
- keras/src/ops/linalg.py +93 -0
- keras/src/ops/nn.py +258 -0
- keras/src/ops/numpy.py +569 -36
- keras/src/optimizers/muon.py +65 -31
- keras/src/optimizers/schedules/learning_rate_schedule.py +4 -3
- keras/src/quantizers/__init__.py +14 -1
- keras/src/quantizers/awq.py +361 -0
- keras/src/quantizers/awq_config.py +140 -0
- keras/src/quantizers/awq_core.py +217 -0
- keras/src/quantizers/gptq.py +2 -8
- keras/src/quantizers/gptq_config.py +36 -1
- keras/src/quantizers/gptq_core.py +65 -79
- keras/src/quantizers/quantization_config.py +246 -0
- keras/src/quantizers/quantizers.py +127 -61
- keras/src/quantizers/utils.py +23 -0
- keras/src/random/seed_generator.py +6 -4
- keras/src/saving/file_editor.py +81 -6
- keras/src/saving/orbax_util.py +26 -0
- keras/src/saving/saving_api.py +37 -14
- keras/src/saving/saving_lib.py +1 -1
- keras/src/testing/__init__.py +1 -0
- keras/src/testing/test_case.py +45 -5
- keras/src/utils/backend_utils.py +31 -4
- keras/src/utils/dataset_utils.py +234 -35
- keras/src/utils/file_utils.py +49 -11
- keras/src/utils/image_utils.py +14 -2
- keras/src/utils/jax_layer.py +244 -55
- keras/src/utils/module_utils.py +29 -0
- keras/src/utils/progbar.py +10 -2
- keras/src/utils/rng_utils.py +9 -1
- keras/src/utils/tracking.py +5 -5
- keras/src/version.py +1 -1
- {keras_nightly-3.12.0.dev2025100503.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/METADATA +16 -6
- {keras_nightly-3.12.0.dev2025100503.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/RECORD +136 -115
- {keras_nightly-3.12.0.dev2025100503.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/WHEEL +0 -0
- {keras_nightly-3.12.0.dev2025100503.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,7 @@ from keras.src import ops
 from keras.src.api_export import keras_export
 from keras.src.backend.common import global_state
 
-QUANTIZATION_MODES = ("int8", "float8", "int4", "gptq")
+QUANTIZATION_MODES = ("int8", "float8", "int4", "gptq", "awq")
 
 
 @keras_export(
@@ -376,6 +376,93 @@ class GPTQDTypePolicy(QuantizedDTypePolicy):
         return config
 
 
+@keras_export("keras.dtype_policies.AWQDTypePolicy")
+class AWQDTypePolicy(QuantizedDTypePolicy):
+    """Quantized dtype policy for AWQ quantization.
+
+    This policy helps propagate quantization settings for AWQ
+    when loading an AWQ quantized model in Keras format.
+
+    Args:
+        mode: The quantization mode. This should be a string in the format
+            `"awq/<weight_bits>/<group_size>"`.
+            - `"awq"`: The identifier for the quantization algorithm.
+            - `<weight_bits>`: Number of bits to quantize weights to.
+              AWQ presently only supports 4-bit quantization.
+            - `<group_size>`: The group size for quantization. Supported
+              values are -1 (for per-channel quantization) or any
+              positive integer.
+            Example: `"awq/4/128"`.
+        source_name: The source dtype policy name, e.g. "float32".
+    """
+
+    def __init__(
+        self,
+        mode,
+        source_name=None,
+    ):
+        parts = mode.split("/")
+        expected_format = "'awq/<weight_bits>/<group_size>'"
+
+        # Validate format.
+        if len(parts) != 3 or parts[0] != "awq":
+            raise ValueError(
+                "Invalid mode for AWQDTypePolicy. Expected format "
+                f"{expected_format}, but got '{mode}'."
+            )
+
+        # Validate and cast weight_bits and group_size.
+        try:
+            weight_bits = int(parts[1])
+            group_size = int(parts[2])
+        except ValueError:
+            raise ValueError(
+                "Invalid mode for AWQDTypePolicy. <weight_bits> and "
+                "<group_size> must be integers. Expected format "
+                f"{expected_format}, but got '{mode}'."
+            )
+
+        # AWQ presently only supports 4-bit quantization.
+        if weight_bits != 4:
+            raise ValueError(
+                "Invalid weight_bits in mode. AWQ only supports 4-bit "
+                f"quantization, but got {weight_bits} from '{mode}'."
+            )
+
+        if group_size < -1 or group_size == 0:
+            raise ValueError(
+                "Invalid group_size in mode. Supported values are "
+                "-1 (per-channel) or a positive integer, "
+                f"but got {group_size} from '{mode}'."
+            )
+
+        base_mode = parts[0]
+        super().__init__(
+            mode=base_mode,
+            source_name=source_name,
+        )
+
+        self._name = f"{mode}_from_{source_name}"
+        self.mode = base_mode
+        self.weight_bits = weight_bits
+        self.group_size = group_size
+
+    def __eq__(self, other):
+        if super().__eq__(other) is False:
+            return False
+        return (
+            self.weight_bits == other.weight_bits
+            and self.group_size == other.group_size
+        )
+
+    def get_config(self):
+        config = super().get_config()
+        # Reconstruct the full mode string for serialization
+        mode = f"{self.mode}/{self.weight_bits}/{self.group_size}"
+        config.update({"mode": mode})
+        return config
+
+
 @keras_export(
     [
         "keras.config.set_dtype_policy",
@@ -442,6 +529,8 @@ def _get_quantized_dtype_policy_by_str(policy):
         return QuantizedDTypePolicy(mode, source_name)
     elif policy.startswith("gptq"):
         return GPTQDTypePolicy(mode, source_name)
+    elif policy.startswith("awq"):
+        return AWQDTypePolicy(mode, source_name)
     elif policy.startswith("float8"):
         return QuantizedFloat8DTypePolicy(mode, source_name)
     else:

keras/src/export/__init__.py CHANGED

keras/src/export/export_utils.py CHANGED
@@ -7,6 +7,14 @@ from keras.src.utils.module_utils import tensorflow as tf
 
 
 def get_input_signature(model):
+    """Get input signature for model export.
+
+    Args:
+        model: A Keras Model instance.
+
+    Returns:
+        Input signature suitable for model export (always a tuple or list).
+    """
     if not isinstance(model, models.Model):
         raise TypeError(
             "The model must be a `keras.Model`. "
@@ -17,13 +25,20 @@ def get_input_signature(model):
             "The model provided has not yet been built. It must be built "
             "before export."
         )
+
     if isinstance(model, models.Functional):
+        # Functional models expect a single positional argument `inputs`
+        # containing the full nested input structure. We keep the
+        # original behavior of returning a single-element list that
+        # wraps the mapped structure so that downstream exporters
+        # build a tf.function with one positional argument.
         input_signature = [
             tree.map_structure(make_input_spec, model._inputs_struct)
         ]
     elif isinstance(model, models.Sequential):
        input_signature = tree.map_structure(make_input_spec, model.inputs)
     else:
+        # Subclassed models: rely on recorded shapes from the first call.
         input_signature = _infer_input_signature_from_model(model)
         if not input_signature or not model._called:
             raise ValueError(
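As a small illustration of the behavior the new comments describe, here is a hypothetical single-input Functional model; `get_input_signature` lives in the private `keras.src.export.export_utils` module shown in this hunk.

```python
import keras
from keras.src.export.export_utils import get_input_signature

inputs = keras.Input(shape=(4,), name="x")
outputs = keras.layers.Dense(2)(inputs)
model = keras.Model(inputs, outputs)

# Functional models yield a single-element list wrapping the input structure,
# so downstream exporters build a tf.function with one positional argument.
signature = get_input_signature(model)
print(len(signature))  # 1
```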
@@ -60,6 +75,7 @@ def _infer_input_signature_from_model(model):
             f"Unsupported type {type(structure)} for {structure}"
         )
 
+    # Always return a flat list preserving the order of shapes_dict values
     return [_make_input_spec(value) for value in shapes_dict.values()]
 
 
@@ -86,13 +102,34 @@ def make_input_spec(x):
         return input_spec
 
 
-def make_tf_tensor_spec(x):
+def make_tf_tensor_spec(x, dynamic_batch=False):
+    """Create a TensorSpec from various input types.
+
+    Args:
+        x: Input to convert (tf.TensorSpec, KerasTensor, or backend tensor).
+        dynamic_batch: If True, set the batch dimension to None.
+
+    Returns:
+        A tf.TensorSpec instance.
+    """
     if isinstance(x, tf.TensorSpec):
         tensor_spec = x
+        # Adjust batch dimension if needed
+        if dynamic_batch and len(tensor_spec.shape) > 0:
+            shape = tuple(
+                None if i == 0 else s for i, s in enumerate(tensor_spec.shape)
+            )
+            tensor_spec = tf.TensorSpec(
+                shape, dtype=tensor_spec.dtype, name=tensor_spec.name
+            )
     else:
         input_spec = make_input_spec(x)
+        shape = input_spec.shape
+        # Adjust batch dimension if needed and shape is not None
+        if dynamic_batch and shape is not None and len(shape) > 0:
+            shape = tuple(None if i == 0 else s for i, s in enumerate(shape))
         tensor_spec = tf.TensorSpec(
-            input_spec.shape, dtype=input_spec.dtype, name=input_spec.name
+            shape, dtype=input_spec.dtype, name=input_spec.name
         )
     return tensor_spec
 
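A short sketch of what the new `dynamic_batch` flag does, using a hypothetical fixed-batch spec:

```python
import tensorflow as tf
from keras.src.export.export_utils import make_tf_tensor_spec

spec = tf.TensorSpec(shape=(8, 224, 224, 3), dtype=tf.float32, name="images")

# With dynamic_batch=True the leading dimension is rewritten to None,
# per the branch added above; dtype and name are preserved.
dynamic_spec = make_tf_tensor_spec(spec, dynamic_batch=True)
print(dynamic_spec.shape)  # (None, 224, 224, 3)
```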

keras/src/export/litert.py ADDED

@@ -0,0 +1,248 @@
+from keras.src import layers
+from keras.src import models
+from keras.src import tree
+from keras.src.export.export_utils import get_input_signature
+from keras.src.utils import io_utils
+from keras.src.utils.module_utils import tensorflow as tf
+
+
+def export_litert(
+    model,
+    filepath,
+    input_signature=None,
+    **kwargs,
+):
+    """Export the model as a LiteRT artifact for inference.
+
+    Args:
+        model: The Keras model to export.
+        filepath: The path to save the exported artifact.
+        input_signature: Optional input signature specification. If
+            `None`, it will be inferred.
+        **kwargs: Additional keyword arguments passed to the exporter.
+    """
+
+    exporter = LiteRTExporter(
+        model=model,
+        input_signature=input_signature,
+        **kwargs,
+    )
+    exporter.export(filepath)
+    io_utils.print_msg(f"Saved artifact at '{filepath}'.")
+
+
+class LiteRTExporter:
+    """Exporter for the LiteRT (TFLite) format.
+
+    This class handles the conversion of Keras models for LiteRT runtime and
+    generates a `.tflite` model file. For efficient inference on mobile and
+    embedded devices, it creates a single callable signature based on the
+    model's `call()` method.
+    """
+
+    def __init__(
+        self,
+        model,
+        input_signature=None,
+        **kwargs,
+    ):
+        """Initialize the LiteRT exporter.
+
+        Args:
+            model: The Keras model to export
+            input_signature: Input signature specification (e.g., TensorFlow
+                TensorSpec or list of TensorSpec)
+            **kwargs: Additional export parameters
+        """
+        self.model = model
+        self.input_signature = input_signature
+        self.kwargs = kwargs
+
+    def export(self, filepath):
+        """Exports the Keras model to a TFLite file.
+
+        Args:
+            filepath: Output path for the exported model
+
+        Returns:
+            Path to exported model
+        """
+        # 1. Resolve / infer input signature
+        if self.input_signature is None:
+            # Use the standard get_input_signature which handles all model types
+            # and preserves nested structures (dicts, lists, etc.)
+            self.input_signature = get_input_signature(self.model)
+
+        # 2. Determine input structure and create adapter if needed
+        # There are 3 cases:
+        # Case 1: Single input (not nested)
+        # Case 2: Flat list of inputs (list where flattened == original)
+        # Case 3: Nested structure (dicts, nested lists, etc.)
+
+        # Special handling for Functional models: get_input_signature wraps
+        # the structure in a list, so unwrap it for analysis
+        input_struct = self.input_signature
+        if (
+            isinstance(self.input_signature, list)
+            and len(self.input_signature) == 1
+        ):
+            input_struct = self.input_signature[0]
+
+        if not tree.is_nested(input_struct):
+            # Case 1: Single input - use as-is
+            model_to_convert = self.model
+            signature_for_conversion = self.input_signature
+        elif isinstance(input_struct, list) and len(input_struct) == len(
+            tree.flatten(input_struct)
+        ):
+            # Case 2: Flat list of inputs - use as-is
+            model_to_convert = self.model
+            signature_for_conversion = self.input_signature
+        else:
+            # Case 3: Nested structure (dict, nested lists, etc.)
+            # Create adapter model that converts flat list to nested structure
+            adapted_model = self._create_nested_inputs_adapter(input_struct)
+
+            # Flatten signature for TFLite conversion
+            signature_for_conversion = tree.flatten(input_struct)
+
+            # Use adapted model and flat list signature for conversion
+            model_to_convert = adapted_model
+
+        # Store original model reference for later use
+        original_model = self.model
+
+        # Temporarily replace self.model with the model to convert
+        self.model = model_to_convert
+
+        try:
+            # Convert the model to TFLite.
+            tflite_model = self._convert_to_tflite(signature_for_conversion)
+        finally:
+            # Restore original model
+            self.model = original_model
+
+        # Save the TFLite model to the specified file path.
+        if not filepath.endswith(".tflite"):
+            raise ValueError(
+                f"The LiteRT export requires the filepath to end with "
+                f"'.tflite'. Got: {filepath}"
+            )
+
+        with open(filepath, "wb") as f:
+            f.write(tflite_model)
+
+        return filepath
+
+    def _create_nested_inputs_adapter(self, input_signature_struct):
+        """Create an adapter model that converts flat list inputs to nested
+        structure.
+
+        This adapter allows models expecting nested inputs (dicts, lists, etc.)
+        to be exported to TFLite format (which only supports positional/list
+        inputs).
+
+        Args:
+            input_signature_struct: Nested structure of InputSpecs (dict, list,
+                etc.)
+
+        Returns:
+            A Functional model that accepts flat list inputs and converts to
+            nested
+        """
+        # Get flat paths to preserve names and print input mapping
+        paths_and_specs = tree.flatten_with_path(input_signature_struct)
+        paths = [".".join(str(e) for e in p) for p, v in paths_and_specs]
+        io_utils.print_msg(f"Creating adapter for inputs: {paths}")
+
+        # Create Input layers for TFLite (flat list-based)
+        input_layers = []
+        for path, spec in paths_and_specs:
+            # Extract the input name from spec or path
+            name = (
+                spec.name
+                if hasattr(spec, "name") and spec.name
+                else (str(path[-1]) if path else "input")
+            )
+
+            input_layer = layers.Input(
+                shape=spec.shape[1:],  # Remove batch dimension
+                dtype=spec.dtype,
+                name=name,
+            )
+            input_layers.append(input_layer)
+
+        # Reconstruct the nested structure from flat list
+        inputs_structure = tree.pack_sequence_as(
+            input_signature_struct, input_layers
+        )
+
+        # Call the original model with nested inputs
+        outputs = self.model(inputs_structure)
+
+        # Build as Functional model (flat list inputs -> nested -> model ->
+        # output)
+        adapted_model = models.Model(inputs=input_layers, outputs=outputs)
+
+        # Preserve the original model's variables
+        adapted_model._variables = self.model.variables
+        adapted_model._trainable_variables = self.model.trainable_variables
+        adapted_model._non_trainable_variables = (
+            self.model.non_trainable_variables
+        )
+
+        return adapted_model
+
+    def _convert_to_tflite(self, input_signature):
+        """Converts the Keras model to TFLite format.
+
+        Returns:
+            A bytes object containing the serialized TFLite model.
+        """
+        # Try direct conversion first for all models
+        try:
+            converter = tf.lite.TFLiteConverter.from_keras_model(self.model)
+            converter.target_spec.supported_ops = [
+                tf.lite.OpsSet.TFLITE_BUILTINS,
+                tf.lite.OpsSet.SELECT_TF_OPS,
+            ]
+            # Keras 3 only supports resource variables
+            converter.experimental_enable_resource_variables = True
+
+            # Apply any additional converter settings from kwargs
+            self._apply_converter_kwargs(converter)
+
+            tflite_model = converter.convert()
+
+            return tflite_model
+
+        except Exception as e:
+            # If direct conversion fails, raise the error with helpful message
+            raise RuntimeError(
+                f"Direct TFLite conversion failed. This may be due to model "
+                f"complexity or unsupported operations. Error: {e}"
+            ) from e
+
+    def _apply_converter_kwargs(self, converter):
+        """Apply additional converter settings from kwargs.
+
+        Args:
+            converter: tf.lite.TFLiteConverter instance to configure
+
+        Raises:
+            ValueError: If any kwarg is not a valid converter attribute
+        """
+        for attr, value in self.kwargs.items():
+            if attr == "target_spec" and isinstance(value, dict):
+                # Handle nested target_spec settings
+                for spec_key, spec_value in value.items():
+                    if hasattr(converter.target_spec, spec_key):
+                        setattr(converter.target_spec, spec_key, spec_value)
+                    else:
+                        raise ValueError(
+                            f"Unknown target_spec attribute '{spec_key}'"
+                        )
+            elif hasattr(converter, attr):
+                setattr(converter, attr, value)
+            else:
+                raise ValueError(f"Unknown converter attribute '{attr}'")
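A minimal usage sketch of the new exporter. The model and filename are illustrative, a TFLite-convertible model is assumed, and whether the public `Model.export()` API also routes through this module is outside this hunk.

```python
import keras
from keras.src.export.litert import export_litert

model = keras.Sequential([keras.Input(shape=(10,)), keras.layers.Dense(1)])

# Writes a .tflite flatbuffer; export() raises a ValueError if the
# path does not end with ".tflite" (see the check above).
export_litert(model, "model.tflite")
```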

keras/src/export/openvino.py CHANGED
@@ -55,7 +55,7 @@ def export_openvino(
     )
 
     import openvino as ov
-
+    import openvino.opset14 as ov_opset
 
     from keras.src.backend.openvino.core import OPENVINO_DTYPES
     from keras.src.backend.openvino.core import OpenVINOKerasTensor

keras/src/export/tf2onnx_lib.py CHANGED

keras/src/layers/__init__.py CHANGED
@@ -29,6 +29,7 @@ from keras.src.layers.core.input_layer import Input
 from keras.src.layers.core.input_layer import InputLayer
 from keras.src.layers.core.lambda_layer import Lambda
 from keras.src.layers.core.masking import Masking
+from keras.src.layers.core.reversible_embedding import ReversibleEmbedding
 from keras.src.layers.core.wrapper import Wrapper
 from keras.src.layers.input_spec import InputSpec
 from keras.src.layers.layer import Layer
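The import above newly exposes `keras.layers.ReversibleEmbedding`. A rough sketch of the intended use, assuming the layer mirrors the KerasHub layer of the same name; in particular the `reverse` argument for projecting hidden states back to vocabulary logits is an assumption, since the layer body lives in `reversible_embedding.py` and is not part of this hunk.

```python
import numpy as np
import keras

# Constructor arguments assumed to mirror keras_hub.layers.ReversibleEmbedding.
layer = keras.layers.ReversibleEmbedding(input_dim=1000, output_dim=64)

token_ids = np.array([[1, 2, 3]])
embeddings = layer(token_ids)              # (1, 3, 64) token embeddings
logits = layer(embeddings, reverse=True)   # (1, 3, 1000) logits (assumed API)
```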
@@ -62,6 +63,18 @@ from keras.src.layers.normalization.spectral_normalization import (
     SpectralNormalization,
 )
 from keras.src.layers.normalization.unit_normalization import UnitNormalization
+from keras.src.layers.pooling.adaptive_average_pooling1d import (
+    AdaptiveAveragePooling1D,
+)
+from keras.src.layers.pooling.adaptive_average_pooling2d import (
+    AdaptiveAveragePooling2D,
+)
+from keras.src.layers.pooling.adaptive_average_pooling3d import (
+    AdaptiveAveragePooling3D,
+)
+from keras.src.layers.pooling.adaptive_max_pooling1d import AdaptiveMaxPooling1D
+from keras.src.layers.pooling.adaptive_max_pooling2d import AdaptiveMaxPooling2D
+from keras.src.layers.pooling.adaptive_max_pooling3d import AdaptiveMaxPooling3D
 from keras.src.layers.pooling.average_pooling1d import AveragePooling1D
 from keras.src.layers.pooling.average_pooling2d import AveragePooling2D
 from keras.src.layers.pooling.average_pooling3d import AveragePooling3D
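These imports expose the new adaptive pooling layers. A hedged sketch of expected usage, assuming the layers take a target `output_size` in the usual adaptive-pooling sense; the actual constructor signatures live in the pooling modules, not in this hunk.

```python
import numpy as np
import keras

# Assumed signature: output_size gives the target spatial dimensions.
pool = keras.layers.AdaptiveAveragePooling2D(output_size=(4, 4))

x = np.random.rand(2, 32, 32, 3).astype("float32")
y = pool(x)
print(y.shape)  # expected (2, 4, 4, 3) with channels_last data
```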

keras/src/layers/activations/softmax.py CHANGED

@@ -52,10 +52,15 @@ class Softmax(Layer):
 
     def call(self, inputs, mask=None):
         if mask is not None:
-            adder = (
-                1.0 - backend.cast(mask, inputs.dtype)
-            ) * _large_negative_number(inputs.dtype)
-            inputs += adder
+            # We keep the positions where the mask is True or > 0.5, and set
+            # the other (masked) positions to a large negative number.
+            if backend.standardize_dtype(mask.dtype) != "bool":
+                mask = backend.numpy.greater(
+                    mask, backend.cast(0.5, dtype=mask.dtype)
+                )
+            inputs = backend.numpy.where(
+                mask, inputs, _large_negative_number(inputs.dtype)
+            )
         if isinstance(self.axis, (tuple, list)):
             if len(self.axis) > 1:
                 outputs = backend.numpy.exp(
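The rewritten branch accepts boolean masks directly and thresholds float masks at 0.5; masked positions receive a large negative score and therefore approximately zero probability:

```python
import numpy as np
import keras

scores = np.array([[1.0, 2.0, 3.0]], dtype="float32")
mask = np.array([[True, True, False]])

probs = keras.layers.Softmax()(scores, mask=mask)
print(keras.ops.convert_to_numpy(probs).round(3))  # approx. [[0.269 0.731 0.]]
```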

keras/src/layers/attention/multi_head_attention.py CHANGED

@@ -378,7 +378,10 @@ class MultiHeadAttention(Layer):
         if self._attention_axes is None:
             self._attention_axes = tuple(range(1, rank - 2))
         else:
-            self._attention_axes = tuple(self._attention_axes)
+            self._attention_axes = tuple(
+                axis if axis >= 0 else (rank - 1) + axis
+                for axis in self._attention_axes
+            )
         (
             self._dot_product_equation,
             self._combine_equation,