PyPI - keras-nightly - Versions diffs - 3.12.0.dev2025083103__py3-none-any.whl → 3.14.0.dev2026011604__py3-none-any.whl - Mend

keras-nightly 3.12.0.dev2025083103__py3-none-any.whl → 3.14.0.dev2026011604__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (164) hide show

keras/__init__.py +1 -0
keras/_tf_keras/keras/__init__.py +1 -0
keras/_tf_keras/keras/callbacks/__init__.py +3 -0
keras/_tf_keras/keras/distillation/__init__.py +16 -0
keras/_tf_keras/keras/distribution/__init__.py +3 -0
keras/_tf_keras/keras/dtype_policies/__init__.py +6 -0
keras/_tf_keras/keras/layers/__init__.py +21 -0
keras/_tf_keras/keras/ops/__init__.py +16 -0
keras/_tf_keras/keras/ops/image/__init__.py +1 -0
keras/_tf_keras/keras/ops/linalg/__init__.py +1 -0
keras/_tf_keras/keras/ops/nn/__init__.py +3 -0
keras/_tf_keras/keras/ops/numpy/__init__.py +12 -0
keras/_tf_keras/keras/quantizers/__init__.py +13 -0
keras/callbacks/__init__.py +3 -0
keras/distillation/__init__.py +16 -0
keras/distribution/__init__.py +3 -0
keras/dtype_policies/__init__.py +6 -0
keras/layers/__init__.py +21 -0
keras/ops/__init__.py +16 -0
keras/ops/image/__init__.py +1 -0
keras/ops/linalg/__init__.py +1 -0
keras/ops/nn/__init__.py +3 -0
keras/ops/numpy/__init__.py +12 -0
keras/quantizers/__init__.py +13 -0
keras/src/applications/imagenet_utils.py +4 -1
keras/src/backend/common/backend_utils.py +30 -6
keras/src/backend/common/dtypes.py +6 -12
keras/src/backend/common/name_scope.py +2 -1
keras/src/backend/common/variables.py +38 -20
keras/src/backend/jax/core.py +126 -78
keras/src/backend/jax/distribution_lib.py +16 -2
keras/src/backend/jax/layer.py +3 -1
keras/src/backend/jax/linalg.py +4 -0
keras/src/backend/jax/nn.py +511 -29
keras/src/backend/jax/numpy.py +109 -23
keras/src/backend/jax/optimizer.py +3 -2
keras/src/backend/jax/trainer.py +18 -3
keras/src/backend/numpy/linalg.py +4 -0
keras/src/backend/numpy/nn.py +313 -2
keras/src/backend/numpy/numpy.py +97 -8
keras/src/backend/openvino/__init__.py +1 -0
keras/src/backend/openvino/core.py +6 -23
keras/src/backend/openvino/linalg.py +4 -0
keras/src/backend/openvino/nn.py +271 -20
keras/src/backend/openvino/numpy.py +1369 -195
keras/src/backend/openvino/random.py +7 -14
keras/src/backend/tensorflow/layer.py +43 -9
keras/src/backend/tensorflow/linalg.py +24 -0
keras/src/backend/tensorflow/nn.py +545 -1
keras/src/backend/tensorflow/numpy.py +351 -56
keras/src/backend/tensorflow/trainer.py +6 -2
keras/src/backend/torch/core.py +3 -1
keras/src/backend/torch/linalg.py +4 -0
keras/src/backend/torch/nn.py +125 -0
keras/src/backend/torch/numpy.py +109 -9
keras/src/backend/torch/trainer.py +8 -2
keras/src/callbacks/__init__.py +1 -0
keras/src/callbacks/callback_list.py +45 -11
keras/src/callbacks/model_checkpoint.py +5 -0
keras/src/callbacks/orbax_checkpoint.py +332 -0
keras/src/callbacks/terminate_on_nan.py +54 -5
keras/src/datasets/cifar10.py +5 -0
keras/src/distillation/__init__.py +1 -0
keras/src/distillation/distillation_loss.py +390 -0
keras/src/distillation/distiller.py +598 -0
keras/src/distribution/distribution_lib.py +14 -0
keras/src/dtype_policies/__init__.py +4 -0
keras/src/dtype_policies/dtype_policy.py +180 -1
keras/src/export/__init__.py +2 -0
keras/src/export/export_utils.py +39 -2
keras/src/export/litert.py +248 -0
keras/src/export/onnx.py +6 -0
keras/src/export/openvino.py +1 -1
keras/src/export/tf2onnx_lib.py +3 -0
keras/src/layers/__init__.py +13 -0
keras/src/layers/activations/softmax.py +9 -4
keras/src/layers/attention/attention.py +1 -1
keras/src/layers/attention/multi_head_attention.py +4 -1
keras/src/layers/core/dense.py +406 -102
keras/src/layers/core/einsum_dense.py +521 -116
keras/src/layers/core/embedding.py +257 -99
keras/src/layers/core/input_layer.py +1 -0
keras/src/layers/core/reversible_embedding.py +399 -0
keras/src/layers/input_spec.py +17 -17
keras/src/layers/layer.py +50 -15
keras/src/layers/merging/concatenate.py +6 -5
keras/src/layers/merging/dot.py +4 -1
keras/src/layers/pooling/adaptive_average_pooling1d.py +65 -0
keras/src/layers/pooling/adaptive_average_pooling2d.py +62 -0
keras/src/layers/pooling/adaptive_average_pooling3d.py +63 -0
keras/src/layers/pooling/adaptive_max_pooling1d.py +65 -0
keras/src/layers/pooling/adaptive_max_pooling2d.py +62 -0
keras/src/layers/pooling/adaptive_max_pooling3d.py +63 -0
keras/src/layers/pooling/base_adaptive_pooling.py +63 -0
keras/src/layers/preprocessing/discretization.py +6 -5
keras/src/layers/preprocessing/feature_space.py +8 -4
keras/src/layers/preprocessing/image_preprocessing/aug_mix.py +2 -2
keras/src/layers/preprocessing/image_preprocessing/bounding_boxes/validation.py +5 -5
keras/src/layers/preprocessing/image_preprocessing/random_contrast.py +3 -3
keras/src/layers/preprocessing/image_preprocessing/resizing.py +10 -0
keras/src/layers/preprocessing/index_lookup.py +19 -1
keras/src/layers/preprocessing/normalization.py +16 -1
keras/src/layers/preprocessing/string_lookup.py +26 -28
keras/src/layers/regularization/dropout.py +43 -1
keras/src/layers/rnn/gru.py +1 -1
keras/src/layers/rnn/lstm.py +2 -2
keras/src/layers/rnn/rnn.py +19 -0
keras/src/layers/rnn/simple_rnn.py +1 -1
keras/src/legacy/preprocessing/image.py +4 -1
keras/src/legacy/preprocessing/sequence.py +20 -12
keras/src/losses/loss.py +1 -1
keras/src/losses/losses.py +24 -0
keras/src/metrics/confusion_metrics.py +7 -6
keras/src/models/cloning.py +4 -0
keras/src/models/functional.py +11 -3
keras/src/models/model.py +195 -44
keras/src/ops/image.py +257 -20
keras/src/ops/linalg.py +93 -0
keras/src/ops/nn.py +268 -2
keras/src/ops/numpy.py +701 -44
keras/src/ops/operation.py +90 -29
keras/src/ops/operation_utils.py +2 -0
keras/src/optimizers/adafactor.py +29 -10
keras/src/optimizers/base_optimizer.py +22 -3
keras/src/optimizers/loss_scale_optimizer.py +51 -18
keras/src/optimizers/muon.py +65 -31
keras/src/optimizers/schedules/learning_rate_schedule.py +4 -3
keras/src/quantizers/__init__.py +14 -1
keras/src/quantizers/awq.py +361 -0
keras/src/quantizers/awq_config.py +140 -0
keras/src/quantizers/awq_core.py +217 -0
keras/src/quantizers/gptq.py +346 -207
keras/src/quantizers/gptq_config.py +63 -13
keras/src/quantizers/gptq_core.py +328 -215
keras/src/quantizers/quantization_config.py +246 -0
keras/src/quantizers/quantizers.py +407 -38
keras/src/quantizers/utils.py +23 -0
keras/src/random/seed_generator.py +6 -4
keras/src/saving/file_editor.py +81 -6
keras/src/saving/orbax_util.py +26 -0
keras/src/saving/saving_api.py +37 -14
keras/src/saving/saving_lib.py +1 -1
keras/src/testing/__init__.py +1 -0
keras/src/testing/test_case.py +45 -5
keras/src/trainers/compile_utils.py +38 -17
keras/src/trainers/data_adapters/grain_dataset_adapter.py +1 -5
keras/src/tree/torchtree_impl.py +215 -0
keras/src/tree/tree_api.py +6 -1
keras/src/utils/backend_utils.py +31 -4
keras/src/utils/dataset_utils.py +234 -35
keras/src/utils/file_utils.py +49 -11
keras/src/utils/image_utils.py +14 -2
keras/src/utils/jax_layer.py +244 -55
keras/src/utils/module_utils.py +29 -0
keras/src/utils/progbar.py +10 -12
keras/src/utils/python_utils.py +5 -0
keras/src/utils/rng_utils.py +9 -1
keras/src/utils/tracking.py +70 -5
keras/src/version.py +1 -1
{keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/METADATA +16 -6
{keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/RECORD +163 -142
keras/src/quantizers/gptq_quant.py +0 -133
{keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/WHEEL +0 -0
{keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/top_level.txt +0 -0

keras/src/backend/torch/nn.py CHANGED Viewed

@@ -458,6 +458,94 @@ def average_pool(
     return outputs
+def adaptive_average_pool(inputs, output_size, data_format=None):
+    """Adaptive average pooling(1D/2D/3D) with channels_last support."""
+    inputs = convert_to_tensor(inputs)
+    num_spatial_dims = inputs.ndim - 2
+    data_format = backend.standardize_data_format(data_format)
+    orig_format = data_format
+    if data_format == "channels_last":
+        inputs = _transpose_spatial_inputs(inputs)
+    if isinstance(output_size, int):
+        torch_output_size = (
+            output_size
+            if num_spatial_dims == 1
+            else (output_size,) * num_spatial_dims
+        )
+    else:
+        torch_output_size = standardize_tuple(
+            output_size, num_spatial_dims, "output_size"
+        )
+    if get_device() == "meta":
+        inputs = torch.empty(
+            size=inputs.shape, dtype=inputs.dtype, device="cpu"
+        )
+    if num_spatial_dims == 1:
+        outputs = tnn.adaptive_avg_pool1d(inputs, output_size=torch_output_size)
+    elif num_spatial_dims == 2:
+        outputs = tnn.adaptive_avg_pool2d(inputs, output_size=torch_output_size)
+    elif num_spatial_dims == 3:
+        outputs = tnn.adaptive_avg_pool3d(inputs, output_size=torch_output_size)
+    else:
+        raise ValueError(
+            "Inputs to adaptive average pooling must have ndim=3, 4 or 5, "
+            f"Received input shape: {inputs.shape}."
+        )
+    if orig_format == "channels_last":
+        outputs = _transpose_spatial_outputs(outputs)
+    return outputs
+def adaptive_max_pool(inputs, output_size, data_format=None):
+    """Adaptive max pooling(1D/2D/3D) with channels_last support."""
+    inputs = convert_to_tensor(inputs)
+    num_spatial_dims = inputs.ndim - 2
+    data_format = backend.standardize_data_format(data_format)
+    orig_format = data_format
+    if data_format == "channels_last":
+        inputs = _transpose_spatial_inputs(inputs)
+    if isinstance(output_size, int):
+        torch_output_size = (
+            output_size
+            if num_spatial_dims == 1
+            else (output_size,) * num_spatial_dims
+        )
+    else:
+        torch_output_size = standardize_tuple(
+            output_size, num_spatial_dims, "output_size"
+        )
+    if get_device() == "meta":
+        inputs = torch.empty(
+            size=inputs.shape, dtype=inputs.dtype, device="cpu"
+        )
+    if num_spatial_dims == 1:
+        res = tnn.adaptive_max_pool1d(inputs, output_size=torch_output_size)
+    elif num_spatial_dims == 2:
+        res = tnn.adaptive_max_pool2d(inputs, output_size=torch_output_size)
+    elif num_spatial_dims == 3:
+        res = tnn.adaptive_max_pool3d(inputs, output_size=torch_output_size)
+    else:
+        raise ValueError(
+            "Inputs to adaptive max pooling must have ndim=3, 4 or 5, "
+            f"Received input shape: {inputs.shape}."
+        )
+    outputs = res[0] if isinstance(res, tuple) else res
+    if orig_format == "channels_last":
+        outputs = _transpose_spatial_outputs(outputs)
+    return outputs
 def conv(
     inputs,
     kernel,
@@ -755,12 +843,26 @@ def binary_crossentropy(target, output, from_logits=False):
     target = convert_to_tensor(target)
     output = convert_to_tensor(output)
+    # We only apply the squeeze fix if we are on an MPS device,
+    # as this change breaks tests on other platforms that
+    # expect the original tensor shape to be preserved.
+    if (
+        torch.backends.mps.is_available()
+        and target.ndim > 1
+        and output.ndim == target.ndim
+        and target.shape[-1] == 1
+        and output.shape[-1] == 1
+    ):
+        target = torch.squeeze(target, -1).contiguous()
+        output = torch.squeeze(output, -1).contiguous()
     if target.shape != output.shape:
         raise ValueError(
             "Arguments `target` and `output` must have the same shape. "
             "Received: "
             f"target.shape={target.shape}, output.shape={output.shape}"
         )
     # By default, PyTorch, does reduction of `sum` over all rows,
     # change reduction to `none` to keep dim
     if from_logits:
@@ -1092,3 +1194,26 @@ def dot_product_attention(
             scale=scale,
         )
     return torch.transpose(attention_output, axis1, axis0)
+def unfold(input, kernel_size, dilation=1, padding=0, stride=1):
+    """Native PyTorch implementation of Unfold.
+    Extract sliding local blocks from a **NCHW** batched image tensor.
+    Args:
+        input: 4-D tensor, shape (N, C, H, W)  **required**.
+        kernel_size: int or (kH, kW)
+        dilation: int or (dH, dW), default 1
+        padding: int or (pH, pW), default 0
+        stride: int or (sH, sW), default 1
+    Returns:
+        3-D tensor, shape (N, C*kH*kW, L)
+    """
+    return tnn.unfold(
+        input,
+        kernel_size=kernel_size,
+        dilation=dilation,
+        padding=padding,
+        stride=stride,
+    )

keras/src/backend/torch/numpy.py CHANGED Viewed

@@ -313,18 +313,19 @@ def append(x1, x2, axis=None):
     return torch.cat((x1, x2), dim=axis)
-def arange(start, stop=None, step=1, dtype=None):
+def arange(start, stop=None, step=None, dtype=None):
     if dtype is None:
-        dtypes_to_resolve = [
-            getattr(start, "dtype", type(start)),
-            getattr(step, "dtype", type(step)),
-        ]
+        dtypes_to_resolve = [getattr(start, "dtype", type(start))]
         if stop is not None:
             dtypes_to_resolve.append(getattr(stop, "dtype", type(stop)))
+        if step is not None:
+            dtypes_to_resolve.append(getattr(step, "dtype", type(step)))
         dtype = dtypes.result_type(*dtypes_to_resolve)
     dtype = to_torch_dtype(dtype)
     if stop is None:
-        return torch.arange(end=start, dtype=dtype, device=get_device())
+        start, stop = 0, start
+    if step is None:
+        step = 1
     return torch.arange(
         start, stop, step=step, dtype=dtype, device=get_device()
     )
@@ -410,6 +411,12 @@ def array(x, dtype=None):
     return convert_to_tensor(x, dtype=dtype)
+def view(x, dtype=None):
+    dtype = to_torch_dtype(dtype)
+    x = convert_to_tensor(x)
+    return x.view(dtype=dtype)
 def average(x, axis=None, weights=None):
     x = convert_to_tensor(x)
     dtypes_to_resolve = [x.dtype, float]
@@ -763,6 +770,12 @@ def empty(shape, dtype=None):
     return torch.empty(size=shape, dtype=dtype, device=get_device())
+def empty_like(x, dtype=None):
+    x = convert_to_tensor(x)
+    dtype = to_torch_dtype(dtype or x.dtype)
+    return torch.empty_like(x, dtype=dtype, device=get_device())
 def equal(x1, x2):
     x1, x2 = convert_to_tensor(x1), convert_to_tensor(x2)
     return torch.eq(x1, x2)
@@ -945,6 +958,37 @@ def isposinf(x):
     return torch.isposinf(x)
+def isreal(x):
+    x = convert_to_tensor(x)
+    return torch.isreal(x)
+def kron(x1, x2):
+    x1 = convert_to_tensor(x1)
+    x2 = convert_to_tensor(x2)
+    return torch.kron(x1, x2)
+def lcm(x1, x2):
+    x1 = convert_to_tensor(x1)
+    x2 = convert_to_tensor(x2)
+    return torch.lcm(x1, x2)
+def ldexp(x1, x2):
+    x1 = convert_to_tensor(x1)
+    x2 = convert_to_tensor(x2)
+    dtype = dtypes.result_type(x1.dtype, x2.dtype, float)
+    if standardize_dtype(x2.dtype) not in dtypes.INT_TYPES:
+        raise TypeError(
+            f"ldexp exponent must be an integer type. "
+            f"Received: x2 dtype={x2.dtype}"
+        )
+    return cast(torch.ldexp(x1, x2), dtype)
 def less(x1, x2):
     x1, x2 = convert_to_tensor(x1), convert_to_tensor(x2)
     return torch.less(x1, x2)
@@ -1041,6 +1085,15 @@ def logaddexp(x1, x2):
         return torch.logaddexp(x1, x2)
+def logaddexp2(x1, x2):
+    x1 = convert_to_tensor(x1)
+    x2 = convert_to_tensor(x2)
+    dtype = dtypes.result_type(x1.dtype, x2.dtype, float)
+    x1 = cast(x1, dtype)
+    x2 = cast(x2, dtype)
+    return torch.logaddexp2(x1, x2)
 def logical_and(x1, x2):
     x1, x2 = convert_to_tensor(x1), convert_to_tensor(x2)
     return torch.logical_and(x1, x2)
@@ -1329,6 +1382,18 @@ def prod(x, axis=None, keepdims=False, dtype=None):
     return x
+def ptp(x, axis=None, keepdims=False):
+    x = convert_to_tensor(x)
+    if axis is None:
+        return x.max() - x.min()
+    elif axis == ():
+        return torch.zeros_like(x)
+    else:
+        return torch.amax(x, dim=axis, keepdim=keepdims) - torch.amin(
+            x, dim=axis, keepdim=keepdims
+        )
 def quantile(x, q, axis=None, method="linear", keepdims=False):
     x = convert_to_tensor(x)
     q = convert_to_tensor(q)
@@ -1434,7 +1499,7 @@ def searchsorted(sorted_sequence, values, side="left"):
             "to extend it to N-D sequences. Received: "
             f"sorted_sequence.shape={sorted_sequence.shape}"
         )
-    out_int32 = len(sorted_sequence) <= np.iinfo(np.int32).max
+    out_int32 = sorted_sequence.shape[0] <= np.iinfo(np.int32).max
     return torch.searchsorted(
         sorted_sequence, values, side=side, out_int32=out_int32
     )
@@ -1506,6 +1571,12 @@ def split(x, indices_or_sections, axis=0):
     return list(out)
+def array_split(x, indices_or_sections, axis=0):
+    x = convert_to_tensor(x)
+    out = torch.tensor_split(x, indices_or_sections, dim=axis)
+    return list(out)
 def stack(x, axis=0):
     x = [convert_to_tensor(elem) for elem in x]
     return torch.stack(x, dim=axis)
@@ -1619,8 +1690,9 @@ def tile(x, repeats):
 def trace(x, offset=0, axis1=0, axis2=1):
     x = convert_to_tensor(x)
     dtype = standardize_dtype(x.dtype)
-    if dtype != "int64":
-        dtype = dtypes.result_type(dtype, "int32")
+    if dtype in ("bool", "int8", "int16", "uint8"):
+        # Torch backend doesn't support uint32 dtype.
+        dtype = "int32"
     return torch.sum(
         torch.diagonal(x, offset, axis1, axis2),
         dim=-1,
@@ -1733,6 +1805,16 @@ def negative(x):
     return torch.negative(x)
+def nextafter(x1, x2):
+    x1 = convert_to_tensor(x1)
+    x2 = convert_to_tensor(x2)
+    dtype = dtypes.result_type(x1.dtype, x2.dtype, float)
+    x1 = cast(x1, torch.float64)
+    x2 = cast(x2, torch.float64)
+    return cast(torch.nextafter(x1, x2), dtype)
 def square(x):
     x = convert_to_tensor(x)
     if standardize_dtype(x.dtype) == "bool":
@@ -1761,6 +1843,24 @@ def transpose(x, axes=None):
     return x.T
+def trapezoid(y, x=None, dx=1.0, axis=-1):
+    y = convert_to_tensor(y)
+    if standardize_dtype(y.dtype) == "bool":
+        y = cast(y, config.floatx())
+    if x is not None:
+        x = convert_to_tensor(x)
+        return torch.trapz(y, x=x, dim=axis)
+    else:
+        dx = convert_to_tensor(dx)
+        return torch.trapz(y, dx=dx, dim=axis)
+def vander(x, N=None, increasing=False):
+    x = convert_to_tensor(x)
+    result_dtype = dtypes.result_type(x.dtype)
+    return cast(torch.vander(x, N=N, increasing=increasing), result_dtype)
 def var(x, axis=None, keepdims=False):
     x = convert_to_tensor(x)
     compute_dtype = dtypes.result_type(x.dtype, "float32")

keras/src/backend/torch/trainer.py CHANGED Viewed

@@ -54,7 +54,10 @@ class TorchTrainer(base_trainer.Trainer):
             x=x, y=y, y_pred=y_pred, sample_weight=sample_weight, training=True
         )
         self._loss_tracker.update_state(
-            loss, sample_weight=tree.flatten(x)[0].shape[0]
+            loss,
+            sample_weight=next(
+                i for i in tree.flatten(x) if i is not None
+            ).shape[0],
         )
         if self.optimizer is not None:
             loss = self.optimizer.scale_loss(loss)
@@ -90,7 +93,10 @@ class TorchTrainer(base_trainer.Trainer):
             x=x, y=y, y_pred=y_pred, sample_weight=sample_weight, training=False
         )
         self._loss_tracker.update_state(
-            loss, sample_weight=tree.flatten(x)[0].shape[0]
+            loss,
+            sample_weight=next(
+                i for i in tree.flatten(x) if i is not None
+            ).shape[0],
         )
         return self.compute_metrics(x, y, y_pred, sample_weight=sample_weight)

keras/src/callbacks/__init__.py CHANGED Viewed

@@ -8,6 +8,7 @@ from keras.src.callbacks.lambda_callback import LambdaCallback
 from keras.src.callbacks.learning_rate_scheduler import LearningRateScheduler
 from keras.src.callbacks.model_checkpoint import ModelCheckpoint
 from keras.src.callbacks.monitor_callback import MonitorCallback
+from keras.src.callbacks.orbax_checkpoint import OrbaxCheckpoint
 from keras.src.callbacks.progbar_logger import ProgbarLogger
 from keras.src.callbacks.reduce_lr_on_plateau import ReduceLROnPlateau
 from keras.src.callbacks.remote_monitor import RemoteMonitor

keras/src/callbacks/callback_list.py CHANGED Viewed

@@ -39,6 +39,7 @@ class CallbackList(Callback):
                 via `Callback.set_params`.
         """
         self.callbacks = tree.flatten(callbacks) if callbacks else []
+        self._in_begin_end_block_count = 0
         self._executor = None
         self._async_train = False
         self._async_test = False
@@ -78,9 +79,6 @@ class CallbackList(Callback):
                     if not utils.is_default(cbk.on_predict_batch_end):
                         async_predict = False
-        if async_train or async_test or async_predict:
-            self._executor = concurrent.futures.ThreadPoolExecutor()
         self._async_train = async_train
         self._async_test = async_test
         self._async_predict = async_predict
@@ -113,6 +111,33 @@ class CallbackList(Callback):
         for callback in self.callbacks:
             callback.set_model(model)
+    def _on_begin(self):
+        """Called by `on_train/test/predict_begin`.
+        Start the executor for async calls if needed.
+        """
+        self._in_begin_end_block_count += 1
+        if (
+            self._in_begin_end_block_count == 1
+            and (self._async_train or self._async_test or self._async_predict)
+            and self._executor is None
+        ):
+            self._executor = concurrent.futures.ThreadPoolExecutor()
+    def _on_end(self):
+        """Called by `on_train/test/predict_end`.
+        Shutdown the executor for async calls if all begin/end blocks completed.
+        """
+        self._in_begin_end_block_count -= 1
+        if self._in_begin_end_block_count < 0:
+            raise ValueError(
+                "`on_xxx_end` called without corresponding `on_xxx_begin`"
+            )
+        if self._in_begin_end_block_count == 0 and self._executor is not None:
+            self._executor.shutdown()
+            self._executor = None
     def _async_dispatch(self, fn, *args):
         for future in self._futures:
             if future.done():
@@ -121,7 +146,8 @@ class CallbackList(Callback):
         future = self._executor.submit(fn, *args)
         self._futures.append(future)
-    def _clear_futures(self):
+    def _flush_futures(self):
+        """Waits for all futures to complete and clears the list."""
         for future in self._futures:
             future.result()
         self._futures = []
@@ -138,7 +164,7 @@ class CallbackList(Callback):
     def on_epoch_end(self, epoch, logs=None):
         if self._async_train:
-            self._clear_futures()
+            self._flush_futures()
         logs = python_utils.pythonify_logs(logs)
         for callback in self.callbacks:
@@ -204,44 +230,52 @@ class CallbackList(Callback):
             callback.on_predict_batch_end(batch, logs=logs)
     def on_train_begin(self, logs=None):
+        self._on_begin()
         logs = python_utils.pythonify_logs(logs)
         for callback in self.callbacks:
             callback.on_train_begin(logs)
     def on_train_end(self, logs=None):
         if self._async_train:
-            self._clear_futures()
+            self._flush_futures()
         logs = python_utils.pythonify_logs(logs)
         for callback in self.callbacks:
             callback.on_train_end(logs)
+        self._on_end()
     def on_test_begin(self, logs=None):
+        self._on_begin()
         logs = python_utils.pythonify_logs(logs)
         for callback in self.callbacks:
             callback.on_test_begin(logs)
     def on_test_end(self, logs=None):
         if self._async_test:
-            self._clear_futures()
+            self._flush_futures()
         logs = python_utils.pythonify_logs(logs)
         for callback in self.callbacks:
             callback.on_test_end(logs)
+        self._on_end()
     def on_predict_begin(self, logs=None):
+        self._on_begin()
         logs = python_utils.pythonify_logs(logs)
         for callback in self.callbacks:
             callback.on_predict_begin(logs)
     def on_predict_end(self, logs=None):
         if self._async_predict:
-            self._clear_futures()
+            self._flush_futures()
         logs = python_utils.pythonify_logs(logs)
         for callback in self.callbacks:
             callback.on_predict_end(logs)
-    def __del__(self):
-        if self._executor is not None:
-            self._executor.shutdown(cancel_futures=True)
+        self._on_end()

keras/src/callbacks/model_checkpoint.py CHANGED Viewed

@@ -283,6 +283,11 @@ class ModelCheckpoint(MonitorCallback):
                     self.model.save_weights(filepath, overwrite=True)
                 else:
                     self.model.save(filepath, overwrite=True)
+                if self.verbose > 0:
+                    io_utils.print_msg(
+                        f"\nEpoch {epoch + 1}: "
+                        f"finished saving model to {filepath}"
+                    )
         except IsADirectoryError:  # h5py 3.x
             raise IOError(
                 "Please specify a non-directory filepath for "

keras-nightly 3.12.0.dev2025083103__py3-none-any.whl → 3.14.0.dev2026011604__py3-none-any.whl

keras-nightly 3.12.0.dev2025083103__py3-none-any.whl → 3.14.0.dev2026011604__py3-none-any.whl