keras-nightly 3.12.0.dev2025083103__py3-none-any.whl → 3.14.0.dev2026011604__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164)
  1. keras/__init__.py +1 -0
  2. keras/_tf_keras/keras/__init__.py +1 -0
  3. keras/_tf_keras/keras/callbacks/__init__.py +3 -0
  4. keras/_tf_keras/keras/distillation/__init__.py +16 -0
  5. keras/_tf_keras/keras/distribution/__init__.py +3 -0
  6. keras/_tf_keras/keras/dtype_policies/__init__.py +6 -0
  7. keras/_tf_keras/keras/layers/__init__.py +21 -0
  8. keras/_tf_keras/keras/ops/__init__.py +16 -0
  9. keras/_tf_keras/keras/ops/image/__init__.py +1 -0
  10. keras/_tf_keras/keras/ops/linalg/__init__.py +1 -0
  11. keras/_tf_keras/keras/ops/nn/__init__.py +3 -0
  12. keras/_tf_keras/keras/ops/numpy/__init__.py +12 -0
  13. keras/_tf_keras/keras/quantizers/__init__.py +13 -0
  14. keras/callbacks/__init__.py +3 -0
  15. keras/distillation/__init__.py +16 -0
  16. keras/distribution/__init__.py +3 -0
  17. keras/dtype_policies/__init__.py +6 -0
  18. keras/layers/__init__.py +21 -0
  19. keras/ops/__init__.py +16 -0
  20. keras/ops/image/__init__.py +1 -0
  21. keras/ops/linalg/__init__.py +1 -0
  22. keras/ops/nn/__init__.py +3 -0
  23. keras/ops/numpy/__init__.py +12 -0
  24. keras/quantizers/__init__.py +13 -0
  25. keras/src/applications/imagenet_utils.py +4 -1
  26. keras/src/backend/common/backend_utils.py +30 -6
  27. keras/src/backend/common/dtypes.py +6 -12
  28. keras/src/backend/common/name_scope.py +2 -1
  29. keras/src/backend/common/variables.py +38 -20
  30. keras/src/backend/jax/core.py +126 -78
  31. keras/src/backend/jax/distribution_lib.py +16 -2
  32. keras/src/backend/jax/layer.py +3 -1
  33. keras/src/backend/jax/linalg.py +4 -0
  34. keras/src/backend/jax/nn.py +511 -29
  35. keras/src/backend/jax/numpy.py +109 -23
  36. keras/src/backend/jax/optimizer.py +3 -2
  37. keras/src/backend/jax/trainer.py +18 -3
  38. keras/src/backend/numpy/linalg.py +4 -0
  39. keras/src/backend/numpy/nn.py +313 -2
  40. keras/src/backend/numpy/numpy.py +97 -8
  41. keras/src/backend/openvino/__init__.py +1 -0
  42. keras/src/backend/openvino/core.py +6 -23
  43. keras/src/backend/openvino/linalg.py +4 -0
  44. keras/src/backend/openvino/nn.py +271 -20
  45. keras/src/backend/openvino/numpy.py +1369 -195
  46. keras/src/backend/openvino/random.py +7 -14
  47. keras/src/backend/tensorflow/layer.py +43 -9
  48. keras/src/backend/tensorflow/linalg.py +24 -0
  49. keras/src/backend/tensorflow/nn.py +545 -1
  50. keras/src/backend/tensorflow/numpy.py +351 -56
  51. keras/src/backend/tensorflow/trainer.py +6 -2
  52. keras/src/backend/torch/core.py +3 -1
  53. keras/src/backend/torch/linalg.py +4 -0
  54. keras/src/backend/torch/nn.py +125 -0
  55. keras/src/backend/torch/numpy.py +109 -9
  56. keras/src/backend/torch/trainer.py +8 -2
  57. keras/src/callbacks/__init__.py +1 -0
  58. keras/src/callbacks/callback_list.py +45 -11
  59. keras/src/callbacks/model_checkpoint.py +5 -0
  60. keras/src/callbacks/orbax_checkpoint.py +332 -0
  61. keras/src/callbacks/terminate_on_nan.py +54 -5
  62. keras/src/datasets/cifar10.py +5 -0
  63. keras/src/distillation/__init__.py +1 -0
  64. keras/src/distillation/distillation_loss.py +390 -0
  65. keras/src/distillation/distiller.py +598 -0
  66. keras/src/distribution/distribution_lib.py +14 -0
  67. keras/src/dtype_policies/__init__.py +4 -0
  68. keras/src/dtype_policies/dtype_policy.py +180 -1
  69. keras/src/export/__init__.py +2 -0
  70. keras/src/export/export_utils.py +39 -2
  71. keras/src/export/litert.py +248 -0
  72. keras/src/export/onnx.py +6 -0
  73. keras/src/export/openvino.py +1 -1
  74. keras/src/export/tf2onnx_lib.py +3 -0
  75. keras/src/layers/__init__.py +13 -0
  76. keras/src/layers/activations/softmax.py +9 -4
  77. keras/src/layers/attention/attention.py +1 -1
  78. keras/src/layers/attention/multi_head_attention.py +4 -1
  79. keras/src/layers/core/dense.py +406 -102
  80. keras/src/layers/core/einsum_dense.py +521 -116
  81. keras/src/layers/core/embedding.py +257 -99
  82. keras/src/layers/core/input_layer.py +1 -0
  83. keras/src/layers/core/reversible_embedding.py +399 -0
  84. keras/src/layers/input_spec.py +17 -17
  85. keras/src/layers/layer.py +50 -15
  86. keras/src/layers/merging/concatenate.py +6 -5
  87. keras/src/layers/merging/dot.py +4 -1
  88. keras/src/layers/pooling/adaptive_average_pooling1d.py +65 -0
  89. keras/src/layers/pooling/adaptive_average_pooling2d.py +62 -0
  90. keras/src/layers/pooling/adaptive_average_pooling3d.py +63 -0
  91. keras/src/layers/pooling/adaptive_max_pooling1d.py +65 -0
  92. keras/src/layers/pooling/adaptive_max_pooling2d.py +62 -0
  93. keras/src/layers/pooling/adaptive_max_pooling3d.py +63 -0
  94. keras/src/layers/pooling/base_adaptive_pooling.py +63 -0
  95. keras/src/layers/preprocessing/discretization.py +6 -5
  96. keras/src/layers/preprocessing/feature_space.py +8 -4
  97. keras/src/layers/preprocessing/image_preprocessing/aug_mix.py +2 -2
  98. keras/src/layers/preprocessing/image_preprocessing/bounding_boxes/validation.py +5 -5
  99. keras/src/layers/preprocessing/image_preprocessing/random_contrast.py +3 -3
  100. keras/src/layers/preprocessing/image_preprocessing/resizing.py +10 -0
  101. keras/src/layers/preprocessing/index_lookup.py +19 -1
  102. keras/src/layers/preprocessing/normalization.py +16 -1
  103. keras/src/layers/preprocessing/string_lookup.py +26 -28
  104. keras/src/layers/regularization/dropout.py +43 -1
  105. keras/src/layers/rnn/gru.py +1 -1
  106. keras/src/layers/rnn/lstm.py +2 -2
  107. keras/src/layers/rnn/rnn.py +19 -0
  108. keras/src/layers/rnn/simple_rnn.py +1 -1
  109. keras/src/legacy/preprocessing/image.py +4 -1
  110. keras/src/legacy/preprocessing/sequence.py +20 -12
  111. keras/src/losses/loss.py +1 -1
  112. keras/src/losses/losses.py +24 -0
  113. keras/src/metrics/confusion_metrics.py +7 -6
  114. keras/src/models/cloning.py +4 -0
  115. keras/src/models/functional.py +11 -3
  116. keras/src/models/model.py +195 -44
  117. keras/src/ops/image.py +257 -20
  118. keras/src/ops/linalg.py +93 -0
  119. keras/src/ops/nn.py +268 -2
  120. keras/src/ops/numpy.py +701 -44
  121. keras/src/ops/operation.py +90 -29
  122. keras/src/ops/operation_utils.py +2 -0
  123. keras/src/optimizers/adafactor.py +29 -10
  124. keras/src/optimizers/base_optimizer.py +22 -3
  125. keras/src/optimizers/loss_scale_optimizer.py +51 -18
  126. keras/src/optimizers/muon.py +65 -31
  127. keras/src/optimizers/schedules/learning_rate_schedule.py +4 -3
  128. keras/src/quantizers/__init__.py +14 -1
  129. keras/src/quantizers/awq.py +361 -0
  130. keras/src/quantizers/awq_config.py +140 -0
  131. keras/src/quantizers/awq_core.py +217 -0
  132. keras/src/quantizers/gptq.py +346 -207
  133. keras/src/quantizers/gptq_config.py +63 -13
  134. keras/src/quantizers/gptq_core.py +328 -215
  135. keras/src/quantizers/quantization_config.py +246 -0
  136. keras/src/quantizers/quantizers.py +407 -38
  137. keras/src/quantizers/utils.py +23 -0
  138. keras/src/random/seed_generator.py +6 -4
  139. keras/src/saving/file_editor.py +81 -6
  140. keras/src/saving/orbax_util.py +26 -0
  141. keras/src/saving/saving_api.py +37 -14
  142. keras/src/saving/saving_lib.py +1 -1
  143. keras/src/testing/__init__.py +1 -0
  144. keras/src/testing/test_case.py +45 -5
  145. keras/src/trainers/compile_utils.py +38 -17
  146. keras/src/trainers/data_adapters/grain_dataset_adapter.py +1 -5
  147. keras/src/tree/torchtree_impl.py +215 -0
  148. keras/src/tree/tree_api.py +6 -1
  149. keras/src/utils/backend_utils.py +31 -4
  150. keras/src/utils/dataset_utils.py +234 -35
  151. keras/src/utils/file_utils.py +49 -11
  152. keras/src/utils/image_utils.py +14 -2
  153. keras/src/utils/jax_layer.py +244 -55
  154. keras/src/utils/module_utils.py +29 -0
  155. keras/src/utils/progbar.py +10 -12
  156. keras/src/utils/python_utils.py +5 -0
  157. keras/src/utils/rng_utils.py +9 -1
  158. keras/src/utils/tracking.py +70 -5
  159. keras/src/version.py +1 -1
  160. {keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/METADATA +16 -6
  161. {keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/RECORD +163 -142
  162. keras/src/quantizers/gptq_quant.py +0 -133
  163. {keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/WHEEL +0 -0
  164. {keras_nightly-3.12.0.dev2025083103.dist-info → keras_nightly-3.14.0.dev2026011604.dist-info}/top_level.txt +0 -0
@@ -6,6 +6,7 @@ from keras.src import backend
6
6
  from keras.src import ops
7
7
  from keras.src.api_export import keras_export
8
8
  from keras.src.layers.preprocessing.data_layer import DataLayer
9
+ from keras.src.trainers.data_adapters.py_dataset_adapter import PyDataset
9
10
  from keras.src.utils.module_utils import tensorflow as tf
10
11
 
11
12
 
@@ -43,10 +44,12 @@ class Normalization(DataLayer):
43
44
  will be broadcast to the shape of the kept axes above;
44
45
  if the value(s) cannot be broadcast, an error will be raised when
45
46
  this layer's `build()` method is called.
47
+ `mean` and `variance` must be specified together.
46
48
  variance: The variance value(s) to use during normalization. The passed
47
49
  value(s) will be broadcast to the shape of the kept axes above;
48
50
  if the value(s) cannot be broadcast, an error will be raised when
49
51
  this layer's `build()` method is called.
52
+ `mean` and `variance` must be specified together.
50
53
  invert: If `True`, this layer will apply the inverse transformation
51
54
  to its inputs: it would turn a normalized input back into its
52
55
  original form.
@@ -227,6 +230,18 @@ class Normalization(DataLayer):
227
230
  # Batch dataset if it isn't batched
228
231
  data = data.batch(128)
229
232
  input_shape = tuple(data.element_spec.shape)
233
+ elif isinstance(data, PyDataset):
234
+ data = data[0]
235
+ if isinstance(data, tuple):
236
+ # handling (x, y) or (x, y, sample_weight)
237
+ data = data[0]
238
+ input_shape = data.shape
239
+ else:
240
+ raise TypeError(
241
+ f"Unsupported data type: {type(data)}. `adapt` supports "
242
+ f"`np.ndarray`, backend tensors, `tf.data.Dataset`, and "
243
+ f"`keras.utils.PyDataset`."
244
+ )
230
245
 
231
246
  if not self.built:
232
247
  self.build(input_shape)
@@ -246,7 +261,7 @@ class Normalization(DataLayer):
246
261
  elif backend.is_tensor(data):
247
262
  total_mean = ops.mean(data, axis=self._reduce_axis)
248
263
  total_var = ops.var(data, axis=self._reduce_axis)
249
- elif isinstance(data, tf.data.Dataset):
264
+ elif isinstance(data, (tf.data.Dataset, PyDataset)):
250
265
  total_mean = ops.zeros(self._mean_and_var_shape)
251
266
  total_var = ops.zeros(self._mean_and_var_shape)
252
267
  total_count = 0
@@ -26,9 +26,9 @@ class StringLookup(IndexLookup):
26
26
  tokens will be used to create the vocabulary and all others will be treated
27
27
  as out-of-vocabulary (OOV).
28
28
 
29
- There are two possible output modes for the layer.
30
- When `output_mode` is `"int"`,
31
- input strings are converted to their index in the vocabulary (an integer).
29
+ There are two possible output modes for the layer. When `output_mode` is
30
+ `"int"`, input strings are converted to their index in the vocabulary (an
31
+ integer).
32
32
  When `output_mode` is `"multi_hot"`, `"count"`, or `"tf_idf"`, input strings
33
33
  are encoded into an array where each dimension corresponds to an element in
34
34
  the vocabulary.
@@ -48,7 +48,7 @@ class StringLookup(IndexLookup):
48
48
  It can however be used with any backend when running eagerly.
49
49
  It can also always be used as part of an input preprocessing pipeline
50
50
  with any backend (outside the model itself), which is how we recommend
51
- to use this layer.
51
+ using this layer.
52
52
 
53
53
  **Note:** This layer is safe to use inside a `tf.data` pipeline
54
54
  (independently of which backend you're using).
@@ -65,28 +65,26 @@ class StringLookup(IndexLookup):
65
65
  If this value is 0, OOV inputs will cause an error when calling
66
66
  the layer. Defaults to `1`.
67
67
  mask_token: A token that represents masked inputs. When `output_mode` is
68
- `"int"`, the token is included in vocabulary and mapped to index 0.
69
- In other output modes, the token will not appear
70
- in the vocabulary and instances of the mask token
71
- in the input will be dropped. If set to `None`,
72
- no mask term will be added. Defaults to `None`.
68
+ `"int"`, the token is included in the vocabulary and mapped to index
69
+ 0.
70
+ In other output modes, the token will not appear in the vocabulary
71
+ and instances of the mask token in the input will be dropped.
72
+ If set to `None`, no mask term will be added. Defaults to `None`.
73
73
  oov_token: Only used when `invert` is True. The token to return for OOV
74
74
  indices. Defaults to `"[UNK]"`.
75
- vocabulary: Optional. Either an array of integers or a string path to a
76
- text file. If passing an array, can pass a tuple, list,
77
- 1D NumPy array, or 1D tensor containing the integer vocbulary terms.
78
- If passing a file path, the file should contain one line per term
79
- in the vocabulary. If this argument is set,
80
- there is no need to `adapt()` the layer.
81
- vocabulary_dtype: The dtype of the vocabulary terms, for example
82
- `"int64"` or `"int32"`. Defaults to `"int64"`.
75
+ vocabulary: Optional. Either an array of strings or a string path to a
76
+ text file. If passing an array, you can pass a tuple, list, 1D NumPy
77
+ array, or 1D tensor containing the string vocabulary terms.
78
+ If passing a file path, the file should contain one line per term in
79
+ the vocabulary. If this argument is set, there is no need to
80
+ `adapt()` the layer.
83
81
  idf_weights: Only valid when `output_mode` is `"tf_idf"`.
84
82
  A tuple, list, 1D NumPy array, or 1D tensor of the same length
85
83
  as the vocabulary, containing the floating point inverse document
86
84
  frequency weights, which will be multiplied by per sample term
87
85
  counts for the final TF-IDF weight.
88
- If the `vocabulary` argument is set, and `output_mode` is
89
- `"tf_idf"`, this argument must be supplied.
86
+ If the `vocabulary` argument is set and `output_mode` is `"tf_idf"`,
87
+ this argument must be supplied.
90
88
  invert: Only valid when `output_mode` is `"int"`.
91
89
  If `True`, this layer will map indices to vocabulary items
92
90
  instead of mapping vocabulary items to indices.
@@ -102,11 +100,11 @@ class StringLookup(IndexLookup):
102
100
  If the last dimension is not size 1, will append a new
103
101
  dimension for the encoded output.
104
102
  - `"multi_hot"`: Encodes each sample in the input into a single
105
- array the same size as the vocabulary,
106
- containing a 1 for each vocabulary term present in the sample.
107
- Treats the last dimension as the sample dimension,
108
- if input shape is `(..., sample_length)`,
109
- output shape will be `(..., num_tokens)`.
103
+ array the same size as the vocabulary containing a 1 for each
104
+ vocabulary term present in the sample.
105
+ Treats the last dimension as the sample dimension, if the input
106
+ shape is `(..., sample_length)`, the output shape will be
107
+ `(..., num_tokens)`.
110
108
  - `"count"`: As `"multi_hot"`, but the int array contains
111
109
  a count of the number of times the token at that index
112
110
  appeared in the sample.
@@ -240,8 +238,8 @@ class StringLookup(IndexLookup):
240
238
  array([[0. , 0.25, 0. , 0.6 , 0.8 ],
241
239
  [1.0 , 0. , 0.75, 0. , 0.4 ]], dtype=float32)
242
240
 
243
- To specify the idf weights for oov values, you will need to pass the entire
244
- vocabulary including the leading oov token.
241
+ To specify the idf weights for OOV values, you will need to pass the entire
242
+ vocabulary including the leading OOV token.
245
243
 
246
244
  >>> vocab = ["[UNK]", "a", "b", "c", "d"]
247
245
  >>> idf_weights = [0.9, 0.25, 0.75, 0.6, 0.4]
@@ -269,7 +267,7 @@ class StringLookup(IndexLookup):
269
267
  array([[b'a', b'c', b'd'],
270
268
  [b'd', b'[UNK]', b'b']], dtype=object)
271
269
 
272
- Note that the first index correspond to the oov token by default.
270
+ Note that the first index corresponds to the OOV token by default.
273
271
 
274
272
 
275
273
  **Forward and inverse lookup pairs**
@@ -340,7 +338,7 @@ class StringLookup(IndexLookup):
340
338
  self.supports_jit = False
341
339
 
342
340
  def adapt(self, data, steps=None):
343
- """Computes a vocabulary of integer terms from tokens in a dataset.
341
+ """Computes a vocabulary of terms from tokens in a dataset.
344
342
 
345
343
  Calling `adapt()` on a `StringLookup` layer is an alternative to passing
346
344
  in a precomputed vocabulary on construction via the `vocabulary`
@@ -48,13 +48,55 @@ class Dropout(Layer):
48
48
  )
49
49
  self.rate = rate
50
50
  self.seed = seed
51
- self.noise_shape = noise_shape
51
+ self.noise_shape = self._validate_noise_shape(noise_shape)
52
52
  if rate > 0:
53
53
  self.seed_generator = backend.random.SeedGenerator(seed)
54
54
  self.supports_masking = True
55
55
 
56
56
  self._build_at_init()
57
57
 
58
+ def _validate_noise_shape(self, noise_shape):
59
+ if noise_shape is None:
60
+ return None
61
+
62
+ if isinstance(noise_shape, str):
63
+ raise ValueError(
64
+ f"Invalid value received for argument `noise_shape`. "
65
+ f"Expected a tuple or list of integers. "
66
+ f"Received: noise_shape={noise_shape}"
67
+ )
68
+
69
+ if not isinstance(noise_shape, tuple):
70
+ try:
71
+ noise_shape = tuple(noise_shape)
72
+ except TypeError:
73
+ raise ValueError(
74
+ f"Invalid value received for argument `noise_shape`. "
75
+ f"Expected an iterable of integers "
76
+ f"(e.g., a tuple or list). "
77
+ f"Received: noise_shape={noise_shape}"
78
+ )
79
+
80
+ for i, dim in enumerate(noise_shape):
81
+ if dim is not None:
82
+ if not isinstance(dim, int):
83
+ raise ValueError(
84
+ f"Invalid value received for argument `noise_shape`. "
85
+ f"Expected all elements to be integers or None. "
86
+ f"Received element at index {i}: {dim} "
87
+ f"(type: {type(dim).__name__})"
88
+ )
89
+
90
+ if dim <= 0:
91
+ raise ValueError(
92
+ f"Invalid value received for argument `noise_shape`. "
93
+ f"Expected all dimensions to be positive integers "
94
+ f"or None. "
95
+ f"Received negative or zero value at index {i}: {dim}"
96
+ )
97
+
98
+ return noise_shape
99
+
58
100
  def call(self, inputs, training=False):
59
101
  if training and self.rate > 0:
60
102
  return backend.random.dropout(
@@ -261,7 +261,7 @@ class GRUCell(Layer, DropoutRNNCell):
261
261
  matrix_x = ops.matmul(inputs, self.kernel)
262
262
  if self.use_bias:
263
263
  # biases: bias_z_i, bias_r_i, bias_h_i
264
- matrix_x += input_bias
264
+ matrix_x = ops.add(matrix_x, input_bias)
265
265
 
266
266
  x_z, x_r, x_h = ops.split(matrix_x, 3, axis=-1)
267
267
 
@@ -276,9 +276,9 @@ class LSTMCell(Layer, DropoutRNNCell):
276
276
 
277
277
  z = ops.matmul(inputs, self.kernel)
278
278
 
279
- z += ops.matmul(h_tm1, self.recurrent_kernel)
279
+ z = ops.add(z, ops.matmul(h_tm1, self.recurrent_kernel))
280
280
  if self.use_bias:
281
- z += self.bias
281
+ z = ops.add(z, self.bias)
282
282
 
283
283
  z = ops.split(z, 4, axis=1)
284
284
  c, o = self._compute_carry_and_output_fused(z, c_tm1)
@@ -212,6 +212,7 @@ class RNN(Layer):
212
212
  self.supports_masking = True
213
213
  self.input_spec = None
214
214
  self.states = None
215
+ self._expected_batch_size = None
215
216
 
216
217
  state_size = getattr(self.cell, "state_size", None)
217
218
  if state_size is None:
@@ -283,6 +284,9 @@ class RNN(Layer):
283
284
  f"batch size: sequence.shape={sequences_shape}"
284
285
  )
285
286
  self._create_state_variables(sequences_shape[0])
287
+ self._expected_batch_size = ops.shape(
288
+ tree.flatten(self.states)[0]
289
+ )[0]
286
290
 
287
291
  @tracking.no_automatic_dependency_tracking
288
292
  def _create_state_variables(self, batch_size):
@@ -382,6 +386,21 @@ class RNN(Layer):
382
386
  initial_state = self.get_initial_state(
383
387
  batch_size=ops.shape(sequences)[0]
384
388
  )
389
+ if self.stateful:
390
+ actual_batch_size = sequences.shape[0]
391
+ if (
392
+ self._expected_batch_size is not None
393
+ and actual_batch_size is not None
394
+ and actual_batch_size != self._expected_batch_size
395
+ ):
396
+ raise ValueError(
397
+ f"If an RNN is stateful, the batch size of the "
398
+ f"input sequences must be the same as the batch "
399
+ f"size of the initial state. \n"
400
+ f"- Expected batch size: {self._expected_batch_size}\n"
401
+ f"- Received batch size: {actual_batch_size}"
402
+ )
403
+
385
404
  # RNN expect the states in a list, even if single state.
386
405
  if not tree.is_nested(initial_state):
387
406
  initial_state = [initial_state]
@@ -160,7 +160,7 @@ class SimpleRNNCell(Layer, DropoutRNNCell):
160
160
  sequence = sequence * dp_mask
161
161
  h = ops.matmul(sequence, self.kernel)
162
162
  if self.bias is not None:
163
- h += self.bias
163
+ h = ops.add(h, self.bias)
164
164
 
165
165
  if training and rec_dp_mask is not None:
166
166
  prev_output = prev_output * rec_dp_mask
@@ -30,11 +30,14 @@ class Iterator(PyDataset):
30
30
  batch_size: Integer, size of a batch.
31
31
  shuffle: Boolean, whether to shuffle the data between epochs.
32
32
  seed: Random seeding for data shuffling.
33
+ **kwargs: Additional keyword arguments for the `PyDataset` base class,
34
+ such as `workers`, `use_multiprocessing`, and `max_queue_size`.
33
35
  """
34
36
 
35
37
  white_list_formats = ("png", "jpg", "jpeg", "bmp", "ppm", "tif", "tiff")
36
38
 
37
- def __init__(self, n, batch_size, shuffle, seed):
39
+ def __init__(self, n, batch_size, shuffle, seed, **kwargs):
40
+ super().__init__(**kwargs)
38
41
  self.n = n
39
42
  self.batch_size = batch_size
40
43
  self.seed = seed
@@ -47,6 +47,8 @@ class TimeseriesGenerator(PyDataset):
47
47
  in reverse chronological order.
48
48
  batch_size: Number of timeseries samples in each batch
49
49
  (except maybe the last one).
50
+ **kwargs: Additional keyword arguments for the `PyDataset` base class,
51
+ such as `workers`, `use_multiprocessing`, and `max_queue_size`.
50
52
 
51
53
  Returns:
52
54
  A PyDataset instance.
@@ -64,7 +66,9 @@ class TimeseriesGenerator(PyDataset):
64
66
  shuffle=False,
65
67
  reverse=False,
66
68
  batch_size=128,
69
+ **kwargs,
67
70
  ):
71
+ super().__init__(**kwargs)
68
72
  if len(data) != len(targets):
69
73
  raise ValueError(
70
74
  "Data and targets have to be "
@@ -145,18 +149,22 @@ class TimeseriesGenerator(PyDataset):
145
149
  except TypeError as e:
146
150
  raise TypeError(f"Targets not JSON Serializable: {targets}") from e
147
151
 
148
- return {
149
- "data": json_data,
150
- "targets": json_targets,
151
- "length": self.length,
152
- "sampling_rate": self.sampling_rate,
153
- "stride": self.stride,
154
- "start_index": self.start_index,
155
- "end_index": self.end_index,
156
- "shuffle": self.shuffle,
157
- "reverse": self.reverse,
158
- "batch_size": self.batch_size,
159
- }
152
+ config = super().get_config()
153
+ config.update(
154
+ {
155
+ "data": json_data,
156
+ "targets": json_targets,
157
+ "length": self.length,
158
+ "sampling_rate": self.sampling_rate,
159
+ "stride": self.stride,
160
+ "start_index": self.start_index,
161
+ "end_index": self.end_index,
162
+ "shuffle": self.shuffle,
163
+ "reverse": self.reverse,
164
+ "batch_size": self.batch_size,
165
+ }
166
+ )
167
+ return config
160
168
 
161
169
  def to_json(self, **kwargs):
162
170
  """Returns a JSON string containing the generator's configuration.
keras/src/losses/loss.py CHANGED
@@ -211,7 +211,7 @@ def apply_mask(sample_weight, mask, dtype, reduction):
211
211
  dtype,
212
212
  )
213
213
  valid = ops.sum(mask) # May be 0!
214
- mask *= total / (valid + backend.epsilon())
214
+ mask *= ops.divide_no_nan(total, valid)
215
215
 
216
216
  if sample_weight is not None:
217
217
  sample_weight = ops.cast(sample_weight, dtype=dtype)
@@ -73,6 +73,14 @@ class MeanSquaredError(LossFunctionWrapper):
73
73
  `"float32"` unless set to different value
74
74
  (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is
75
75
  provided, then the `compute_dtype` will be utilized.
76
+
77
+ Examples:
78
+
79
+ >>> y_true = keras.ops.array([1.0, 0.0, 1.0])
80
+ >>> y_pred = keras.ops.array([0.9, 0.1, 0.8])
81
+ >>> loss = keras.losses.MeanSquaredError()
82
+ >>> loss(y_true, y_pred)
83
+ 0.02
76
84
  """
77
85
 
78
86
  def __init__(
@@ -114,6 +122,14 @@ class MeanAbsoluteError(LossFunctionWrapper):
114
122
  `"float32"` unless set to different value
115
123
  (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is
116
124
  provided, then the `compute_dtype` will be utilized.
125
+
126
+ Examples:
127
+
128
+ >>> y_true = keras.ops.array([1.0, 0.3, 1.0])
129
+ >>> y_pred = keras.ops.array([1.9, 0.3, 1.8])
130
+ >>> loss = keras.losses.MeanAbsoluteError()
131
+ >>> loss(y_true, y_pred)
132
+ 0.5666667
117
133
  """
118
134
 
119
135
  def __init__(
@@ -155,6 +171,14 @@ class MeanAbsolutePercentageError(LossFunctionWrapper):
155
171
  `"float32"` unless set to different value
156
172
  (via `keras.backend.set_floatx()`). If a `keras.DTypePolicy` is
157
173
  provided, then the `compute_dtype` will be utilized.
174
+
175
+ Examples:
176
+
177
+ >>> y_true = keras.ops.array([100.0, 200.0, 300.0])
178
+ >>> y_pred = keras.ops.array([90.0, 210.0, 310.0])
179
+ >>> loss = keras.losses.MeanAbsolutePercentageError()
180
+ >>> loss(y_true, y_pred)
181
+ 6.111111
158
182
  """
159
183
 
160
184
  def __init__(
@@ -654,7 +654,7 @@ class SensitivitySpecificityBase(Metric):
654
654
  Args:
655
655
  constrained: Over these values the constraint is specified. A rank-1
656
656
  tensor.
657
- dependent: From these values the maximum that satiesfies the
657
+ dependent: From these values the maximum that satisfies the
658
658
  constraint is selected. Values in this tensor and in
659
659
  `constrained` are linked by having the same threshold at each
660
660
  position, hence this tensor must have the same shape.
@@ -664,11 +664,12 @@ class SensitivitySpecificityBase(Metric):
664
664
  Returns:
665
665
  maximal dependent value, if no value satisfies the constraint 0.0.
666
666
  """
667
- feasible = ops.nonzero(predicate(constrained, self.value))
668
- feasible_exists = ops.greater(ops.size(feasible), 0)
669
- max_dependent = ops.max(ops.take(dependent, feasible), initial=0)
670
-
671
- return ops.where(feasible_exists, max_dependent, 0.0)
667
+ feasible = predicate(constrained, self.value)
668
+ # Mask values based on whether they satisfy the constraint and take max.
669
+ return ops.max(
670
+ ops.multiply(dependent, ops.cast(feasible, dependent.dtype)),
671
+ initial=0,
672
+ )
672
673
 
673
674
 
674
675
  @keras_export("keras.metrics.SensitivityAtSpecificity")
@@ -293,10 +293,12 @@ def _clone_sequential_model(model, clone_function, input_tensors=None):
293
293
  input_name = ref_input_layer.name
294
294
  input_batch_shape = ref_input_layer.batch_shape
295
295
  input_dtype = ref_input_layer._dtype
296
+ input_optional = ref_input_layer.optional
296
297
  else:
297
298
  input_name = None
298
299
  input_dtype = None
299
300
  input_batch_shape = None
301
+ input_optional = False
300
302
 
301
303
  if input_tensors is not None:
302
304
  if isinstance(input_tensors, (list, tuple)):
@@ -313,6 +315,7 @@ def _clone_sequential_model(model, clone_function, input_tensors=None):
313
315
  inputs = Input(
314
316
  tensor=input_tensors,
315
317
  name=input_name,
318
+ optional=input_optional,
316
319
  )
317
320
  new_layers = [inputs] + new_layers
318
321
  else:
@@ -321,6 +324,7 @@ def _clone_sequential_model(model, clone_function, input_tensors=None):
321
324
  batch_shape=input_batch_shape,
322
325
  dtype=input_dtype,
323
326
  name=input_name,
327
+ optional=input_optional,
324
328
  )
325
329
  new_layers = [inputs] + new_layers
326
330
  cloned_model = Sequential(
@@ -254,9 +254,9 @@ class Functional(Function, Model):
254
254
  return converted
255
255
 
256
256
  def _adjust_input_rank(self, flat_inputs):
257
- flat_ref_shapes = [x.shape for x in self._inputs]
258
257
  adjusted = []
259
- for x, ref_shape in zip(flat_inputs, flat_ref_shapes):
258
+ for i, x in enumerate(flat_inputs):
259
+ ref_shape = self._inputs[i].shape
260
260
  if x is None:
261
261
  adjusted.append(x)
262
262
  continue
@@ -273,8 +273,11 @@ class Functional(Function, Model):
273
273
  if ref_shape[-1] == 1:
274
274
  adjusted.append(ops.expand_dims(x, axis=-1))
275
275
  continue
276
+ flat_paths_and_inputs = tree.flatten_with_path(self._inputs_struct)
277
+ path = ".".join(str(p) for p in flat_paths_and_inputs[i][0])
276
278
  raise ValueError(
277
- f"Invalid input shape for input {x}. Expected shape "
279
+ f"Invalid input shape for input {x} with name "
280
+ f"'{self._inputs[i].name}' and path '{path}'. Expected shape "
278
281
  f"{ref_shape}, but input has incompatible shape {x.shape}"
279
282
  )
280
283
  # Add back metadata.
@@ -832,11 +835,16 @@ def clone_graph_nodes(inputs, outputs):
832
835
  kt_id_mapping[id(kt_input)] = kt_input
833
836
  else:
834
837
  # We need to create a new Keras tensor for any intermediate tensor
838
+ original_op = kt_input._keras_history.operation
839
+ optional = False
840
+ if isinstance(original_op, InputLayer):
841
+ optional = original_op.optional
835
842
  cloned_input = Input(
836
843
  batch_shape=kt_input.shape,
837
844
  dtype=kt_input.dtype,
838
845
  sparse=kt_input.sparse,
839
846
  name=f"{kt_input.name}CLONE",
847
+ optional=optional,
840
848
  )
841
849
  cloned_inputs.append(cloned_input)
842
850
  kt_id_mapping[id(kt_input)] = cloned_input