returnn 1.20250228.101938__py3-none-any.whl → 1.20250304.10039__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of returnn might be problematic. Click here for more details.

returnn/PKG-INFO CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: returnn
3
- Version: 1.20250228.101938
3
+ Version: 1.20250304.10039
4
4
  Summary: The RWTH extensible training framework for universal recurrent neural networks
5
5
  Home-page: https://github.com/rwth-i6/returnn/
6
6
  Author: Albert Zeyer
@@ -1,2 +1,2 @@
1
- version = '1.20250228.101938'
2
- long_version = '1.20250228.101938+git.c053cfd'
1
+ version = '1.20250304.010039'
2
+ long_version = '1.20250304.010039+git.3e53d74'
@@ -1271,9 +1271,9 @@ class _DimMixin:
1271
1271
  if self.batch:
1272
1272
  x_dim = x_dim.get_for_batch_ctx(self.batch, self.control_flow_ctx)
1273
1273
  x_dim.complete_dyn_size(template_only=template_only, _backend=backend)
1274
- if x_dim.dyn_size_ext is None and not x_dim.dimension:
1274
+ if x_dim.dyn_size_ext is None and x_dim.dimension is None:
1275
1275
  return
1276
- y = _bin_op(y, x_dim.dimension or x_dim.dyn_size_ext)
1276
+ y = _bin_op(y, x_dim.dimension if x_dim.dimension is not None else x_dim.dyn_size_ext)
1277
1277
  if not template_only and y.raw_tensor is not None:
1278
1278
  y_max_value = _bin_op(y_max_value, x_dim.get_dim_value_tensor())
1279
1279
  assert y is not None, f"op {op}?"
@@ -6587,6 +6587,7 @@ class ConvLayer(_ConcatInputLayer):
6587
6587
  input_split_feature_dim=None,
6588
6588
  input_add_feature_dim=False,
6589
6589
  use_time_mask=False,
6590
+ mask_value: float = 0.0,
6590
6591
  ):
6591
6592
  """
6592
6593
  :param Data input_data:
@@ -6600,6 +6601,7 @@ class ConvLayer(_ConcatInputLayer):
6600
6601
  :param bool input_add_feature_dim: will add a dim at the end and use input-feature-dim == 1,
6601
6602
  and use the original input feature-dim as a spatial dim.
6602
6603
  :param bool use_time_mask:
6604
+ :param mask_value: when ``use_time_mask`` is used, what value to use for the mask
6603
6605
  :return: (transformed input, num batch dims). all batch dims are at the front
6604
6606
  :rtype: (Data, int)
6605
6607
  """
@@ -6697,7 +6699,7 @@ class ConvLayer(_ConcatInputLayer):
6697
6699
  continue
6698
6700
  axis = input_data.get_axis_from_description(dim)
6699
6701
  mask = input_data.get_sequence_mask_broadcast(axis=axis)
6700
- x = tf_util.where_bc(mask, x, 0.0)
6702
+ x = tf_util.where_bc(mask, x, mask_value)
6701
6703
 
6702
6704
  input_data.placeholder = x
6703
6705
 
@@ -7061,6 +7063,7 @@ class PoolLayer(_ConcatInputLayer):
7061
7063
  in_dim=in_dim,
7062
7064
  in_spatial_dims=in_spatial_dims,
7063
7065
  use_time_mask=use_time_mask,
7066
+ mask_value={"MAX": float("-inf"), "AVG": 0}[mode],
7064
7067
  )
7065
7068
  # We want to prepare the input data such that the batch-dim(s) is the very first,
7066
7069
  # the feature-dim is the very last ("NHWC" format) or right after batch-dim ("NCHW"),
@@ -337,7 +337,13 @@ class BucketOrderingIterDataPipe(torch.utils.data.IterDataPipe):
337
337
  """
338
338
 
339
339
  def __init__(
340
- self, dataset: torch.utils.data.IterableDataset, *, buckets: Sequence[Tuple[int, int]], length_key: str
340
+ self,
341
+ dataset: torch.utils.data.IterableDataset,
342
+ *,
343
+ buckets: Sequence[Tuple[int, int]],
344
+ length_key: str,
345
+ random_bucket_prob: float = 0.0,
346
+ seed: Optional[int] = None,
341
347
  ):
342
348
  """
343
349
  :param dataset: dataset to apply bucket batching to
@@ -345,9 +351,17 @@ class BucketOrderingIterDataPipe(torch.utils.data.IterDataPipe):
345
351
  Segments longer than the largest size limit configured in the buckets are dropped. To avoid dropping
346
352
  any segments make sure your largest bucket allows segments larger than your longest training segment.
347
353
  :param length_key: data key to take as length measure
354
+ :param random_bucket_prob: Probability of putting a segment not into the best-fitting bucket, but into
355
+ a randomly chosen still-fitting bucket.
356
+ This increases seq length variation within the buckets at the cost of slightly more padding.
357
+ :param seed: random seed
348
358
  """
349
359
  self._dataset = dataset
350
360
  self._length_key = length_key
361
+ assert random_bucket_prob >= 0.0
362
+ self._random_bucket_prob = random_bucket_prob
363
+ self._rng = numpy.random.RandomState()
364
+ self._seed = seed % (2**32) if seed is not None else None
351
365
 
352
366
  assert buckets, "empty bucket batching configuration"
353
367
  if not all(size > 0 and max_seqs > 0 for size, max_seqs in buckets):
@@ -367,6 +381,12 @@ class BucketOrderingIterDataPipe(torch.utils.data.IterDataPipe):
367
381
  if bucket_idx >= len(self._max_seq_lens):
368
382
  # seg is too long, drop it
369
383
  continue
384
+ if (
385
+ self._random_bucket_prob > 0.0
386
+ and bucket_idx < len(self._max_seq_lens) - 1
387
+ and self._rng.rand() < self._random_bucket_prob
388
+ ):
389
+ bucket_idx = self._rng.randint(bucket_idx, len(self._max_bucket_sizes))
370
390
  buckets[bucket_idx].append(data_dict)
371
391
  if len(buckets[bucket_idx]) >= self._max_bucket_sizes[bucket_idx]:
372
392
  yield buckets[bucket_idx]
@@ -383,6 +403,21 @@ class BucketOrderingIterDataPipe(torch.utils.data.IterDataPipe):
383
403
  def __getitem__(self, index):
384
404
  raise Exception(f"{self.__class__.__name__}.__getitem__ is not supported")
385
405
 
406
+ def set_seed(self, seed: int) -> BucketOrderingIterDataPipe:
407
+ """
408
+ Sets the seed for the next invocation of ``__iter__``, for compatibility with
409
+ ``torch.utils.data.graph_settings.apply_random_seed``.
410
+ """
411
+ self._seed = seed % (2**32) # seed must be within [0, 2**32) for seeding RandomState
412
+ return self
413
+
414
+ def reset(self):
415
+ """resets the internal state of the data pipe"""
416
+ if self._seed is None:
417
+ self._seed = int(2**31 + torch.empty((), dtype=torch.int32).random_().item())
418
+ self._rng.seed(self._seed)
419
+ self._seed = None
420
+
386
421
 
387
422
  def get_batching_iterable_dataset_from_config(
388
423
  *, dataset: torch.utils.data.IterableDataset, config: Config, train: bool
@@ -497,7 +532,7 @@ class ShufflingDataPipe(torch.utils.data.IterDataPipe):
497
532
  self._buffer_size = buffer_size
498
533
  self._monotonic_data_keys = monotonic_data_keys
499
534
  self._rng = numpy.random.RandomState()
500
- self._seed = seed
535
+ self._seed = seed % (2**32) if seed is not None else None
501
536
 
502
537
  def __iter__(self):
503
538
  # The implementation is very similar to the PostprocessingDataset's combinator LaplaceOrdering.
@@ -550,7 +585,7 @@ class ShufflingDataPipe(torch.utils.data.IterDataPipe):
550
585
  self._buffer.clear()
551
586
  self._next_buffer.clear()
552
587
  if self._seed is None:
553
- self._seed = int(torch.empty((), dtype=torch.int32).random_().item())
588
+ self._seed = int(2**31 + torch.empty((), dtype=torch.int32).random_().item())
554
589
  self._rng.seed(self._seed)
555
590
  self._seed = None
556
591
 
@@ -1925,15 +1925,19 @@ class TorchBackend(Backend[torch.Tensor]):
1925
1925
  stride_ = strides[i] if isinstance(strides, (list, tuple)) else strides
1926
1926
  else:
1927
1927
  stride_ = 1
1928
- # What is the logic here? You might be aware, in case without striding,
1928
+ # What is the logic here? Also see https://github.com/rwth-i6/returnn/issues/1693.
1929
+ # You might be aware, in case without striding,
1929
1930
  # we simply have pad_left = (s.dimension - 1) // 2,
1930
- # or the total amount of padding is s.dimension - 1.
1931
+ # or the total amount of padding is s.dimension - 1
1932
+ # (s.dimension is the filter size).
1931
1933
  # So we add the same amount of padding on both sides (or one less on the left side if odd).
1932
1934
  # The output seq length in case of "valid" padding is ⌈(in_len - s.dimension + 1) / stride⌉.
1933
1935
  # The output seq length in case of "same" padding with no striding (stride = 1)
1934
- # is simply the same as the input length.
1936
+ # is simply the same as the input length (that's why it's called "same").
1935
1937
  # What is the output seq length in case of "same" padding with striding?
1936
- # It should be ⌈in_len / stride⌉.
1938
+ # It should be out_len = ⌈in_len / stride⌉ (it should be independent of s.dimension).
1939
+ # We can rewrite out_len as:
1940
+ # out_len = ⌊(in_len + stride - 1) / stride⌋ = (in_len + stride - 1 - (in_len - 1) % stride) / stride
1937
1941
  # So, then we need to add a total amount of padding of s.dimension - 1.
1938
1942
  # However, doing it the same way as without striding is incorrect.
1939
1943
  # Why? Because then the first window might have more padding than the final window.
@@ -1941,8 +1945,6 @@ class TorchBackend(Backend[torch.Tensor]):
1941
1945
  # or maybe one less on the first window if odd.
1942
1946
  # How many frames do the windows cover?
1943
1947
  # in_len_covered = out_len * stride + (s.dimension - stride)
1944
- # We can rewrite out_len as:
1945
- # out_len = ⌊(in_len + stride - 1) / stride⌋ = (in_len + stride - 1 - (in_len - 1) % stride) / stride
1946
1948
  # So we have:
1947
1949
  # in_len_covered = (in_len + stride - 1 - (in_len - 1) % stride) + (s.dimension - stride)
1948
1950
  # = in_len + s.dimension - 1 - (in_len - 1) % stride
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: returnn
3
- Version: 1.20250228.101938
3
+ Version: 1.20250304.10039
4
4
  Summary: The RWTH extensible training framework for universal recurrent neural networks
5
5
  Home-page: https://github.com/rwth-i6/returnn/
6
6
  Author: Albert Zeyer
@@ -1,9 +1,9 @@
1
- returnn/PKG-INFO,sha256=I8nJH2i19lJSp03bggFS1YlTbOT-yFLg8yanKsDGZEk,5215
1
+ returnn/PKG-INFO,sha256=et7Z9NstTVvnWjiIMXhquw3eiMnMxYMfnEEbVc755xQ,5214
2
2
  returnn/__init__.py,sha256=biBtRsM0WZ406vShaeH-9WFoqJ8XwTbn6g0EeFJ7l8E,1012
3
3
  returnn/__main__.py,sha256=qBFbuB1yN3adgVM5pXt2-Yq9vorjRNchNPL8kDKx44M,31752
4
4
  returnn/__old_mod_loader__.py,sha256=nvsNY-xELdS_IPNkv66Q9Rmvg4dbGW0-EBRDcCmctos,7654
5
5
  returnn/__setup__.py,sha256=22kQn2fh11iPM0hLb2Fy5sLmoU1JGvmDxXRYuRgQkwU,4659
6
- returnn/_setup_info_generated.py,sha256=J7wtu2Asd11qxOS3X2dv_AblIP3xvjshEattiHywgzQ,77
6
+ returnn/_setup_info_generated.py,sha256=d4hd9PkngTUKLJT4Q6GLMhVg4nXyV3Pym04_IKcblgc,77
7
7
  returnn/config.py,sha256=3tmKhB6FnQZaNdtcYsiB61JnEY--iZ2qmJ4yq0b6tE0,29140
8
8
  returnn/forward_iface.py,sha256=A_OJiaXsX4MlXQRzST86ylyxSUZbC402PQL1REcqHjM,911
9
9
  returnn/learning_rate_control.py,sha256=ZvWryAn_tv9DhV8sh1LV3eE34Yltl3On3mYZAG4hR9s,34684
@@ -154,7 +154,7 @@ returnn/sprint/extern_interface.py,sha256=l-v1X-Yg0UpTFe7Y3c4FwWOqpSNuv9Oy5EzqlK
154
154
  returnn/sprint/interface.py,sha256=_IGNQlOFcJcwsSeVkKcM-y8g2NDJv07jFhii47KfWtg,36490
155
155
  returnn/tensor/README.md,sha256=X6BqcRLrPLPnwF9yR69uqIFrMnNluj9pBkOPHwNgzuo,501
156
156
  returnn/tensor/__init__.py,sha256=on6j5PEOQpck50UcsR4nJzJSDmoVy34z1Oq4efv6Ax0,154
157
- returnn/tensor/_dim_extra.py,sha256=pMrzlD8LqlhV9VdBuYSzl38VC3O71HJ1rlJaH8JFxUo,122321
157
+ returnn/tensor/_dim_extra.py,sha256=U7H690nc9WCL_fKX11AZLbTa1FhvJnDaA-vs3HM8_fI,122358
158
158
  returnn/tensor/_tensor_extra.py,sha256=DYJ6Dv3AdHcqFeiS_8fFsJG8ewu0dvFiXYT_TG1U1sI,164873
159
159
  returnn/tensor/_tensor_mixin_base.py,sha256=H5z86I0NejxrSgMH1c5oXQzBqS6L9HpvP4y7oegBaSc,643
160
160
  returnn/tensor/_tensor_op_overloads.py,sha256=kVVcnYtcZdW7Vjj78V1Im_yVX2M2r6dUTgeiAQZ37X0,5449
@@ -193,7 +193,7 @@ returnn/tf/frontend_low_level/__init__.py,sha256=34469k3KzMUIGowxReOZnbf6WdTjxY7
193
193
  returnn/tf/frontend_low_level/_backend.py,sha256=JwwRRIGnElqBC4bTImdB7w3U1u_SJESeZHYLmq86wog,24479
194
194
  returnn/tf/layers/__init__.py,sha256=Ngu-X84nWFgz7ndDu88DqoZ-5lUMMTQWH4g7N8pSoCg,72
195
195
  returnn/tf/layers/base.py,sha256=KcADpZUxqLkoFpQPMe_l9thRC7rpyBJIZCHITmnOd7M,153169
196
- returnn/tf/layers/basic.py,sha256=IVQ_6PkM-uuBN_vVg-VeGM74bb1pc6TjJhKf92pPS1I,610870
196
+ returnn/tf/layers/basic.py,sha256=la0EwaHVzAbL6JOXs6QXnYQ74F3R16piYpT55VwVFT4,611063
197
197
  returnn/tf/layers/rec.py,sha256=K9vvyDJeDApYQDKabz7PaOTGHeSTloInkecxKTbqeTU,548357
198
198
  returnn/tf/layers/segmental_model.py,sha256=wUyDZGr-eTVIIQWcsHLML0wtOxuWn_NFKOIrUKQcvoI,21515
199
199
  returnn/tf/layers/signal_processing.py,sha256=vRlkN7k7otk9_Qdv0qr_l6V0VT5Q6dO2MxwZWb2HH2M,52693
@@ -211,12 +211,12 @@ returnn/torch/engine.py,sha256=sU9A96icaj65uaEkX4i4aUK3IrB2S19_Fb9_sueB_JE,77426
211
211
  returnn/torch/updater.py,sha256=GqtBvZpElPVMm0lq84JPl4NVLFFETZAzAbR0rTomSao,28249
212
212
  returnn/torch/data/__init__.py,sha256=6cLNEi8KoGI12PF6akN7mI_mtjlx-0hcQAfMYoExwik,132
213
213
  returnn/torch/data/extern_data.py,sha256=_uT_9_gd5HIh1IoRsrebVG-nufSnb7fgC5jyU05GxJg,7580
214
- returnn/torch/data/pipeline.py,sha256=C0CAG_jk1oZwrPlW9WdRTxV9OvPztbqKjwKHnf3lhok,27886
214
+ returnn/torch/data/pipeline.py,sha256=mA6R1QU9vvRmfaUBvdqI9jQeIB3O-01ODcpmXs1SZ-w,29458
215
215
  returnn/torch/data/queued_data_iter.py,sha256=PoOsGHdHVZjTmcyfq_ZOw--P6hyfTdmAWIRGq_Z_nLM,888
216
216
  returnn/torch/data/returnn_dataset_wrapper.py,sha256=2CaDapzrlqahANuq-nyVAtv5ENHuM8A7okORwYJDisg,8006
217
217
  returnn/torch/data/tensor_utils.py,sha256=-Teqi--LLbt6q_5mDRdoHZHmPgSdC83W706ukif_YiU,1284
218
218
  returnn/torch/frontend/__init__.py,sha256=AA48HZnC17ASuKA0EWy8loZ-Bib_yUtqF4T1wYvjst4,62
219
- returnn/torch/frontend/_backend.py,sha256=TqyDWNP4XCvJNNGn8jyxaT8BOEjVE24QCUR3qsTIS3A,101242
219
+ returnn/torch/frontend/_backend.py,sha256=rFCoCnzZoBtHPg7mWpO3yJOJMVesuWuA3_6GGSKMc5k,101452
220
220
  returnn/torch/frontend/_rand.py,sha256=1JgIkV2XmpgJD86zXZ-NCAe-QuoP2swr6NaS1oz3Qa8,1830
221
221
  returnn/torch/frontend/bridge.py,sha256=Z2_UW8AagezC7zsXDc5PKcd8G9WwisV7j9SWGHU0m4U,7840
222
222
  returnn/torch/frontend/raw_ops.py,sha256=lF0h-KtYYsdaaqQADylVZp9qzPskOOXA4MfmYDyx5IU,296
@@ -253,8 +253,8 @@ returnn/util/sig_proc.py,sha256=Tjz0VOAVyqu2qDCF5HZ1JjALjcFsHcNkcd96WgZeKfE,7265
253
253
  returnn/util/task_system.py,sha256=y4sMVXQ25Qd2z0rx03uOlXlkE-jbCYC1Sjfn-XlraVU,26003
254
254
  returnn/util/train_proc_manager.py,sha256=Pjht28k6uz6BNQ47uW6Gf880iyq5q4wx7P_K2tmoAM8,3266
255
255
  returnn/util/watch_memory.py,sha256=BR5P2kvBN6UI81cE0_1WAA6Hd1SByLbBaiDxvLhPOew,4213
256
- returnn-1.20250228.101938.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
257
- returnn-1.20250228.101938.dist-info/METADATA,sha256=I8nJH2i19lJSp03bggFS1YlTbOT-yFLg8yanKsDGZEk,5215
258
- returnn-1.20250228.101938.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
259
- returnn-1.20250228.101938.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
260
- returnn-1.20250228.101938.dist-info/RECORD,,
256
+ returnn-1.20250304.10039.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
257
+ returnn-1.20250304.10039.dist-info/METADATA,sha256=et7Z9NstTVvnWjiIMXhquw3eiMnMxYMfnEEbVc755xQ,5214
258
+ returnn-1.20250304.10039.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
259
+ returnn-1.20250304.10039.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
260
+ returnn-1.20250304.10039.dist-info/RECORD,,