PyPI - returnn - Versions diffs - 1.20250228.101938__py3-none-any.whl → 1.20250304.10039__py3-none-any.whl - Mend

returnn 1.20250228.101938__py3-none-any.whl → 1.20250304.10039__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of returnn might be problematic. Click here for more details.

Files changed (11) hide show

returnn/PKG-INFO CHANGED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250228.101938
+Version: 1.20250304.10039
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer

returnn/_setup_info_generated.py CHANGED Viewed

@@ -1,2 +1,2 @@
-version = '1.20250228.101938'
-long_version = '1.20250228.101938+git.c053cfd'
+version = '1.20250304.010039'
+long_version = '1.20250304.010039+git.3e53d74'

returnn/tensor/_dim_extra.py CHANGED Viewed

@@ -1271,9 +1271,9 @@ class _DimMixin:
             if self.batch:
                 x_dim = x_dim.get_for_batch_ctx(self.batch, self.control_flow_ctx)
             x_dim.complete_dyn_size(template_only=template_only, _backend=backend)
-            if x_dim.dyn_size_ext is None and not x_dim.dimension:
+            if x_dim.dyn_size_ext is None and x_dim.dimension is None:
                 return
-            y = _bin_op(y, x_dim.dimension or x_dim.dyn_size_ext)
+            y = _bin_op(y, x_dim.dimension if x_dim.dimension is not None else x_dim.dyn_size_ext)
             if not template_only and y.raw_tensor is not None:
                 y_max_value = _bin_op(y_max_value, x_dim.get_dim_value_tensor())
         assert y is not None, f"op {op}?"

returnn/tf/layers/basic.py CHANGED Viewed

@@ -6587,6 +6587,7 @@ class ConvLayer(_ConcatInputLayer):
         input_split_feature_dim=None,
         input_add_feature_dim=False,
         use_time_mask=False,
+        mask_value: float = 0.0,
     ):
         """
         :param Data input_data:
@@ -6600,6 +6601,7 @@ class ConvLayer(_ConcatInputLayer):
         :param bool input_add_feature_dim: will add a dim at the end and use input-feature-dim == 1,
           and use the original input feature-dim as a spatial dim.
         :param bool use_time_mask:
+        :param mask_value: when ``use_time_mask`` is used, what value to use for the mask
         :return: (transformed input, num batch dims). all batch dims are at the front
         :rtype: (Data, int)
         """
@@ -6697,7 +6699,7 @@ class ConvLayer(_ConcatInputLayer):
                     continue
                 axis = input_data.get_axis_from_description(dim)
                 mask = input_data.get_sequence_mask_broadcast(axis=axis)
-                x = tf_util.where_bc(mask, x, 0.0)
+                x = tf_util.where_bc(mask, x, mask_value)
             input_data.placeholder = x
@@ -7061,6 +7063,7 @@ class PoolLayer(_ConcatInputLayer):
             in_dim=in_dim,
             in_spatial_dims=in_spatial_dims,
             use_time_mask=use_time_mask,
+            mask_value={"MAX": float("-inf"), "AVG": 0}[mode],
         )
         # We want to prepare the input data such that the batch-dim(s) is the very first,
         # the feature-dim is the very last ("NHWC" format) or right after batch-dim ("NCHW"),

returnn/torch/data/pipeline.py CHANGED Viewed

@@ -337,7 +337,13 @@ class BucketOrderingIterDataPipe(torch.utils.data.IterDataPipe):
     """
     def __init__(
-        self, dataset: torch.utils.data.IterableDataset, *, buckets: Sequence[Tuple[int, int]], length_key: str
+        self,
+        dataset: torch.utils.data.IterableDataset,
+        *,
+        buckets: Sequence[Tuple[int, int]],
+        length_key: str,
+        random_bucket_prob: float = 0.0,
+        seed: Optional[int] = None,
     ):
         """
         :param dataset: dataset to apply bucket batching to
@@ -345,9 +351,17 @@ class BucketOrderingIterDataPipe(torch.utils.data.IterDataPipe):
             Segments longer than the largest size limit configured in the buckets are dropped. To avoid dropping
             any segments make sure your largest bucket allows segments larger than your longest training segment.
         :param length_key: data key to take as length measure
+        :param random_bucket_prob: Probability of putting a segment not into the best-fitting bucket, but into
+            a randomly chosen still-fitting bucket.
+            This increases seq length variation within the buckets at the cost of slightly more padding.
+        :param seed: random seed
         """
         self._dataset = dataset
         self._length_key = length_key
+        assert random_bucket_prob >= 0.0
+        self._random_bucket_prob = random_bucket_prob
+        self._rng = numpy.random.RandomState()
+        self._seed = seed % (2**32) if seed is not None else None
         assert buckets, "empty bucket batching configuration"
         if not all(size > 0 and max_seqs > 0 for size, max_seqs in buckets):
@@ -367,6 +381,12 @@ class BucketOrderingIterDataPipe(torch.utils.data.IterDataPipe):
             if bucket_idx >= len(self._max_seq_lens):
                 # seg is too long, drop it
                 continue
+            if (
+                self._random_bucket_prob > 0.0
+                and bucket_idx < len(self._max_seq_lens) - 1
+                and self._rng.rand() < self._random_bucket_prob
+            ):
+                bucket_idx = self._rng.randint(bucket_idx, len(self._max_bucket_sizes))
             buckets[bucket_idx].append(data_dict)
             if len(buckets[bucket_idx]) >= self._max_bucket_sizes[bucket_idx]:
                 yield buckets[bucket_idx]
@@ -383,6 +403,21 @@ class BucketOrderingIterDataPipe(torch.utils.data.IterDataPipe):
     def __getitem__(self, index):
         raise Exception(f"{self.__class__.__name__}.__getitem__ is not supported")
+    def set_seed(self, seed: int) -> BucketOrderingIterDataPipe:
+        """
+        Sets the seed for the next invocation of ``__iter__``, for compatibility with
+        ``torch.utils.data.graph_settings.apply_random_seed``.
+        """
+        self._seed = seed % (2**32)  # seed must be within [0, 2**32) for seeding RandomState
+        return self
+    def reset(self):
+        """resets the internal state of the data pipe"""
+        if self._seed is None:
+            self._seed = int(2**31 + torch.empty((), dtype=torch.int32).random_().item())
+        self._rng.seed(self._seed)
+        self._seed = None
 def get_batching_iterable_dataset_from_config(
     *, dataset: torch.utils.data.IterableDataset, config: Config, train: bool
@@ -497,7 +532,7 @@ class ShufflingDataPipe(torch.utils.data.IterDataPipe):
         self._buffer_size = buffer_size
         self._monotonic_data_keys = monotonic_data_keys
         self._rng = numpy.random.RandomState()
-        self._seed = seed
+        self._seed = seed % (2**32) if seed is not None else None
     def __iter__(self):
         # The implementation is very similar to the PostprocessingDataset's combinator LaplaceOrdering.
@@ -550,7 +585,7 @@ class ShufflingDataPipe(torch.utils.data.IterDataPipe):
         self._buffer.clear()
         self._next_buffer.clear()
         if self._seed is None:
-            self._seed = int(torch.empty((), dtype=torch.int32).random_().item())
+            self._seed = int(2**31 + torch.empty((), dtype=torch.int32).random_().item())
         self._rng.seed(self._seed)
         self._seed = None

returnn/torch/frontend/_backend.py CHANGED Viewed

@@ -1925,15 +1925,19 @@ class TorchBackend(Backend[torch.Tensor]):
                     stride_ = strides[i] if isinstance(strides, (list, tuple)) else strides
                 else:
                     stride_ = 1
-                # What is the logic here? You might be aware, in case without striding,
+                # What is the logic here? Also see https://github.com/rwth-i6/returnn/issues/1693.
+                # You might be aware, in case without striding,
                 # we simply have pad_left = (s.dimension - 1) // 2,
-                # or the total amount of padding is s.dimension - 1.
+                # or the total amount of padding is s.dimension - 1
+                # (s.dimension is the filter size).
                 # So we add the same amount of padding on both sides (or one less on the left side if odd).
                 # The output seq length in case of "valid" padding is ⌈(in_len - s.dimension + 1) / stride⌉.
                 # The output seq length in case of "same" padding with no striding (stride = 1)
-                # is simply the same as the input length.
+                # is simply the same as the input length (that's why it's called "same").
                 # What is the output seq length in case of "same" padding with striding?
-                # It should be ⌈in_len / stride⌉.
+                # It should be out_len = ⌈in_len / stride⌉ (it should be independent of s.dimension).
+                # We can rewrite out_len as:
+                # out_len = ⌊(in_len + stride - 1) / stride⌋ = (in_len + stride - 1 - (in_len - 1) % stride) / stride
                 # So, then we need to add a total amount of padding of s.dimension - 1.
                 # However, doing it the same way as without striding is incorrect.
                 # Why? Because then the first window might have more padding than the final window.
@@ -1941,8 +1945,6 @@ class TorchBackend(Backend[torch.Tensor]):
                 # or maybe one less on the first window if odd.
                 # How many frames do the windows cover?
                 # in_len_covered = out_len * stride + (s.dimension - stride)
-                # We can rewrite out_len as:
-                # out_len = ⌊(in_len + stride - 1) / stride⌋ = (in_len + stride - 1 - (in_len - 1) % stride) / stride
                 # So we have:
                 # in_len_covered = (in_len + stride - 1 - (in_len - 1) % stride) + (s.dimension - stride)
                 #                = in_len + s.dimension - 1 - (in_len - 1) % stride

{returnn-1.20250228.101938.dist-info → returnn-1.20250304.10039.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250228.101938
+Version: 1.20250304.10039
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer

{returnn-1.20250228.101938.dist-info → returnn-1.20250304.10039.dist-info}/RECORD RENAMED Viewed

@@ -1,9 +1,9 @@
-returnn/PKG-INFO,sha256=I8nJH2i19lJSp03bggFS1YlTbOT-yFLg8yanKsDGZEk,5215
+returnn/PKG-INFO,sha256=et7Z9NstTVvnWjiIMXhquw3eiMnMxYMfnEEbVc755xQ,5214
 returnn/__init__.py,sha256=biBtRsM0WZ406vShaeH-9WFoqJ8XwTbn6g0EeFJ7l8E,1012
 returnn/__main__.py,sha256=qBFbuB1yN3adgVM5pXt2-Yq9vorjRNchNPL8kDKx44M,31752
 returnn/__old_mod_loader__.py,sha256=nvsNY-xELdS_IPNkv66Q9Rmvg4dbGW0-EBRDcCmctos,7654
 returnn/__setup__.py,sha256=22kQn2fh11iPM0hLb2Fy5sLmoU1JGvmDxXRYuRgQkwU,4659
-returnn/_setup_info_generated.py,sha256=J7wtu2Asd11qxOS3X2dv_AblIP3xvjshEattiHywgzQ,77
+returnn/_setup_info_generated.py,sha256=d4hd9PkngTUKLJT4Q6GLMhVg4nXyV3Pym04_IKcblgc,77
 returnn/config.py,sha256=3tmKhB6FnQZaNdtcYsiB61JnEY--iZ2qmJ4yq0b6tE0,29140
 returnn/forward_iface.py,sha256=A_OJiaXsX4MlXQRzST86ylyxSUZbC402PQL1REcqHjM,911
 returnn/learning_rate_control.py,sha256=ZvWryAn_tv9DhV8sh1LV3eE34Yltl3On3mYZAG4hR9s,34684
@@ -154,7 +154,7 @@ returnn/sprint/extern_interface.py,sha256=l-v1X-Yg0UpTFe7Y3c4FwWOqpSNuv9Oy5EzqlK
 returnn/sprint/interface.py,sha256=_IGNQlOFcJcwsSeVkKcM-y8g2NDJv07jFhii47KfWtg,36490
 returnn/tensor/README.md,sha256=X6BqcRLrPLPnwF9yR69uqIFrMnNluj9pBkOPHwNgzuo,501
 returnn/tensor/__init__.py,sha256=on6j5PEOQpck50UcsR4nJzJSDmoVy34z1Oq4efv6Ax0,154
-returnn/tensor/_dim_extra.py,sha256=pMrzlD8LqlhV9VdBuYSzl38VC3O71HJ1rlJaH8JFxUo,122321
+returnn/tensor/_dim_extra.py,sha256=U7H690nc9WCL_fKX11AZLbTa1FhvJnDaA-vs3HM8_fI,122358
 returnn/tensor/_tensor_extra.py,sha256=DYJ6Dv3AdHcqFeiS_8fFsJG8ewu0dvFiXYT_TG1U1sI,164873
 returnn/tensor/_tensor_mixin_base.py,sha256=H5z86I0NejxrSgMH1c5oXQzBqS6L9HpvP4y7oegBaSc,643
 returnn/tensor/_tensor_op_overloads.py,sha256=kVVcnYtcZdW7Vjj78V1Im_yVX2M2r6dUTgeiAQZ37X0,5449
@@ -193,7 +193,7 @@ returnn/tf/frontend_low_level/__init__.py,sha256=34469k3KzMUIGowxReOZnbf6WdTjxY7
 returnn/tf/frontend_low_level/_backend.py,sha256=JwwRRIGnElqBC4bTImdB7w3U1u_SJESeZHYLmq86wog,24479
 returnn/tf/layers/__init__.py,sha256=Ngu-X84nWFgz7ndDu88DqoZ-5lUMMTQWH4g7N8pSoCg,72
 returnn/tf/layers/base.py,sha256=KcADpZUxqLkoFpQPMe_l9thRC7rpyBJIZCHITmnOd7M,153169
-returnn/tf/layers/basic.py,sha256=IVQ_6PkM-uuBN_vVg-VeGM74bb1pc6TjJhKf92pPS1I,610870
+returnn/tf/layers/basic.py,sha256=la0EwaHVzAbL6JOXs6QXnYQ74F3R16piYpT55VwVFT4,611063
 returnn/tf/layers/rec.py,sha256=K9vvyDJeDApYQDKabz7PaOTGHeSTloInkecxKTbqeTU,548357
 returnn/tf/layers/segmental_model.py,sha256=wUyDZGr-eTVIIQWcsHLML0wtOxuWn_NFKOIrUKQcvoI,21515
 returnn/tf/layers/signal_processing.py,sha256=vRlkN7k7otk9_Qdv0qr_l6V0VT5Q6dO2MxwZWb2HH2M,52693
@@ -211,12 +211,12 @@ returnn/torch/engine.py,sha256=sU9A96icaj65uaEkX4i4aUK3IrB2S19_Fb9_sueB_JE,77426
 returnn/torch/updater.py,sha256=GqtBvZpElPVMm0lq84JPl4NVLFFETZAzAbR0rTomSao,28249
 returnn/torch/data/__init__.py,sha256=6cLNEi8KoGI12PF6akN7mI_mtjlx-0hcQAfMYoExwik,132
 returnn/torch/data/extern_data.py,sha256=_uT_9_gd5HIh1IoRsrebVG-nufSnb7fgC5jyU05GxJg,7580
-returnn/torch/data/pipeline.py,sha256=C0CAG_jk1oZwrPlW9WdRTxV9OvPztbqKjwKHnf3lhok,27886
+returnn/torch/data/pipeline.py,sha256=mA6R1QU9vvRmfaUBvdqI9jQeIB3O-01ODcpmXs1SZ-w,29458
 returnn/torch/data/queued_data_iter.py,sha256=PoOsGHdHVZjTmcyfq_ZOw--P6hyfTdmAWIRGq_Z_nLM,888
 returnn/torch/data/returnn_dataset_wrapper.py,sha256=2CaDapzrlqahANuq-nyVAtv5ENHuM8A7okORwYJDisg,8006
 returnn/torch/data/tensor_utils.py,sha256=-Teqi--LLbt6q_5mDRdoHZHmPgSdC83W706ukif_YiU,1284
 returnn/torch/frontend/__init__.py,sha256=AA48HZnC17ASuKA0EWy8loZ-Bib_yUtqF4T1wYvjst4,62
-returnn/torch/frontend/_backend.py,sha256=TqyDWNP4XCvJNNGn8jyxaT8BOEjVE24QCUR3qsTIS3A,101242
+returnn/torch/frontend/_backend.py,sha256=rFCoCnzZoBtHPg7mWpO3yJOJMVesuWuA3_6GGSKMc5k,101452
 returnn/torch/frontend/_rand.py,sha256=1JgIkV2XmpgJD86zXZ-NCAe-QuoP2swr6NaS1oz3Qa8,1830
 returnn/torch/frontend/bridge.py,sha256=Z2_UW8AagezC7zsXDc5PKcd8G9WwisV7j9SWGHU0m4U,7840
 returnn/torch/frontend/raw_ops.py,sha256=lF0h-KtYYsdaaqQADylVZp9qzPskOOXA4MfmYDyx5IU,296
@@ -253,8 +253,8 @@ returnn/util/sig_proc.py,sha256=Tjz0VOAVyqu2qDCF5HZ1JjALjcFsHcNkcd96WgZeKfE,7265
 returnn/util/task_system.py,sha256=y4sMVXQ25Qd2z0rx03uOlXlkE-jbCYC1Sjfn-XlraVU,26003
 returnn/util/train_proc_manager.py,sha256=Pjht28k6uz6BNQ47uW6Gf880iyq5q4wx7P_K2tmoAM8,3266
 returnn/util/watch_memory.py,sha256=BR5P2kvBN6UI81cE0_1WAA6Hd1SByLbBaiDxvLhPOew,4213
-returnn-1.20250228.101938.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
-returnn-1.20250228.101938.dist-info/METADATA,sha256=I8nJH2i19lJSp03bggFS1YlTbOT-yFLg8yanKsDGZEk,5215
-returnn-1.20250228.101938.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
-returnn-1.20250228.101938.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
-returnn-1.20250228.101938.dist-info/RECORD,,
+returnn-1.20250304.10039.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
+returnn-1.20250304.10039.dist-info/METADATA,sha256=et7Z9NstTVvnWjiIMXhquw3eiMnMxYMfnEEbVc755xQ,5214
+returnn-1.20250304.10039.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
+returnn-1.20250304.10039.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
+returnn-1.20250304.10039.dist-info/RECORD,,

{returnn-1.20250228.101938.dist-info → returnn-1.20250304.10039.dist-info}/LICENSE RENAMED Viewed

File without changes

{returnn-1.20250228.101938.dist-info → returnn-1.20250304.10039.dist-info}/WHEEL RENAMED Viewed

File without changes

{returnn-1.20250228.101938.dist-info → returnn-1.20250304.10039.dist-info}/top_level.txt RENAMED Viewed

File without changes

returnn 1.20250228.101938__py3-none-any.whl → 1.20250304.10039__py3-none-any.whl

Potentially problematic release.

returnn 1.20250228.101938__py3-none-any.whl → 1.20250304.10039__py3-none-any.whl