returnn 1.20250225.201207__py3-none-any.whl → 1.20250226.115259__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of returnn might be problematic.

returnn/PKG-INFO CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250225.201207
+Version: 1.20250226.115259
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer

returnn/_setup_info_generated.py CHANGED
@@ -1,2 +1,2 @@
-version = '1.20250225.201207'
-long_version = '1.20250225.201207+git.c7cfe6c'
+version = '1.20250226.115259'
+long_version = '1.20250226.115259+git.0d32534'

returnn/frontend/_backend.py CHANGED
@@ -391,17 +391,17 @@ class Backend(Generic[T]):
         source: Tensor,
         *,
         dims: Sequence[Dim],
-        out_dim: Optional[Dim] = None,
-    ) -> Tuple[Tensor, Dim]:
+        out_dim: Dim,
+    ) -> Tensor:
         """
         Merges a list of axes into a single one. (Flatten the dims.)
         E.g. input is (batch, width, height, dim) and dims=(width,height), then we get (batch, width*height, dim).
         Or input is (batch, time, height, dim) and axes=(height,dim), then we get (batch, time, height*dim).

         :param source:
-        :param dims:
-        :param out_dim:
-        :return: tensor, out_dim
+        :param dims: list of dims to merge. len(dims) >= 2
+        :param out_dim: resulting merged dim
+        :return: tensor
         """
         raise NotImplementedError

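For orientation, the docstring example above ((batch, width, height, dim) merged over dims=(width, height) giving (batch, width*height, dim)) corresponds to a call like the following sketch at the user-facing rf.merge_dims level, which keeps the (tensor, out_dim) return value; the static dim sizes and the backend setup are assumptions made only for illustration.

    import returnn.frontend as rf
    from returnn.tensor import Dim

    # Purely illustrative static dims; in practice these usually come from the data pipeline.
    width_dim = Dim(16, name="width")
    height_dim = Dim(8, name="height")
    feat_dim = Dim(64, name="feat")

    x = rf.zeros([width_dim, height_dim, feat_dim])  # assumes a backend has already been selected
    # rf.merge_dims still returns both the merged tensor and the merged dim;
    # only the Backend method now returns just the tensor.
    merged, merged_dim = rf.merge_dims(x, dims=(width_dim, height_dim))
    # merged has dims (width*height, feat); merged_dim describes the flattened axis.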

returnn/frontend/array_.py CHANGED
@@ -182,8 +182,29 @@ def merge_dims(
         else:
             out_dim = Dim(1, name="ext")
         return rf.expand_dim(source, out_dim), out_dim
+    if len(dims) == 1:
+        if out_dim is None or out_dim == dims[0]:
+            return source, dims[0]
+        return rf.replace_dim(source, in_dim=dims[0], out_dim=out_dim)
+    if out_dim is None:
+        out_dim = dims[0]
+        reset_dyn_size = False
+        for d in dims[1:]:
+            reset_dyn_size |= d.need_masking() and out_dim.capacity != 1
+            out_dim = out_dim * d
+        if reset_dyn_size:
+            # The dynamic sizes as calculated via dim math would not correctly describe how the tensor looks like.
+            # This would then potentially discard some of the data in the tensor in subsequent operations,
+            # when masking is applied.
+            # Thus, discard the dynamic sizes, and just treat it as a flat dim with scalar dynamic size.
+            # https://github.com/rwth-i6/returnn/issues/1694
+            out_dim_size = dims[0].get_dim_value_tensor()
+            for d in dims[1:]:
+                out_dim_size *= d.get_dim_value_tensor()
+            assert isinstance(out_dim_size, Tensor) and out_dim_size.dims == ()  # scalar
+            out_dim.dyn_size_ext = out_dim_size
     # noinspection PyProtectedMember
-    return source._raw_backend.merge_dims(source, dims=dims, out_dim=out_dim)
+    return source._raw_backend.merge_dims(source, dims=dims, out_dim=out_dim), out_dim


 def split_dims(
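
To make the masking concern in the comment above concrete with plain tensors (all shapes and values below are made up): flattening a dynamic dim together with another dim can leave the valid entries non-contiguous along the merged axis, so a per-sequence size computed via dim math could mask out real data.

    import torch

    # Padded batch: 2 sequences, a group dim of size 2, time capacity 3, sequence lengths [3, 2].
    # 0 marks padding.
    x = torch.tensor([
        [[1, 2, 3],
         [4, 5, 6]],    # seq 0, length 3: all positions valid
        [[7, 8, 0],
         [9, 10, 0]],   # seq 1, length 2: last time step is padding
    ])
    flat = x.reshape(2, -1)  # merge (group, time) -> merged axis of capacity 6
    print(flat[1])  # tensor([ 7,  8,  0,  9, 10,  0])
    # The valid entries of seq 1 are not a contiguous prefix of the merged axis, so a
    # per-sequence dynamic size of 2 * 2 = 4 would keep a padding slot (the 0 after 8)
    # and drop valid data (the 10) once masking is applied; hence the scalar-size fallback.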

returnn/tensor/_dim_extra.py CHANGED
@@ -390,15 +390,15 @@ class _DimMixin:
         if dim_extra:
             dim_extra.cache_dyn_size_ext_dev.clear()
             dim_extra.cache_seq_mask.clear()
+            if dim.dyn_size_ext is not None or dim.dimension is None:
+                dim_extra.cache_dim_math.clear()
+            else:
+                dim_extra.cache_dim_math.clear_dynamic()
         if only_self:
             return
         if dim_extra:
             # Any dims via dim math could also contain raw tensors,
             # so iterate through them.
-            if dim.dyn_size_ext is not None or dim.dimension is None:
-                dim_extra.cache_dim_math.clear()
-            else:
-                dim_extra.cache_dim_math.clear_dynamic()
             queue += dim_extra.cache_dim_math.values()
             if dim_extra.same_as:
                 queue.append(dim_extra.same_as)

returnn/tf/frontend_layers/_backend.py CHANGED
@@ -241,8 +241,8 @@ class ReturnnLayersBackend(Backend[Layer]):
         source: Tensor,
         *,
         dims: Sequence[Dim],
-        out_dim: Optional[Dim] = None,
-    ) -> Tuple[Tensor, Dim]:
+        out_dim: Dim,
+    ) -> Tensor:
         """
         Merges a list of axes into a single one. (Flatten the dims.)
         E.g. input is (batch, width, height, dim) and dims=(width,height), then we get (batch, width*height, dim).
@@ -251,18 +251,14 @@ class ReturnnLayersBackend(Backend[Layer]):
         :param source:
         :param dims:
         :param out_dim:
-        :return: tensor, out_dim
+        :return: tensor
         """
         if not isinstance(source, Tensor):
             raise TypeError(f"merge_dims: unexpected type for source {source!r}, need tensor")
-        if out_dim is None:
-            out_dim = dims[0]
-            for d in dims[1:]:
-                out_dim = out_dim * d
         layer = rfl.make_layer(
             {"class": "merge_dims", "from": source, "axes": dims, "out_dim": out_dim}, name="merge_dims"
         )
-        return layer, out_dim
+        return layer

     @staticmethod
     def split_dims(
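
The TF-layers backend thus just emits a classic "merge_dims" layer, now with the precomputed out_dim passed through. In a plain net-dict config, the corresponding layer would look roughly like the following sketch; the layer names and axis specifiers are illustrative, not taken from this diff.

    # Illustrative net-dict fragment; "data", the layer names and the axis specifiers are placeholders.
    network = {
        "merged": {"class": "merge_dims", "from": "data", "axes": ["T", "F"]},
        "output": {"class": "copy", "from": "merged"},
    }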

returnn/torch/frontend/_backend.py CHANGED
@@ -262,8 +262,8 @@ class TorchBackend(Backend[torch.Tensor]):
         source: Tensor,
         *,
         dims: Sequence[Dim],
-        out_dim: Optional[Dim] = None,
-    ) -> Tuple[Tensor, Dim]:
+        out_dim: Dim,
+    ) -> Tensor:
         """
         Merges a list of axes into a single one. (Flatten the dims.)
         E.g. input is (batch, width, height, dim) and dims=(width,height), then we get (batch, width*height, dim).
@@ -272,18 +272,12 @@ class TorchBackend(Backend[torch.Tensor]):
         :param source:
         :param dims:
         :param out_dim:
-        :return: tensor, out_dim
+        :return: tensor
         """
-        assert dims
-        if len(dims) == 1:
-            return source, dims[0]
+        assert len(dims) >= 2
         first_axis = min(source.dims.index(d) for d in dims)
         pre_dims = source.dims[:first_axis]
         post_dims = [d for d in source.dims if d not in dims and d not in pre_dims]
-        if out_dim is None:
-            out_dim = dims[0]
-            for d in dims[1:]:
-                out_dim = out_dim * d
         source = source.copy_transpose(tuple(pre_dims) + tuple(dims) + tuple(post_dims), allow_int=False)
         out = Tensor(
             "merge_dims",
@@ -295,7 +289,7 @@ class TorchBackend(Backend[torch.Tensor]):
         out.raw_tensor = torch.reshape(source.raw_tensor, out_shape)
         if source.feature_dim and source.feature_dim in dims:
             out.feature_dim = out_dim
-        return out, out_dim
+        return out

     @staticmethod
     def split_dims(
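
Under the hood the Torch backend therefore reduces to a transpose that makes the dims to merge adjacent, followed by a single torch.reshape. A plain-PyTorch sketch of the same shape transformation, with made-up sizes:

    import torch

    x = torch.randn(2, 16, 8, 64)  # (batch, width, height, feat)
    # Here the dims to merge are already adjacent, so no transpose is needed;
    # otherwise they would first be permuted next to each other.
    merged = torch.reshape(x, (2, 16 * 8, 64))  # (batch, width*height, feat)
    assert merged.shape == (2, 128, 64)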

returnn/torch/util/debug_inf_nan.py CHANGED
@@ -39,6 +39,7 @@ from io import TextIOBase
 import traceback
 from types import FrameType
 import torch
+import tree

 # noinspection PyProtectedMember
 from torch.utils._python_dispatch import TorchDispatchMode
@@ -96,6 +97,7 @@ def debug_inf_nan(

 # For efficiency, and to be less spammy
 _TraceFuncNameBlacklist = {
+    "aten::empty.memory_format",
     "aten::zeros_like",
     "aten::ones_like",
     "aten::full",
@@ -113,6 +115,7 @@ _TraceFuncNameBlacklist = {
     "aten::split_with_sizes",
     "aten::slice.Tensor",
     "aten::select.int",
+    "aten::max_pool2d_with_indices",
 }

@@ -140,19 +143,20 @@ class _TraceOps(TorchDispatchMode):
         if self.report_every_op_call:
             print(f"--- op {func.name()}", file=self.file)
         out = func(*args, **kwargs)
-        if isinstance(out, torch.Tensor):
-            with no_python_dispatcher():
-                got_nan_inf_t = torch.stack([torch.isnan(out).any(), torch.isinf(out).any()]).cpu()
-                got_nan = got_nan_inf_t[0].item()
-                got_inf = got_nan_inf_t[1].item()
-                if got_nan or got_inf:
-                    s = "/".join([s_ for s_, b in [("nan", got_nan), ("inf", got_inf)] if b])
-                    print(f"--> {s} in {func}: {out}", file=self.file)
-                    traceback.print_list(
-                        _extract_stack_up_to(skip_top_num_frames=1, root_frame=self.root_frame), file=self.file
-                    )
-                    if self.stop_reporting_after_first_inf_nan:
-                        self.enabled = False
+        for out_ in tree.flatten(out):
+            if isinstance(out_, torch.Tensor):
+                with no_python_dispatcher():
+                    got_nan_inf_t = torch.stack([torch.isnan(out_).any(), torch.isinf(out_).any()]).cpu()
+                    got_nan = got_nan_inf_t[0].item()
+                    got_inf = got_nan_inf_t[1].item()
+                    if got_nan or got_inf:
+                        s = "/".join([s_ for s_, b in [("nan", got_nan), ("inf", got_inf)] if b])
+                        print(f"--> {s} in {func}: {out_}", file=self.file)
+                        traceback.print_list(
+                            _extract_stack_up_to(skip_top_num_frames=1, root_frame=self.root_frame), file=self.file
+                        )
+                        if self.stop_reporting_after_first_inf_nan:
+                            self.enabled = False
         return out

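Here tree is the dm-tree package that RETURNN already uses for nested structures; flattening the op output lets the checker also handle ops that return tuples or lists of tensors instead of a single tensor. A standalone sketch of the same idea, with a made-up example op and values:

    import torch
    import tree  # dm-tree

    # torch.split returns a tuple of tensors, not a single tensor.
    out = torch.split(torch.tensor([1.0, float("inf"), 3.0, 4.0]), 2)
    for t in tree.flatten(out):
        if isinstance(t, torch.Tensor):
            got_nan = bool(torch.isnan(t).any())
            got_inf = bool(torch.isinf(t).any())
            if got_nan or got_inf:
                print("found", "/".join(s for s, b in [("nan", got_nan), ("inf", got_inf)] if b), "in", t)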

returnn-1.20250225.201207.dist-info/METADATA → returnn-1.20250226.115259.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250225.201207
+Version: 1.20250226.115259
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer

returnn-1.20250225.201207.dist-info/RECORD → returnn-1.20250226.115259.dist-info/RECORD
@@ -1,9 +1,9 @@
-returnn/PKG-INFO,sha256=MlICTMbISeiq6sz_1NI8XnxGZLlQzE0eH9VfGGDjlKs,5215
+returnn/PKG-INFO,sha256=2Ws--V5aicc3WJ-I6OrqPbbrvVNTH3Cnno6L7yeIyKY,5215
 returnn/__init__.py,sha256=biBtRsM0WZ406vShaeH-9WFoqJ8XwTbn6g0EeFJ7l8E,1012
 returnn/__main__.py,sha256=qBFbuB1yN3adgVM5pXt2-Yq9vorjRNchNPL8kDKx44M,31752
 returnn/__old_mod_loader__.py,sha256=nvsNY-xELdS_IPNkv66Q9Rmvg4dbGW0-EBRDcCmctos,7654
 returnn/__setup__.py,sha256=22kQn2fh11iPM0hLb2Fy5sLmoU1JGvmDxXRYuRgQkwU,4659
-returnn/_setup_info_generated.py,sha256=DwJubgEQQUSxnLSgr9-UFixkYOeM2bKYKKxyIW_3L3w,77
+returnn/_setup_info_generated.py,sha256=3ur2a8rg2h6MJe2vAo7Tq4axfkV1GYJMcaQdnsmshb8,77
 returnn/config.py,sha256=3tmKhB6FnQZaNdtcYsiB61JnEY--iZ2qmJ4yq0b6tE0,29140
 returnn/forward_iface.py,sha256=A_OJiaXsX4MlXQRzST86ylyxSUZbC402PQL1REcqHjM,911
 returnn/learning_rate_control.py,sha256=ZvWryAn_tv9DhV8sh1LV3eE34Yltl3On3mYZAG4hR9s,34684
@@ -75,12 +75,12 @@ returnn/extern/graph_editor/subgraph.py,sha256=R3uIFqWgiL7L5S4YATm9o9a3wfEa_mSb4
 returnn/extern/graph_editor/transform.py,sha256=d9fEgu0JC342q0g9niVxRWMKzkQQA9mrrajBGcU1o_s,29349
 returnn/extern/graph_editor/util.py,sha256=QMrQeQZ7lJwsrNQub9tof0h3quEaoHiGJaZmogQ7jXE,18707
 returnn/frontend/__init__.py,sha256=2aS7nbxXniIrBp2DODl0xN0f3IJ_dX4Bi9ZlR7W5_DE,1472
-returnn/frontend/_backend.py,sha256=VWTe2ps8UK9BQpbnZRqNfbesQ6PGH5WHqkEa4ai8btw,50353
+returnn/frontend/_backend.py,sha256=TNkEdj9GKxJfSM1ZMQ_SdAQzn2TU7SQbG6JGdaWhUeI,50374
 returnn/frontend/_cache.py,sha256=JAhi7L-raQ3A-NC3JUYDtdRTwT3BGJJGGZxrZ8MfEWQ,8403
 returnn/frontend/_numpy_backend.py,sha256=2oCtG0YCWL_89v4cD_jDj8em1O_Fp-_YWl5EblGi_yo,7858
 returnn/frontend/_random_journal.py,sha256=_ktP_mjgx8vtQQGX_DofdhewJj0aPiczefTWeemPkmo,5457
 returnn/frontend/_utils.py,sha256=4A3MSRM0i86J77550uR_AjcBEPu6nymLUZ9Xd1V3Fkc,12073
-returnn/frontend/array_.py,sha256=CYk8lQinS2EDINBttl4UqSYP2BhqikeSjnbNy9Mzpx4,48013
+returnn/frontend/array_.py,sha256=M5vCeH0nlwJ-zrdjbZpsMLN6StOLn0iM7PnXvGLLE3g,49154
 returnn/frontend/attention.py,sha256=GKt-Xqnz8sIyXVrE0i4VCS7J2Wu7dmoH_BA0Cu8CrXQ,45769
 returnn/frontend/backend.py,sha256=iQ9w4xl8Ea7bgpb0VUaCKq50rV5Bl2E5J8Rhd-oqD_c,883
 returnn/frontend/build_from_dict.py,sha256=rfWa2rjjhIR_kIQED_nMrygrQBunS6unegzWTLVbC98,3017
@@ -154,7 +154,7 @@ returnn/sprint/extern_interface.py,sha256=l-v1X-Yg0UpTFe7Y3c4FwWOqpSNuv9Oy5EzqlK
 returnn/sprint/interface.py,sha256=_IGNQlOFcJcwsSeVkKcM-y8g2NDJv07jFhii47KfWtg,36490
 returnn/tensor/README.md,sha256=X6BqcRLrPLPnwF9yR69uqIFrMnNluj9pBkOPHwNgzuo,501
 returnn/tensor/__init__.py,sha256=on6j5PEOQpck50UcsR4nJzJSDmoVy34z1Oq4efv6Ax0,154
-returnn/tensor/_dim_extra.py,sha256=ywfU-vMewufkDYeM1UE3Gfee3NhAUkoSZxvC6L-lkH8,122551
+returnn/tensor/_dim_extra.py,sha256=kL_nnGNaRpKIQLlvCo6TJ35WynS_jIssNZusFmtOAE0,122551
 returnn/tensor/_tensor_extra.py,sha256=v8oacDyrNMlDTRF0XR0LcU04snr5I1D9_yidw1ZWKk4,164859
 returnn/tensor/_tensor_mixin_base.py,sha256=H5z86I0NejxrSgMH1c5oXQzBqS6L9HpvP4y7oegBaSc,643
 returnn/tensor/_tensor_op_overloads.py,sha256=kVVcnYtcZdW7Vjj78V1Im_yVX2M2r6dUTgeiAQZ37X0,5449
@@ -177,7 +177,7 @@ returnn/tf/sprint.py,sha256=Yqjh0-6sCWHpdDPQCzHKx7TwQCOjJyjfd0KHtnYdd-8,5471
 returnn/tf/updater.py,sha256=St4Z5iBjlkWaB6CiS-K1VNc_iLaan2e6-mVMTTPldzk,72034
 returnn/tf/frontend_layers/README.md,sha256=P4vVl_EK-4jT55m40mq-K4Nr9yFY0tJR5fmDzTHSDFE,1096
 returnn/tf/frontend_layers/__init__.py,sha256=MGUn7rv6fOefbtkX-5pq6fC1T6Y5h0oh1uOPSEcv1_I,506
-returnn/tf/frontend_layers/_backend.py,sha256=6bT_4fjfV0IRcFqcZ0kcWLx0eYZGRqAJDTEfWSRIFnA,47451
+returnn/tf/frontend_layers/_backend.py,sha256=U7rbRY9XgMkxxyWY2D8KG-KesSOEGLCxn-Gl6dgwmPc,47277
 returnn/tf/frontend_layers/_utils.py,sha256=ijByaDOqPDod5mZC9EoTkt8PHBEODXHsWbkwDOF9XW4,4205
 returnn/tf/frontend_layers/cond.py,sha256=yQ2h5W0sgMZndJdrWv2EE9k9yIcspQ1U0HwBSh3hOKE,14830
 returnn/tf/frontend_layers/config_entry_points.py,sha256=t01RWOiaZohzuqPXX-MLV0P5yCOfE0dz-9dZ77_pK4c,5751
@@ -216,7 +216,7 @@ returnn/torch/data/queued_data_iter.py,sha256=PoOsGHdHVZjTmcyfq_ZOw--P6hyfTdmAWI
 returnn/torch/data/returnn_dataset_wrapper.py,sha256=1Bw82-Ge_8m_DSDXZNqQ3zGDic2HQlp6jysELL0NVK0,7369
 returnn/torch/data/tensor_utils.py,sha256=-Teqi--LLbt6q_5mDRdoHZHmPgSdC83W706ukif_YiU,1284
 returnn/torch/frontend/__init__.py,sha256=AA48HZnC17ASuKA0EWy8loZ-Bib_yUtqF4T1wYvjst4,62
-returnn/torch/frontend/_backend.py,sha256=8rCnNRoiUf_Sqmb1u2Y7Mf89Hmzd0LkrroLoXVKn6ww,101468
+returnn/torch/frontend/_backend.py,sha256=TqyDWNP4XCvJNNGn8jyxaT8BOEjVE24QCUR3qsTIS3A,101242
 returnn/torch/frontend/_rand.py,sha256=1JgIkV2XmpgJD86zXZ-NCAe-QuoP2swr6NaS1oz3Qa8,1830
 returnn/torch/frontend/bridge.py,sha256=Z2_UW8AagezC7zsXDc5PKcd8G9WwisV7j9SWGHU0m4U,7840
 returnn/torch/frontend/raw_ops.py,sha256=lF0h-KtYYsdaaqQADylVZp9qzPskOOXA4MfmYDyx5IU,296
@@ -226,7 +226,7 @@ returnn/torch/optim/lion.py,sha256=jV_qfwyyO5HAgqW94caap-ALkVjU688RpRgkZyLNZ5Y,5
 returnn/torch/util/README.md,sha256=AW-6ueWhgcwDcm57md6sm227QXNkvLnlRLwaH7NlS-w,193
 returnn/torch/util/__init__.py,sha256=AOXYUjzPm0XrzFJCPAXo9Jj_FvqD1XH3FfKtho80Vl8,26
 returnn/torch/util/array_.py,sha256=ell3VZvn01SLtF9Pw2fvPzFNO-XDQ7tSB9VCrVSKmSA,2556
-returnn/torch/util/debug_inf_nan.py,sha256=v0IzLy4kRKBWChSV70O4x829QtEuXMwB9mBqAyE4O2o,6223
+returnn/torch/util/debug_inf_nan.py,sha256=pXAHwgyn1aimLjD-XUblY2syBRCK0J20ioWgpvWfHvg,6400
 returnn/torch/util/diagnose_gpu.py,sha256=PYMmSk7iQ-jC3RXKKNXlYx1Q744C0LXqz0SB6ympwQg,5844
 returnn/torch/util/exception_helper.py,sha256=4e7YEf9D42aAUEkM3uSjnOxpNEYgtyPSpNV0-1L6PSU,4319
 returnn/torch/util/gradient_checkpoint.py,sha256=iLy-FB65DC8O6LxzmMvFjnSdpIVpko87ppIvRKAbtpQ,27995
@@ -253,8 +253,8 @@ returnn/util/sig_proc.py,sha256=Tjz0VOAVyqu2qDCF5HZ1JjALjcFsHcNkcd96WgZeKfE,7265
 returnn/util/task_system.py,sha256=y4sMVXQ25Qd2z0rx03uOlXlkE-jbCYC1Sjfn-XlraVU,26003
 returnn/util/train_proc_manager.py,sha256=Pjht28k6uz6BNQ47uW6Gf880iyq5q4wx7P_K2tmoAM8,3266
 returnn/util/watch_memory.py,sha256=BR5P2kvBN6UI81cE0_1WAA6Hd1SByLbBaiDxvLhPOew,4213
-returnn-1.20250225.201207.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
-returnn-1.20250225.201207.dist-info/METADATA,sha256=MlICTMbISeiq6sz_1NI8XnxGZLlQzE0eH9VfGGDjlKs,5215
-returnn-1.20250225.201207.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
-returnn-1.20250225.201207.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
-returnn-1.20250225.201207.dist-info/RECORD,,
+returnn-1.20250226.115259.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
+returnn-1.20250226.115259.dist-info/METADATA,sha256=2Ws--V5aicc3WJ-I6OrqPbbrvVNTH3Cnno6L7yeIyKY,5215
+returnn-1.20250226.115259.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
+returnn-1.20250226.115259.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
+returnn-1.20250226.115259.dist-info/RECORD,,