PyPI - returnn - Versions diffs - 1.20250305.150759__tar.gz → 1.20250312.115110__tar.gz - Mend

returnn 1.20250305.150759.tar.gz → 1.20250312.115110.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of returnn might be problematic. Click here for more details.

Files changed (476) hide show

{returnn-1.20250305.150759/returnn.egg-info → returnn-1.20250312.115110}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250305.150759
+Version: 1.20250312.115110
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer

returnn-1.20250312.115110/_setup_info_generated.py ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ version = '1.20250312.115110'
2	+ long_version = '1.20250312.115110+git.49d5548'

{returnn-1.20250305.150759 → returnn-1.20250312.115110}/returnn/frontend/encoder/conformer.py RENAMED Viewed

@@ -294,7 +294,7 @@ class ConformerEncoder(ISeqDownsamplingEncoder):
     def __init__(
         self,
         in_dim: Dim,
-        out_dim: Dim = Dim(512, name="conformer-enc-default-out-dim"),
+        out_dim: Union[Dim, int] = Dim(512, name="conformer-enc-default-out-dim"),
         *,
         num_layers: int,
         input_layer: Optional[Union[ConformerConvSubsample, ISeqDownsamplingEncoder, rf.Module, Any]],
@@ -333,6 +333,11 @@ class ConformerEncoder(ISeqDownsamplingEncoder):
         """
         super().__init__()
+        assert isinstance(in_dim, Dim)
+        if isinstance(out_dim, int):
+            out_dim = Dim(out_dim, name="conformer-enc-out-dim")
+        assert isinstance(out_dim, Dim)
         self.in_dim = in_dim
         self.out_dim = out_dim
         self.dropout = dropout

{returnn-1.20250305.150759 → returnn-1.20250312.115110/returnn.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250305.150759
+Version: 1.20250312.115110
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer

{returnn-1.20250305.150759 → returnn-1.20250312.115110}/tests/rf_utils.py RENAMED Viewed

@@ -57,7 +57,7 @@ def run_model(
     dyn_dim_min_sizes: Optional[Dict[Dim, int]] = None,
     test_tensorflow: bool = True,
     allow_inf_nan_in_output: bool = False,
-    test_single_batch_entry: bool = False,  # can later enable this globally
+    test_single_batch_entry: bool = True,
 ) -> TensorDict:
     """run"""
     print(f"* run_model with dyn_dim_max_sizes={dyn_dim_max_sizes!r}")

{returnn-1.20250305.150759 → returnn-1.20250312.115110}/tests/test_rf_array.py RENAMED Viewed

@@ -364,7 +364,8 @@ def test_reshape():
         out = model(extern_data["data"])
         out.mark_as_default_output(shape=(batch_dim, time_dim, in_dim))
-    run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step)
+    # Note: The tested op here is a bit meaningless. It also is not consistent for different batch sizes...
+    run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, test_single_batch_entry=False)
 def test_expand_dim():
@@ -791,7 +792,7 @@ def test_reverse_sequence_no_dyn():
         out = rf.reverse_sequence(extern_data["data"], axis=time_dim, handle_dynamic_dims=False)
         out.mark_as_default_output(shape=(batch_dim, time_dim, in_dim))
-    run_model(extern_data, lambda *, epoch, step: rf.Module(), _forward_step)
+    run_model(extern_data, lambda *, epoch, step: rf.Module(), _forward_step, test_single_batch_entry=False)
 def test_where():
@@ -877,7 +878,7 @@ def test_copy_masked():
         x, _ = rf.pool1d(x, mode="avg", pool_size=3, strides=1, padding="same", in_spatial_dim=time_dim)
         x.mark_as_default_output(shape=(batch_dim, time_dim, in_dim))
-    run_model(extern_data, lambda *, epoch, step: rf.Module(), _forward_step)
+    run_model(extern_data, lambda *, epoch, step: rf.Module(), _forward_step, test_single_batch_entry=False)
 def test_cast_sparse():

{returnn-1.20250305.150759 → returnn-1.20250312.115110}/tests/test_rf_cond.py RENAMED Viewed

@@ -38,7 +38,7 @@ def test_cond():
         out = model(extern_data["data"])
         out.mark_as_default_output(shape=(batch_dim, time_dim, out_dim))
-    run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step)
+    run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, test_single_batch_entry=False)
 def test_cond_via_time_even():
@@ -69,8 +69,20 @@ def test_cond_via_time_even():
         out = model(extern_data["data"])
         out.mark_as_default_output(shape=(batch_dim, time_dim, out_dim))
-    run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, dyn_dim_max_sizes={time_dim: 5})
-    run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, dyn_dim_max_sizes={time_dim: 6})
+    run_model(
+        extern_data,
+        lambda *, epoch, step: _Net(),
+        _forward_step,
+        dyn_dim_max_sizes={time_dim: 5},
+        test_single_batch_entry=False,
+    )
+    run_model(
+        extern_data,
+        lambda *, epoch, step: _Net(),
+        _forward_step,
+        dyn_dim_max_sizes={time_dim: 6},
+        test_single_batch_entry=False,
+    )
 def test_cond_shared_params():
@@ -100,8 +112,20 @@ def test_cond_shared_params():
         out = model(extern_data["data"])
         out.mark_as_default_output(shape=(batch_dim, time_dim, out_dim))
-    run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, dyn_dim_max_sizes={time_dim: 5})
-    run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, dyn_dim_max_sizes={time_dim: 6})
+    run_model(
+        extern_data,
+        lambda *, epoch, step: _Net(),
+        _forward_step,
+        dyn_dim_max_sizes={time_dim: 5},
+        test_single_batch_entry=False,
+    )
+    run_model(
+        extern_data,
+        lambda *, epoch, step: _Net(),
+        _forward_step,
+        dyn_dim_max_sizes={time_dim: 6},
+        test_single_batch_entry=False,
+    )
 def test_cond_twice_shared_params():
@@ -140,8 +164,20 @@ def test_cond_twice_shared_params():
         out = model(extern_data["data"])
         out.mark_as_default_output(shape=(batch_dim, time_dim, out_dim))
-    run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, dyn_dim_max_sizes={time_dim: 5})
-    run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, dyn_dim_max_sizes={time_dim: 6})
+    run_model(
+        extern_data,
+        lambda *, epoch, step: _Net(),
+        _forward_step,
+        dyn_dim_max_sizes={time_dim: 5},
+        test_single_batch_entry=False,
+    )
+    run_model(
+        extern_data,
+        lambda *, epoch, step: _Net(),
+        _forward_step,
+        dyn_dim_max_sizes={time_dim: 6},
+        test_single_batch_entry=False,
+    )
 def test_cond_param_assign():
@@ -173,8 +209,20 @@ def test_cond_param_assign():
         out = model(extern_data["data"])
         out.mark_as_default_output(shape=())
-    out1 = run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, dyn_dim_max_sizes={time_dim: 5})
-    out2 = run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, dyn_dim_max_sizes={time_dim: 6})
+    out1 = run_model(
+        extern_data,
+        lambda *, epoch, step: _Net(),
+        _forward_step,
+        dyn_dim_max_sizes={time_dim: 5},
+        test_single_batch_entry=False,
+    )
+    out2 = run_model(
+        extern_data,
+        lambda *, epoch, step: _Net(),
+        _forward_step,
+        dyn_dim_max_sizes={time_dim: 6},
+        test_single_batch_entry=False,
+    )
     assert out1["output"].raw_tensor == 2
     assert out2["output"].raw_tensor == 5
@@ -208,8 +256,20 @@ def test_cond_param_assign2():
         out = model(extern_data["data"])
         out.mark_as_default_output(shape=())
-    out1 = run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, dyn_dim_max_sizes={time_dim: 5})
-    out2 = run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, dyn_dim_max_sizes={time_dim: 6})
+    out1 = run_model(
+        extern_data,
+        lambda *, epoch, step: _Net(),
+        _forward_step,
+        dyn_dim_max_sizes={time_dim: 5},
+        test_single_batch_entry=False,
+    )
+    out2 = run_model(
+        extern_data,
+        lambda *, epoch, step: _Net(),
+        _forward_step,
+        dyn_dim_max_sizes={time_dim: 6},
+        test_single_batch_entry=False,
+    )
     assert out1["output"].raw_tensor == 9
     assert out2["output"].raw_tensor == 5
@@ -246,8 +306,20 @@ def test_cond_param_assign3():
         out.mark_as_default_output(shape=())
         param.mark_as_output(shape=(), name="param")
-    out1 = run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, dyn_dim_max_sizes={time_dim: 5})
-    out2 = run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, dyn_dim_max_sizes={time_dim: 6})
+    out1 = run_model(
+        extern_data,
+        lambda *, epoch, step: _Net(),
+        _forward_step,
+        dyn_dim_max_sizes={time_dim: 5},
+        test_single_batch_entry=False,
+    )
+    out2 = run_model(
+        extern_data,
+        lambda *, epoch, step: _Net(),
+        _forward_step,
+        dyn_dim_max_sizes={time_dim: 6},
+        test_single_batch_entry=False,
+    )
     assert out1["output"].raw_tensor == 6 and out1["param"].raw_tensor == 2
     assert out2["output"].raw_tensor == 42 and out2["param"].raw_tensor == 5

{returnn-1.20250305.150759 → returnn-1.20250312.115110}/tests/test_rf_conv.py RENAMED Viewed

@@ -341,7 +341,7 @@ def test_maxpool1d_stride_border_cond():
         # Note: Currently not the single batch test because there is another problem with RF PT pool,
         # which does not correctly handle this case. We get:
         #   RuntimeError: max_pool1d() Invalid computed output size: -1
-        # test_single_batch_entry=True,
+        test_single_batch_entry=False,
     )
     out = out["output"]
     (out_spatial_dim,) = out.get_dyn_size_tags()

{returnn-1.20250305.150759 → returnn-1.20250312.115110}/tests/test_rf_loop.py RENAMED Viewed

@@ -66,20 +66,28 @@ def test_while_loop():
     class _Net(rf.Module):
         def __call__(self, x: Tensor) -> Tensor:
-            def _cond(s: Tuple[Tensor, Tensor]):
-                t, s_ = s
+            def _cond(s: Tuple[Tensor, Tensor, Tensor]) -> Tensor:
+                t, ended, s_ = s
                 if t.raw_tensor.__class__.__module__.startswith("torch"):
-                    print("**", t.raw_tensor, rf.reduce_sum(s_, axis=s_.dims).raw_tensor)
-                return rf.logical_and(rf.reduce_sum(s_, axis=s_.dims) < 50, t < time_dim.get_dim_value_tensor())
+                    print("**", t.raw_tensor, ended.raw_tensor, rf.reduce_sum(s_, axis=in_dim).raw_tensor)
+                return rf.logical_not(rf.reduce_all(ended, axis=[batch_dim]))
             def _body(s):
-                t, s_ = s
-                return t + 1, s_ + rf.abs(rf.gather(x, indices=t, axis=time_dim))
-            _, final_s = rf.while_loop(
+                t, ended, s_ = s
+                cont = rf.logical_and(rf.reduce_sum(s_, axis=in_dim) < 50, t < time_dim.get_size_tensor())
+                ended = rf.logical_or(ended, rf.logical_not(cont))
+                s__ = s_ + rf.abs(rf.gather(x, indices=t, axis=time_dim, clip_to_valid=True))
+                s__ = rf.where(ended, s_, s__)
+                return t + 1, ended, s__
+            _, _, final_s = rf.while_loop(
                 _cond,
                 _body,
-                initial=(rf.zeros((), dtype=rf.get_default_array_index_dtype()), rf.zeros((batch_dim, in_dim))),
+                initial=(
+                    rf.zeros((), dtype=rf.get_default_array_index_dtype()),  # t
+                    rf.zeros((batch_dim,), dtype="bool"),  # ended
+                    rf.zeros((batch_dim, in_dim)),  # s
+                ),
             )
             return final_s
@@ -209,4 +217,7 @@ def test_scan_changing_dim():
         out, beam_dim = model(extern_data["data"])
         out.mark_as_default_output(shape=(batch_dim, beam_dim, in_dim))
-    run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step, test_tensorflow=False)
+    # TODO the way this is implemented, accessing y[-1], is not consistent w.r.t. different batch sizes...
+    run_model(
+        extern_data, lambda *, epoch, step: _Net(), _forward_step, test_tensorflow=False, test_single_batch_entry=False
+    )

{returnn-1.20250305.150759 → returnn-1.20250312.115110}/tests/test_rf_normalization.py RENAMED Viewed

@@ -36,6 +36,8 @@ def test_batch_norm():
         out = model(extern_data["data"])
         out.mark_as_default_output(shape=(batch_dim, time_dim, in_dim))
+    # Note: no test_single_batch_entry=False needed here because we currently don't check the running stats,
+    # and the output currently uses the initial running stats, i.e. should be the same for all batches.
     run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step)
@@ -62,4 +64,11 @@ def test_batch_norm_masking():
         out = model(extern_data["data"])
         out.mark_as_default_output(shape=(batch_dim, time_dim, in_dim))
-    run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step)
+    run_model(
+        extern_data,
+        lambda *, epoch, step: _Net(),
+        _forward_step,
+        # BatchNorm by definition uses the batch dim.
+        # Needed here because track_running_stats=False and thus use_current_batch_stats=True.
+        test_single_batch_entry=False,
+    )

{returnn-1.20250305.150759 → returnn-1.20250312.115110}/tests/test_rf_rec.py RENAMED Viewed

@@ -16,6 +16,8 @@ def test_lstm():
     extern_data = TensorDict(
         {
             "data": Tensor("data", [batch_dim, time_dim, in_dim], dtype="float32"),
+            "state_h": Tensor("state_h", [batch_dim, out_dim], dtype="float32"),
+            "state_c": Tensor("state_c", [batch_dim, out_dim], dtype="float32"),
             "classes": Tensor("classes", [batch_dim, time_dim], dtype="int32", sparse_dim=out_dim),
         }
     )
@@ -32,10 +34,7 @@ def test_lstm():
     # noinspection PyShadowingNames
     def _forward_step(*, model: _Net, extern_data: TensorDict):
-        state = rf.LstmState(
-            h=rf.random_normal(dims=[batch_dim, out_dim], dtype="float32"),
-            c=rf.random_normal(dims=[batch_dim, out_dim], dtype="float32"),
-        )
+        state = rf.LstmState(h=extern_data["state_h"], c=extern_data["state_c"])
         out, new_state = model(extern_data["data"], state=state, spatial_dim=time_dim)
         out.mark_as_output("out", shape=(batch_dim, time_dim, out_dim))
         new_state.h.mark_as_output("h", shape=(batch_dim, out_dim))
@@ -49,6 +48,8 @@ def test_lstm_single_step():
     extern_data = TensorDict(
         {
             "data": Tensor("data", [batch_dim, in_dim], dtype="float32"),
+            "state_h": Tensor("state_h", [batch_dim, out_dim], dtype="float32"),
+            "state_c": Tensor("state_c", [batch_dim, out_dim], dtype="float32"),
         }
     )
@@ -64,10 +65,7 @@ def test_lstm_single_step():
     # noinspection PyShadowingNames
     def _forward_step(*, model: _Net, extern_data: TensorDict):
-        state = rf.LstmState(
-            h=rf.random_normal(dims=[batch_dim, out_dim], dtype="float32"),
-            c=rf.random_normal(dims=[batch_dim, out_dim], dtype="float32"),
-        )
+        state = rf.LstmState(h=extern_data["state_h"], c=extern_data["state_c"])
         out, new_state = model(extern_data["data"], state=state, spatial_dim=single_step_dim)
         out.mark_as_output("out", shape=(batch_dim, out_dim))
         new_state.h.mark_as_output("h", shape=(batch_dim, out_dim))
@@ -82,6 +80,8 @@ def test_zoneout_lstm():
     extern_data = TensorDict(
         {
             "data": Tensor("data", [batch_dim, time_dim, in_dim], dtype="float32"),
+            "state_h": Tensor("state_h", [batch_dim, out_dim], dtype="float32", feature_dim=out_dim),
+            "state_c": Tensor("state_c", [batch_dim, out_dim], dtype="float32", feature_dim=out_dim),
             "classes": Tensor("classes", [batch_dim, time_dim], dtype="int32", sparse_dim=out_dim),
         }
     )
@@ -103,10 +103,7 @@ def test_zoneout_lstm():
     # noinspection PyShadowingNames
     def _forward_step(*, model: _Net, extern_data: TensorDict):
-        state = rf.LstmState(
-            h=rf.random_normal(dims=[batch_dim, out_dim], dtype="float32", feature_dim=out_dim),
-            c=rf.random_normal(dims=[batch_dim, out_dim], dtype="float32", feature_dim=out_dim),
-        )
+        state = rf.LstmState(h=extern_data["state_h"], c=extern_data["state_c"])
         out, new_state = model(extern_data["data"], state=state, spatial_dim=time_dim)
         out.mark_as_output("out", shape=(batch_dim, time_dim, out_dim))
         new_state.h.mark_as_output("h", shape=(batch_dim, out_dim))
@@ -121,6 +118,8 @@ def test_zoneout_lstm_single_step():
     extern_data = TensorDict(
         {
             "data": Tensor("data", [batch_dim, in_dim], dtype="float32"),
+            "state_h": Tensor("state_h", [batch_dim, out_dim], dtype="float32"),
+            "state_c": Tensor("state_c", [batch_dim, out_dim], dtype="float32"),
         }
     )
@@ -141,10 +140,7 @@ def test_zoneout_lstm_single_step():
     # noinspection PyShadowingNames
     def _forward_step(*, model: _Net, extern_data: TensorDict):
-        state = rf.LstmState(
-            h=rf.random_normal(dims=[batch_dim, out_dim], dtype="float32", feature_dim=out_dim),
-            c=rf.random_normal(dims=[batch_dim, out_dim], dtype="float32", feature_dim=out_dim),
-        )
+        state = rf.LstmState(h=extern_data["state_h"], c=extern_data["state_c"])
         out, new_state = model(extern_data["data"], state=state, spatial_dim=single_step_dim)
         out.mark_as_output("out", shape=(batch_dim, out_dim))
         new_state.h.mark_as_output("h", shape=(batch_dim, out_dim))