returnn 1.20250826.155029.tar.gz → 1.20250828.142552.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of returnn might be problematic.

Files changed (476)
  1. {returnn-1.20250826.155029/returnn.egg-info → returnn-1.20250828.142552}/PKG-INFO +1 -1
  2. returnn-1.20250828.142552/_setup_info_generated.py +2 -0
  3. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_cache.py +4 -2
  4. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/array_.py +72 -18
  5. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/conv.py +2 -1
  6. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/encoder/conformer.py +32 -8
  7. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tensor/_dim_extra.py +34 -6
  8. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/basic.py +8 -6
  9. {returnn-1.20250826.155029 → returnn-1.20250828.142552/returnn.egg-info}/PKG-INFO +1 -1
  10. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/rf_utils.py +26 -5
  11. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_array.py +40 -0
  12. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_attention.py +2 -2
  13. returnn-1.20250826.155029/_setup_info_generated.py +0 -2
  14. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/.editorconfig +0 -0
  15. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/.gitignore +0 -0
  16. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/.gitmodules +0 -0
  17. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/.kateconfig +0 -0
  18. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/CHANGELOG.md +0 -0
  19. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/CODEOWNERS +0 -0
  20. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/CONTRIBUTING.md +0 -0
  21. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/LICENSE +0 -0
  22. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/MANIFEST.in +0 -0
  23. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/README.rst +0 -0
  24. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/__init__.py +0 -0
  25. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/12AX.cluster_map +0 -0
  26. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/_setup_returnn_env.py +0 -0
  27. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-fwd.config +0 -0
  28. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-horovod-mpi.py +0 -0
  29. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-horovod-mpi.py.sh +0 -0
  30. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-horovod-mpi.sh +0 -0
  31. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-hyper-param-tuning.config +0 -0
  32. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-iter-dataset.py +0 -0
  33. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-list-devices.py +0 -0
  34. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-lua-torch-layer.config +0 -0
  35. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-pretrain.config +0 -0
  36. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-record-and-push-to-webserver.py +0 -0
  37. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-returnn-as-framework.py +0 -0
  38. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-rf-pt-benchmark.py +0 -0
  39. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-rf.config +0 -0
  40. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-rhn-enwik8.config +0 -0
  41. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-sprint-interface.py +0 -0
  42. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-att-copy.config +0 -0
  43. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-attention.config +0 -0
  44. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
  45. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
  46. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-enc-dec.config +0 -0
  47. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-hard-att-copy.config +0 -0
  48. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-lstm-benchmark.py +0 -0
  49. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
  50. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
  51. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-native-lstm.12ax.config +0 -0
  52. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-native-lstm2.12ax.config +0 -0
  53. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
  54. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-neural-transducer.12ax.config +0 -0
  55. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-rec-explicit-lstm.config +0 -0
  56. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-rec-explicit-rnn.config +0 -0
  57. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-rec-self-att.config +0 -0
  58. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-search-compiled-graph.py +0 -0
  59. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
  60. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-timit-lstm-ctc.config +0 -0
  61. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-torch.config +0 -0
  62. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
  63. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo.sh +0 -0
  64. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
  65. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
  66. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
  67. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/README.txt +0 -0
  68. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/chars.txt +0 -0
  69. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/config_demo +0 -0
  70. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/config_fwd +0 -0
  71. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/config_real +0 -0
  72. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
  73. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/decode.py +0 -0
  74. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
  75. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/go.sh +0 -0
  76. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/lines.txt +0 -0
  77. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/split/eval.txt +0 -0
  78. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/split/train.txt +0 -0
  79. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/split/valid.txt +0 -0
  80. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/README.md +0 -0
  81. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/artificial/create_test_h5.py +0 -0
  82. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/artificial/forwardconfig +0 -0
  83. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/artificial/go.sh +0 -0
  84. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/artificial/trainconfig +0 -0
  85. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
  86. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
  87. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/artificial_rgb/go.sh +0 -0
  88. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
  89. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/pyproject.toml +0 -0
  90. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/requirements.txt +0 -0
  91. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/__init__.py +0 -0
  92. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/__main__.py +0 -0
  93. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/__old_mod_loader__.py +0 -0
  94. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/__setup__.py +0 -0
  95. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/config.py +0 -0
  96. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/__init__.py +0 -0
  97. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/audio.py +0 -0
  98. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/basic.py +0 -0
  99. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/bundle_file.py +0 -0
  100. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/cached.py +0 -0
  101. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/cached2.py +0 -0
  102. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/distrib_files.py +0 -0
  103. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/generating.py +0 -0
  104. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/hdf.py +0 -0
  105. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/lm.py +0 -0
  106. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/map.py +0 -0
  107. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/meta.py +0 -0
  108. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/multi_proc.py +0 -0
  109. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/normalization_data.py +0 -0
  110. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/numpy_dump.py +0 -0
  111. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/postprocessing.py +0 -0
  112. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/raw_wav.py +0 -0
  113. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/sprint.py +0 -0
  114. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/stereo.py +0 -0
  115. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/text_dict.py +0 -0
  116. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/util/__init__.py +0 -0
  117. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/util/feature_extraction.py +0 -0
  118. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/util/strings.py +0 -0
  119. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/util/vocabulary.py +0 -0
  120. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/engine/__init__.py +0 -0
  121. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/engine/base.py +0 -0
  122. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/engine/batch.py +0 -0
  123. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/__init__.py +0 -0
  124. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/__main__.py +0 -0
  125. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/.git +0 -0
  126. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
  127. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
  128. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
  129. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
  130. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
  131. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
  132. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
  133. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
  134. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
  135. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
  136. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
  137. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
  138. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
  139. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
  140. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
  141. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
  142. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
  143. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
  144. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
  145. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
  146. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
  147. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
  148. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
  149. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
  150. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/__init__.py +0 -0
  151. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/graph_editor/README.md +0 -0
  152. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/graph_editor/__init__.py +0 -0
  153. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/graph_editor/edit.py +0 -0
  154. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/graph_editor/reroute.py +0 -0
  155. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/graph_editor/select.py +0 -0
  156. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/graph_editor/subgraph.py +0 -0
  157. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/graph_editor/transform.py +0 -0
  158. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/graph_editor/util.py +0 -0
  159. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/forward_iface.py +0 -0
  160. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/__init__.py +0 -0
  161. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_backend.py +0 -0
  162. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_native/__init__.py +0 -0
  163. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_native/backend.cpp +0 -0
  164. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_native/backend.hpp +0 -0
  165. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_native/module.cpp +0 -0
  166. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_native/module.hpp +0 -0
  167. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_native/py_utils.hpp +0 -0
  168. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_native/tensor_ops.cpp +0 -0
  169. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_native/tensor_ops.hpp +0 -0
  170. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_numpy_backend.py +0 -0
  171. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_random_journal.py +0 -0
  172. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_utils.py +0 -0
  173. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/attention.py +0 -0
  174. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/audio/__init__.py +0 -0
  175. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/audio/mel.py +0 -0
  176. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/audio/specaugment.py +0 -0
  177. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/backend.py +0 -0
  178. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/build_from_dict.py +0 -0
  179. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/cond.py +0 -0
  180. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/const.py +0 -0
  181. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/container.py +0 -0
  182. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/control_flow_ctx.py +0 -0
  183. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/conversions/__init__.py +0 -0
  184. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/conversions/espnet_e_branchformer.py +0 -0
  185. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/conversions/hf_llama.py +0 -0
  186. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/conversions/torch_nn.py +0 -0
  187. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/decoder/__init__.py +0 -0
  188. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/decoder/transformer.py +0 -0
  189. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/device.py +0 -0
  190. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/dims.py +0 -0
  191. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/dropout.py +0 -0
  192. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/dtype.py +0 -0
  193. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/encoder/__init__.py +0 -0
  194. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/encoder/base.py +0 -0
  195. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/encoder/conformer_v2.py +0 -0
  196. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/encoder/e_branchformer.py +0 -0
  197. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/encoder/transformer.py +0 -0
  198. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/gradient.py +0 -0
  199. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/graph.py +0 -0
  200. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/hooks.py +0 -0
  201. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/init.py +0 -0
  202. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/label_smoothing.py +0 -0
  203. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/linear.py +0 -0
  204. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/loop.py +0 -0
  205. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/loss.py +0 -0
  206. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/math_.py +0 -0
  207. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/matmul.py +0 -0
  208. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/module.py +0 -0
  209. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/nested.py +0 -0
  210. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/normalization.py +0 -0
  211. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/parameter.py +0 -0
  212. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/parametrizations.py +0 -0
  213. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/parametrize.py +0 -0
  214. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/piecewise_linear.py +0 -0
  215. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/rand.py +0 -0
  216. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/rec.py +0 -0
  217. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/reduce.py +0 -0
  218. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/run_ctx.py +0 -0
  219. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/signal.py +0 -0
  220. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/state.py +0 -0
  221. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/stepwise_scheduler.py +0 -0
  222. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/tensor_array.py +0 -0
  223. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/types.py +0 -0
  224. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/import_/__init__.py +0 -0
  225. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/import_/common.py +0 -0
  226. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/import_/git.py +0 -0
  227. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/import_/import_.py +0 -0
  228. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/learning_rate_control.py +0 -0
  229. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/log.py +0 -0
  230. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/native_op.cpp +0 -0
  231. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/native_op.py +0 -0
  232. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/pretrain.py +0 -0
  233. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/sprint/__init__.py +0 -0
  234. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/sprint/cache.py +0 -0
  235. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/sprint/control.py +0 -0
  236. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/sprint/error_signals.py +0 -0
  237. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/sprint/extern_interface.py +0 -0
  238. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/sprint/interface.py +0 -0
  239. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tensor/README.md +0 -0
  240. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tensor/__init__.py +0 -0
  241. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tensor/_tensor_extra.py +0 -0
  242. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tensor/_tensor_mixin_base.py +0 -0
  243. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tensor/_tensor_op_overloads.py +0 -0
  244. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tensor/control_flow_ctx.py +0 -0
  245. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tensor/dim.py +0 -0
  246. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tensor/marked_dim.py +0 -0
  247. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tensor/tensor.py +0 -0
  248. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tensor/tensor_dict.py +0 -0
  249. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tensor/utils.py +0 -0
  250. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/__init__.py +0 -0
  251. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/compat.py +0 -0
  252. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/data_pipeline.py +0 -0
  253. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/distributed.py +0 -0
  254. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/engine.py +0 -0
  255. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/README.md +0 -0
  256. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/__init__.py +0 -0
  257. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/_backend.py +0 -0
  258. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/_utils.py +0 -0
  259. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/cond.py +0 -0
  260. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
  261. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
  262. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/dims.py +0 -0
  263. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/layer.py +0 -0
  264. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/loop.py +0 -0
  265. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/make_layer.py +0 -0
  266. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/masked_computation.py +0 -0
  267. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
  268. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
  269. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_low_level/__init__.py +0 -0
  270. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_low_level/_backend.py +0 -0
  271. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/horovod.py +0 -0
  272. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/hyper_param_tuning.py +0 -0
  273. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/layers/__init__.py +0 -0
  274. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/layers/base.py +0 -0
  275. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/layers/basic.py +0 -0
  276. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/layers/rec.py +0 -0
  277. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/layers/segmental_model.py +0 -0
  278. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/layers/signal_processing.py +0 -0
  279. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/layers/variable.py +0 -0
  280. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/native_op.py +0 -0
  281. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/network.py +0 -0
  282. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/sprint.py +0 -0
  283. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/updater.py +0 -0
  284. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/util/__init__.py +0 -0
  285. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/util/basic.py +0 -0
  286. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/util/data.py +0 -0
  287. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/util/gradient_checkpoint.py +0 -0
  288. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/util/ken_lm.py +0 -0
  289. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/util/open_fst.py +0 -0
  290. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/README.md +0 -0
  291. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/__init__.py +0 -0
  292. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/data/__init__.py +0 -0
  293. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/data/extern_data.py +0 -0
  294. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/data/pipeline.py +0 -0
  295. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/data/queued_data_iter.py +0 -0
  296. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
  297. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/data/tensor_utils.py +0 -0
  298. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/distributed.py +0 -0
  299. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/engine.py +0 -0
  300. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/frontend/__init__.py +0 -0
  301. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/frontend/_backend.py +0 -0
  302. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/frontend/_rand.py +0 -0
  303. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/frontend/bridge.py +0 -0
  304. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/frontend/raw_ops.py +0 -0
  305. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/optim/README.md +0 -0
  306. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/optim/__init__.py +0 -0
  307. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/optim/lion.py +0 -0
  308. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/updater.py +0 -0
  309. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/util/README.md +0 -0
  310. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/util/__init__.py +0 -0
  311. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/util/array_.py +0 -0
  312. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/util/debug_inf_nan.py +0 -0
  313. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/util/diagnose_gpu.py +0 -0
  314. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/util/exception_helper.py +0 -0
  315. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/util/gradient_checkpoint.py +0 -0
  316. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/util/module.py +0 -0
  317. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/util/scaled_gradient.py +0 -0
  318. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/__init__.py +0 -0
  319. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/better_exchook.py +0 -0
  320. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/bpe.py +0 -0
  321. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/debug.py +0 -0
  322. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/debug_helpers.py +0 -0
  323. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/file_cache.py +0 -0
  324. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/fsa.py +0 -0
  325. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/literal_py_to_pickle.py +0 -0
  326. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/lru_cache.py +0 -0
  327. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/math.py +0 -0
  328. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
  329. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/native_code_compiler.py +0 -0
  330. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/pprint.py +0 -0
  331. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/py-to-pickle.cpp +0 -0
  332. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/py_ext_mod_compiler.py +0 -0
  333. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/result_with_reason.py +0 -0
  334. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/sig_proc.py +0 -0
  335. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/task_system.py +0 -0
  336. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/train_proc_manager.py +0 -0
  337. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/watch_memory.py +0 -0
  338. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn.egg-info/SOURCES.txt +0 -0
  339. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn.egg-info/dependency_links.txt +0 -0
  340. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn.egg-info/requires.txt +0 -0
  341. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn.egg-info/top_level.txt +0 -0
  342. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/rnn.py +0 -0
  343. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/setup.cfg +0 -0
  344. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/setup.py +0 -0
  345. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/DummySprintExec.py +0 -0
  346. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/PyCharm-inspection-profile.xml +0 -0
  347. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/PyCharm.idea/.gitignore +0 -0
  348. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/PyCharm.idea/.name +0 -0
  349. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
  350. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
  351. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
  352. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
  353. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
  354. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/PyCharm.idea/misc.xml +0 -0
  355. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/PyCharm.idea/modules.xml +0 -0
  356. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/PyCharm.idea/returnn.iml +0 -0
  357. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
  358. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/_set_num_threads1.py +0 -0
  359. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/_setup_returnn_env.py +0 -0
  360. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/_setup_test_env.py +0 -0
  361. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/bpe-unicode-demo.codes +0 -0
  362. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/bpe-unicode-demo.vocab +0 -0
  363. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/lexicon_opt.fst +0 -0
  364. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/lexicon_opt.isyms +0 -0
  365. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/lexicon_opt.jpg +0 -0
  366. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/lexicon_opt.osyms +0 -0
  367. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/lint_common.py +0 -0
  368. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/pycharm-inspect.py +0 -0
  369. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/pylint.py +0 -0
  370. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/returnn-as-framework.py +0 -0
  371. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/spelling.dic +0 -0
  372. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_Config.py +0 -0
  373. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_Dataset.py +0 -0
  374. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_Fsa.py +0 -0
  375. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_GeneratingDataset.py +0 -0
  376. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_HDFDataset.py +0 -0
  377. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_LearningRateControl.py +0 -0
  378. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_Log.py +0 -0
  379. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_MultiProcDataset.py +0 -0
  380. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_Pretrain.py +0 -0
  381. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_ResNet.py +0 -0
  382. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_SprintDataset.py +0 -0
  383. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_SprintInterface.py +0 -0
  384. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_TFEngine.py +0 -0
  385. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_TFNativeOp.py +0 -0
  386. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_TFNetworkLayer.py +0 -0
  387. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_TFNetworkRecLayer.py +0 -0
  388. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_TFNetworkSigProcLayer.py +0 -0
  389. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_TFUpdater.py +0 -0
  390. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_TFUtil.py +0 -0
  391. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_TF_determinism.py +0 -0
  392. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_TaskSystem.py +0 -0
  393. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_TaskSystem_SharedMem.py +0 -0
  394. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_TranslationDataset.py +0 -0
  395. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_Util.py +0 -0
  396. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_demos.py +0 -0
  397. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_fork_exec.py +0 -0
  398. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_hdf_dump.py +0 -0
  399. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_base.py +0 -0
  400. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_cond.py +0 -0
  401. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_const.py +0 -0
  402. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_container.py +0 -0
  403. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_conv.py +0 -0
  404. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_decoder_transformer.py +0 -0
  405. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_encoder_conformer.py +0 -0
  406. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_gradient.py +0 -0
  407. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_label_smoothing.py +0 -0
  408. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_loop.py +0 -0
  409. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_math.py +0 -0
  410. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_normalization.py +0 -0
  411. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_piecewise_linear.py +0 -0
  412. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_rec.py +0 -0
  413. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_reduce.py +0 -0
  414. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_signal.py +0 -0
  415. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_tensor.py +0 -0
  416. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_threading.py +0 -0
  417. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_tools.py +0 -0
  418. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_torch_dataset.py +0 -0
  419. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_torch_engine.py +0 -0
  420. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_torch_frontend.py +0 -0
  421. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_torch_internal_frontend.py +0 -0
  422. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_torch_util.py +0 -0
  423. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/torch_utils.py +0 -0
  424. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/_setup_returnn_env.py +0 -0
  425. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/analyze-dataset-batches.py +0 -0
  426. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/bliss-collect-seq-lens.py +0 -0
  427. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/bliss-dump-text.py +0 -0
  428. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/bliss-get-segment-names.py +0 -0
  429. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/bliss-to-ogg-zip.py +0 -0
  430. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/bpe-create-lexicon.py +0 -0
  431. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/calculate-word-error-rate.py +0 -0
  432. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/cleanup-old-models.py +0 -0
  433. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/collect-orth-symbols.py +0 -0
  434. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/collect-words.py +0 -0
  435. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/compile_native_op.py +0 -0
  436. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/compile_tf_graph.py +0 -0
  437. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/debug-dump-search-scores.py +0 -0
  438. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/debug-plot-search-scores.py +0 -0
  439. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/dump-dataset-raw-strings.py +0 -0
  440. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/dump-dataset.py +0 -0
  441. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/dump-forward-stats.py +0 -0
  442. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/dump-forward.py +0 -0
  443. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/dump-network-json.py +0 -0
  444. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/dump-pickle.py +0 -0
  445. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/extract_state_tying_from_dataset.py +0 -0
  446. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/get-attention-weights.py +0 -0
  447. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/get-best-model-epoch.py +0 -0
  448. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/hdf_dump.py +0 -0
  449. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/hdf_dump_translation_dataset.py +0 -0
  450. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/import-blocks-mt-model.py +0 -0
  451. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/import-t2t-mt-model.py +0 -0
  452. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/.gitignore +0 -0
  453. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/Makefile +0 -0
  454. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/README.md +0 -0
  455. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/example/README.md +0 -0
  456. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/example/libs_list +0 -0
  457. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
  458. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
  459. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
  460. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/example/state_vars_list +0 -0
  461. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/example/tensor_names_list +0 -0
  462. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/file.h +0 -0
  463. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
  464. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
  465. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/main.cc +0 -0
  466. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/rescorer.h +0 -0
  467. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/vocabulary.cc +0 -0
  468. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/vocabulary.h +0 -0
  469. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/tf_avg_checkpoints.py +0 -0
  470. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/tf_inspect_checkpoint.py +0 -0
  471. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/tf_inspect_summary_log.py +0 -0
  472. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/torch_avg_checkpoints.py +0 -0
  473. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/torch_export_to_onnx.py +0 -0
  474. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/torch_inspect_checkpoint.py +0 -0
  475. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/torch_inspect_checkpoint_and_opt.py +0 -0
  476. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/torch_scale_tuning.py +0 -0
{returnn-1.20250826.155029/returnn.egg-info → returnn-1.20250828.142552}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250826.155029
+Version: 1.20250828.142552
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
returnn-1.20250828.142552/_setup_info_generated.py (new file)
@@ -0,0 +1,2 @@
+version = '1.20250828.142552'
+long_version = '1.20250828.142552+git.f81cb9a'
returnn/frontend/_cache.py
@@ -6,7 +6,7 @@ One use case example is :func:`sinusoidal_positional_encoding` and :func:`relati
 """
 
 from __future__ import annotations
-from typing import Optional, Union, Any, Type, Callable, Tuple, Dict
+from typing import Optional, Union, Any, Type, Callable, Tuple, Dict, List
 from weakref import ref
 import tree
 from returnn.util.lru_cache import lru_cache
@@ -59,6 +59,8 @@ class Cache:
             if isinstance(key_item_orig, DimWrapper):
                 assert isinstance(key_item, DimWrapper)
                 dim_orig = key_item_orig.dim_ref()
+                if dim_orig is None:  # orig dim could be dead. but then it would not be used anyway
+                    continue
                 dim = key_item.dim_ref()
                 assert isinstance(dim_orig, Dim) and isinstance(dim, Dim)
                 dim_map[dim_orig] = dim
@@ -103,7 +105,7 @@ def _transform_key(
     key: Any, *, finalize_callback: Optional[Callable] = None, collected_dim_map: Optional[Dict[Dim, DimWrapper]] = None
 ) -> Tuple[Union[Type[Backend], ref[rf.RunCtx], _KeyItemType], ...]:
     backend = _get_backend(key)
-    keys_flat = [backend]
+    keys_flat: List[Any] = [backend]
     if not backend.executing_eagerly():
         # See comment above: If graph-mode, the cached value becomes invalid
         # when the current run ctx goes out of scope.
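
The new dim_ref() guard covers the case where the cache holds only a weak reference to a Dim that has already been garbage-collected: calling a dead weakref.ref returns None. A minimal plain-Python sketch of that behavior (the Obj class is just a stand-in, not part of RETURNN):

    import gc
    import weakref

    class Obj:  # stands in for a Dim that the cache references only weakly
        pass

    o = Obj()
    r = weakref.ref(o)
    assert r() is o      # referent alive: calling the ref yields the object
    del o
    gc.collect()         # make collection deterministic across implementations
    assert r() is None   # referent dead: calling the ref yields None, hence the guard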
returnn/frontend/array_.py
@@ -188,22 +188,18 @@ def merge_dims(
             return source, dims[0]
         return rf.replace_dim(source, in_dim=dims[0], out_dim=out_dim)
     if out_dim is None:
-        out_dim = dims[0]
-        reset_dyn_size = False
-        for d in dims[1:]:
-            reset_dyn_size |= d.need_masking() and out_dim.capacity != 1
-            out_dim = out_dim * d
-        if reset_dyn_size:
+        from returnn.util.basic import prod
+
+        if any(d.need_masking() for d in dims[1:]):
             # The dynamic sizes as calculated via dim math would not correctly describe how the tensor looks like.
             # This would then potentially discard some of the data in the tensor in subsequent operations,
             # when masking is applied.
             # Thus, discard the dynamic sizes, and just treat it as a flat dim with scalar dynamic size.
             # https://github.com/rwth-i6/returnn/issues/1694
-            out_dim_size = dims[0].get_dim_value_tensor()
-            for d in dims[1:]:
-                out_dim_size *= d.get_dim_value_tensor()
-            assert isinstance(out_dim_size, Tensor) and out_dim_size.dims == ()  # scalar
-            out_dim.dyn_size_ext = out_dim_size
+            # See also similar logic in :func:`concat`.
+            out_dim = Dim(prod(d.get_dim_value_tensor() for d in dims), name="merged")
+        else:
+            out_dim = prod(dims)
     # noinspection PyProtectedMember
     return source._raw_backend.merge_dims(source, dims=dims, out_dim=out_dim), out_dim
 
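The comment in this hunk references rwth-i6/returnn#1694. A small self-contained numpy sketch of the underlying problem (illustrative only, not the RETURNN API): flattening a padded tensor interleaves padding with valid data, so per-sequence sizes from dim math no longer describe the flat layout.

    import numpy as np

    # Batch of 2 sequences with lengths [2, 3], padded to max_time = 3 (0 = pad).
    x = np.array([[1, 2, 0],
                  [3, 4, 5]])
    flat = x.reshape(-1)  # merge (batch, time) -> [1 2 0 3 4 5]
    # Per-sequence sizes say only 5 entries are valid in total; masking "the
    # first 5" of the flat axis would keep the pad 0 at index 2 and drop the
    # valid 5 at index 5. Hence merge_dims now builds a fresh flat Dim with
    # scalar size 6 (the product of the dim values) whenever a merged dim
    # needs masking.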
@@ -427,13 +423,40 @@ def concat(
         dims = sources[0][0].dims_set - {sources[0][1]}
         for src, dim in sources:
             assert src.dims_set - {dim} == dims, f"concat {sources}, need allow_broadcast=True"
+    need_handle_dynamic_dims = False
+    for src, dim in sources[:-1]:
+        if dim.need_masking():
+            need_handle_dynamic_dims = True
+    if handle_dynamic_dims is None:
+        handle_dynamic_dims = need_handle_dynamic_dims
     if not out_dim:
-        out_dim = sum(d for _, d in sources)
-    if handle_dynamic_dims is None or handle_dynamic_dims:
-        for src, dim in sources[:-1]:
-            assert dim.is_static(), f"concat {sources}, dim {dim} is not static, not yet implemented..."
-    # noinspection PyProtectedMember
-    return sources[0][0]._raw_backend.concat(*sources, allow_broadcast=allow_broadcast, out_dim=out_dim), out_dim
+        if handle_dynamic_dims or not need_handle_dynamic_dims:
+            out_dim = sum(d for _, d in sources)
+        else:  # not handle_dynamic_dims but need_handle_dynamic_dims
+            # There are dynamic dims, but we don't want to handle them.
+            # So, summing the dims would be incorrect.
+            # Just add the dim values.
+            out_dim = Dim(sum(d.get_dim_value_tensor() for _, d in sources if d.dimension is not None), name="concat")
+    if handle_dynamic_dims:
+        out_non_masked_dim = Dim(sum(d.get_dim_value_tensor() for _, d in sources))
+        # noinspection PyProtectedMember
+        out = sources[0][0]._raw_backend.concat(*sources, allow_broadcast=allow_broadcast, out_dim=out_non_masked_dim)
+        masks = []
+        for _, dim in sources:
+            masks.append(
+                dim.get_mask(dim_order=(dim,) + dim.dyn_size_ext.dims, device=out.device)
+                if dim.need_masking()
+                else rf.constant(True, dims=[dim], device=out.device)
+            )
+        # noinspection PyProtectedMember
+        mask_concat = sources[0][0]._raw_backend.concat(
+            *[(mask, dim) for (_, dim), mask in zip(sources, masks)], allow_broadcast=True, out_dim=out_non_masked_dim
+        )
+        out, _ = rf.masked_select(out, mask=mask_concat, dims=[out_non_masked_dim], out_dim=out_dim)
+    else:
+        # noinspection PyProtectedMember
+        out = sources[0][0]._raw_backend.concat(*sources, allow_broadcast=allow_broadcast, out_dim=out_dim)
+    return out, out_dim
 
 
 def concat_features(*sources: Tensor, allow_broadcast=False) -> Tensor:
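
The new handle_dynamic_dims path can be pictured in plain numpy (illustrative only; the real code works on Tensor/Dim and compacts via rf.masked_select): concatenate along the padded axis first, then select with the concatenated validity mask.

    import numpy as np

    # Two padded batches, per-sequence lengths a_lens = [2, 1], b_lens = [1, 2].
    a = np.array([[1, 2], [3, 0]])          # 0 = padding
    b = np.array([[7, 0], [8, 9]])
    naive = np.concatenate([a, b], axis=1)  # [[1 2 7 0] [3 0 8 9]]: row 1 has
                                            # padding *between* its two parts
    a_lens, b_lens = np.array([2, 1]), np.array([1, 2])
    mask = np.concatenate(                  # validity mask, same layout as naive
        [np.arange(2) < a_lens[:, None], np.arange(2) < b_lens[:, None]], axis=1
    )
    out_lens = a_lens + b_lens              # true concatenated lengths: [3, 3]
    compact = [row[m] for row, m in zip(naive, mask)]  # [1 2 7] and [3 8 9]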
@@ -478,7 +501,12 @@ def pad(
     if handle_dynamic_dims is None:
         handle_dynamic_dims = _pad_handle_dynamic_dims_default(axes, padding, mode=mode)
     if not out_dims:
-        out_dims = [left + middle + right for middle, (left, right) in zip(axes, padding)]
+        out_dims = [
+            (left + middle + right)
+            if handle_dynamic_dims or not _pad_need_dyn_dim_handling(middle, left, right, mode=mode)
+            else _pad_sum_dims_no_dyn_dim_handling(middle, left, right)
+            for middle, (left, right) in zip(axes, padding)
+        ]
     # noinspection PyProtectedMember
     return (
         source._raw_backend.pad(
@@ -544,6 +572,32 @@ def _pad_need_dyn_dim_handling(
     return True
 
 
+def _pad_sum_dims_no_dyn_dim_handling(
+    middle: Dim, left: Union[Dim, int, Tensor], right: Union[Dim, int, Tensor]
+) -> Dim:
+    """
+    This gets called when we need to handle dyn dims, but handle_dynamic_dims=False.
+    See also the same logic in :func:`concat`.
+    """
+    if isinstance(left, Dim):
+        left = left.get_dim_value_tensor()
+    elif isinstance(left, int):
+        pass
+    elif isinstance(left, Tensor):
+        assert left.dims == ()  # scalar
+    else:
+        raise TypeError(f"invalid left pad {left}")
+    if isinstance(right, Dim):
+        right = right.get_dim_value_tensor()
+    elif isinstance(right, int):
+        pass
+    elif isinstance(right, Tensor):
+        assert right.dims == ()  # scalar
+    else:
+        raise TypeError(f"invalid right pad {right}")
+    return Dim(left + middle.get_dim_value_tensor() + right, name="pad")
+
+
 def cum_concat_step(
     source: Tensor, *, prev_accum: Tensor, axis: Dim, out_spatial_dim: Optional[Dim] = None
 ) -> Tuple[Tensor, Dim]:
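
A numpy illustration of why the scalar-size fallback is needed for pad as well (illustrative only, not the RETURNN API): padding is appended at the end of the axis, not after each sequence, so dim-math sizes would mis-describe the result when masking is skipped.

    import numpy as np

    # Per-sequence lengths [2, 3], padded to max_time = 3 (0 = pad).
    x = np.array([[1, 2, 0],
                  [3, 4, 5]])
    padded = np.pad(x, ((0, 0), (0, 1)), constant_values=9)
    # [[1 2 0 9] [3 4 5 9]]: the appended 9s sit at the end of the axis.
    # Dim-math sizes lengths + 1 == [3, 4] would call [1 2 0] the valid part
    # of row 0, keeping old padding and dropping its 9. So with
    # handle_dynamic_dims=False, _pad_sum_dims_no_dyn_dim_handling returns a
    # Dim with the scalar size 3 + 1 instead.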
returnn/frontend/conv.py
@@ -862,8 +862,9 @@ def _consistent_same_padding(
         pad_right = (s - 1) * d - pad_left
         paddings.append((pad_left, pad_right))
     # We expect that masking was already done before (or we don't care about it), thus handle_dynamic_dims=False.
+    out_dims = [(left + middle + right) for middle, (left, right) in zip(in_spatial_dims, paddings)]
     source, in_spatial_dims = rf.pad(
-        source, axes=in_spatial_dims, padding=paddings, value=pad_value, handle_dynamic_dims=False
+        source, axes=in_spatial_dims, padding=paddings, value=pad_value, handle_dynamic_dims=False, out_dims=out_dims
     )
     return source, in_spatial_dims, 0
 
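Here the caller opts out of that fallback on purpose: for "same" conv padding, the output spatial dim must stay tied to the per-sequence input lengths, so out_dims is precomputed via dim math and passed explicitly. A minimal sketch of that dim arithmetic (a static dim stands in for a dynamic spatial dim; int + Dim is the same dim math used in the list comprehension above):

    from returnn.tensor import Dim

    time_dim = Dim(10, name="time")    # stands in for a dynamic spatial dim
    left, right = 1, 1                 # "same" padding for kernel 3, dilation 1
    out_dim = left + time_dim + right  # dim math: a proper Dim of size 12,
                                       # keeping the link to the input lengths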
returnn/frontend/encoder/conformer.py
@@ -8,6 +8,8 @@ https://github.com/rwth-i6/returnn_common/issues/233
 
 from __future__ import annotations
 from typing import Optional, Union, Any, Tuple, List, Dict, Callable
+from types import FunctionType
+import functools
 import copy as _copy
 from returnn.tensor import Tensor, Dim
 import returnn.frontend as rf
@@ -298,7 +300,8 @@ class ConformerEncoder(ISeqDownsamplingEncoder):
         *,
         num_layers: int,
         input_layer: Optional[Union[ConformerConvSubsample, ISeqDownsamplingEncoder, rf.Module, Any]],
-        input_embedding_scale: float = 1.0,
+        input_embedding_scale: Optional[float] = None,
+        pos_enc: Union[None, Callable, Dict[str, Any], rf.Module] = None,
         input_dropout: float = 0.1,
         ff_dim: Dim = NotSpecified,
         ff_activation: Union[Callable[[Tensor], Tensor], Dict[str, Any], rf.Module] = NotSpecified,
@@ -317,8 +320,17 @@ class ConformerEncoder(ISeqDownsamplingEncoder):
         :param num_layers: the number of encoder layers
         :param input_layer: input/frontend/prenet with potential subsampling.
             (x, in_spatial_dim) -> (y, out_spatial_dim)
-        :param input_embedding_scale: applied after input_layer. 1.0 by default for historic reasons.
-            In the standard Transformer, and also ESPnet's E-Branchformer and Conformer, this is sqrt(out_dim).
+        :param input_embedding_scale: applied after input_layer.
+            1.0 by default for historic reasons if pos_enc is None,
+            else sqrt(out_dim) by default.
+            In the standard Transformer, and also ESPnet's E-Branchformer and Conformer, this is sqrt(out_dim),
+            which is relevant when you add positional encoding.
+        :param pos_enc: positional encoding, applied after input_embedding_scale.
+            None (no positional encoding) by default, unlike the standard Transformer.
+            E.g. :func:`rf.sinusoidal_positional_encoding` for absolute pos enc.
+            Note that relative positional encoding is usually part of the attention layer,
+            e.g. :class:`rf.RelPosSelfAttention`,
+            and then nothing needs to be set here.
         :param input_dropout: applied after input_projection(input_layer(x))
         :param ff_dim: the dimension of feed-forward layers. 2048 originally, or 4 times out_dim
         :param ff_activation: activation function for feed-forward network
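A hedged construction sketch for the new pos_enc argument (the dims and hyperparameters below are made up; the default path with pos_enc=None behaves exactly as before):

import returnn.frontend as rf
from returnn.tensor import Dim
from returnn.frontend.encoder.conformer import ConformerEncoder

in_dim = Dim(80, name="feature")
out_dim = Dim(512, name="model")
# With absolute positional encoding enabled, input_embedding_scale
# defaults to sqrt(out_dim), matching the standard Transformer setup.
encoder = ConformerEncoder(
    in_dim,
    out_dim,
    num_layers=12,
    input_layer=None,
    pos_enc=rf.sinusoidal_positional_encoding,
)

As the __call__ change further down shows, the encoding is added after the embedding scale and before input dropout; for relative positional encoding inside self-attention (e.g. rf.RelPosSelfAttention), pos_enc stays None.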
@@ -352,12 +364,22 @@ class ConformerEncoder(ISeqDownsamplingEncoder):
         else:
             raise TypeError(f"unexpected input_layer {input_layer!r}")
         self.input_layer = input_layer
-        self.input_projection = (
-            rf.Linear(self.input_layer.out_dim if self.input_layer else self.in_dim, self.out_dim, with_bias=False)
-            if input_layer
-            else None
-        )
+        in_dim = self.input_layer.out_dim if self.input_layer else self.in_dim
+        self.input_projection = rf.Linear(in_dim, self.out_dim, with_bias=False) if in_dim != self.out_dim else None
+        if input_embedding_scale is None:
+            input_embedding_scale = (self.out_dim.dimension**0.5) if pos_enc is not None else 1.0
         self.input_embedding_scale = input_embedding_scale
+        if pos_enc is None:
+            pass
+        elif isinstance(pos_enc, dict):
+            pos_enc = rf.build_from_dict(pos_enc, feat_dim=self.out_dim)
+        elif isinstance(pos_enc, rf.Module):
+            pass
+        elif isinstance(pos_enc, FunctionType):
+            pos_enc = functools.partial(pos_enc, feat_dim=self.out_dim)
+        else:
+            raise TypeError(f"unexpected pos_enc type {pos_enc!r}")
+        self.pos_enc = pos_enc
         self.input_dropout = input_dropout

         if not encoder_layer or isinstance(encoder_layer, (dict, type)):
@@ -411,6 +433,8 @@ class ConformerEncoder(ISeqDownsamplingEncoder):
         x = self.input_projection(x_subsample) if self.input_projection else x_subsample
         if self.input_embedding_scale != 1.0:
             x = x * self.input_embedding_scale
+        if self.pos_enc is not None:
+            x = x + self.pos_enc(spatial_dim=out_spatial_dim)
         x = rf.dropout(x, self.input_dropout, axis=self.dropout_broadcast and self.out_dim)
         x = self.layers(x, spatial_dim=out_spatial_dim, collected_outputs=collected_outputs)
         return x, out_spatial_dim
@@ -1264,7 +1264,6 @@ class _DimMixin:
             raise TypeError(f"complete_dyn_size: _relu: unexpected type {type(a)}")

         y: Optional[_t.Tensor] = None  # resulting dyn size
-        y_max_value: Optional[_t.Tensor] = None  # resulting dyn size max value
         inputs = list(op.inputs)
         assert inputs
         for x_dim in inputs:
@@ -1275,8 +1274,6 @@ class _DimMixin:
             if x_dim.dyn_size_ext is None and x_dim.dimension is None:
                 return
             y = _bin_op(y, x_dim.dimension if x_dim.dimension is not None else x_dim.dyn_size_ext)
-            if not template_only and y.raw_tensor is not None:
-                y_max_value = _bin_op(y_max_value, x_dim.get_dim_value_tensor())
         assert y is not None, f"op {op}?"
         if self.dyn_size_ext is not None:
             assert self.dyn_size_ext.dim_tags == y.dim_tags
@@ -1286,9 +1283,14 @@ class _DimMixin:
         else:
             self.batch = y.batch
         self.dyn_size_ext = y
-        if not template_only and y_max_value is not None:
-            assert y_max_value is not None and y_max_value.raw_tensor is not None
-            self._dyn_size_max_value = y_max_value
+        if not template_only and y.raw_tensor is not None:
+            # Note: Earlier, we had this wrong.
+            # It is not correct to replicate the same math (bin ops)
+            # on the dim values (_dyn_size_max_value of each dim).
+            # Consider sizes1=[2,3], sizes2=[5,4], and the op is "add".
+            # Then the result sizes would be [7,7], thus its max is 7,
+            # but max(sizes1)+max(sizes2)=3+5=8.
+            self._dyn_size_max_value = rf.reduce_max(y, axis=y.dims) if y.dims else y
         if tf and y.placeholder is not None:
             self.set_tag_on_size_tensor(y.placeholder)
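The comment's counterexample, checked in plain numpy: the max of an elementwise sum is not the sum of the per-dim maxima, which is why the max value must now be reduced from the actual resulting sizes:

import numpy as np

sizes1 = np.array([2, 3])
sizes2 = np.array([5, 4])
result = sizes1 + sizes2                  # per-sequence sizes [7, 7]
assert result.max() == 7                  # correct _dyn_size_max_value
assert sizes1.max() + sizes2.max() == 8   # the old, incorrect computation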
@@ -2080,6 +2082,8 @@ class _DimMixin:
         :return: self + other. note that this is not commutative, i.e. different from other + self.
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 0:
+            return self
         cache_key = ("add", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2098,6 +2102,8 @@ class _DimMixin:
         :return: other + self
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 0:
+            return self
         cache_key = ("add_left", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2115,6 +2121,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 0:
+            return self
         return self.sub_right(other)

     def sub_right(self: Dim, other):
@@ -2123,6 +2131,8 @@ class _DimMixin:
         :return: self - other
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 0:
+            return self
         cache_key = ("sub", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2141,6 +2151,8 @@ class _DimMixin:
         :return: (-other) + self
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 0:
+            return self
         cache_key = ("sub_left", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2158,6 +2170,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("mul", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2175,6 +2189,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("mul_left", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2192,6 +2208,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("floordiv", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2209,6 +2227,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         return self.div_right(other)

     def div_left(self: Dim, other):
@@ -2216,6 +2236,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("truediv_left", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2233,6 +2255,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("truediv", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2250,6 +2274,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("ceildiv_left", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2267,6 +2293,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("ceildiv", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -1693,15 +1693,17 @@ def inplace_increment(x: numpy.ndarray, idx: numpy.ndarray, y: Union[numpy.ndarr
     raise NotImplementedError("This feature was removed with dropped Theano support")


-def prod(ls):
+def prod(ls: Union[Iterable[T], numpy.ndarray]) -> Union[int, T, float]:
     """
-    :param list[T]|tuple[T]|numpy.ndarray ls:
-    :rtype: T|int|float
+    :param ls:
+    :return: ls[0] * ls[1] * ...
     """
-    if len(ls) == 0:
+    it = iter(ls)
+    try:
+        x = next(it)
+    except StopIteration:
         return 1
-    x = ls[0]
-    for y in ls[1:]:
+    for y in it:
         x = x * y  # *= doesn't work because x might be a tensor, and for e.g. torch.Tensor this op is in-place
     return x
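The rewritten prod consumes any iterable instead of indexing into a sequence, so generators now work, and an empty input still yields 1:

from returnn.util.basic import prod

assert prod([2, 3, 4]) == 24
assert prod(x for x in range(1, 5)) == 24  # generators are accepted now
assert prod([]) == 1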
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250826.155029
+Version: 1.20250828.142552
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
@@ -48,6 +48,26 @@ def tf_scope():
     yield session


+class RunModelException(Exception):
+    """run model exception"""
+
+
+class NonFiniteValuesException(RunModelException):
+    """non-finite values exception"""
+
+
+class CompareResultsMismatchException(RunModelException):
+    """compare results exception"""
+
+
+class CompareResultsMismatchTfVsPtException(CompareResultsMismatchException):
+    """compare results TF vs PT exception"""
+
+
+class CompareResultsMismatchSingleVsMultiBatchException(CompareResultsMismatchException):
+    """compare results single vs multi batch exception"""
+
+
 def run_model(
     extern_data: TensorDict,
     get_model: rf.GetModelFunc,
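A hedged sketch of what the new hierarchy buys callers: comparison mismatches can be handled separately from non-finite failures (extern_data, get_model and forward_step below are placeholders for a caller's own objects):

try:
    out = run_model(extern_data, get_model, forward_step)
except CompareResultsMismatchException as exc:
    # covers both the TF-vs-PT and the single-vs-multi-batch mismatch
    print(f"backends/batching disagree: {exc}")
except NonFiniteValuesException:
    raise  # inf/nan in outputs is always fatal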
@@ -85,7 +105,7 @@ def run_model(
             lambda: (_run_model_torch(extern_data, get_model, forward_step), None)[-1],
             stop_reporting_after_first_inf_nan=False,
         )
-        raise Exception(f"Non-finite values in output: {non_finite_outputs}. See log above.")
+        raise NonFiniteValuesException(f"Non-finite values in output: {non_finite_outputs}. See log above.")

     if test_single_batch_entry and batch_dim in extern_data_dims:
         dyn_dims = [
@@ -146,7 +166,7 @@ def run_model(
         if not numpy.allclose(v_pt, v_tf, atol=1e-5, rtol=1e-5):
             print(f"  PT:\n{v_pt}")
             print(f"  TF:\n{v_tf}")
-            raise Exception(f"output {k!r} differs")
+            raise CompareResultsMismatchTfVsPtException(f"output {k!r} differs")
     return out_pt

@@ -300,9 +320,10 @@ def _run_model_torch_single_batch(
     # Slice the raw ref output to be able to match it to the raw single output.
     ref_output_raw = ref_output_.raw_tensor[_get_slices(output_)]
     single_output_raw = output_.raw_tensor
-    numpy.testing.assert_allclose(
-        ref_output_raw, single_output_raw, atol=1e-5, rtol=1e-5, err_msg=f"output {key!r} differs"
-    )
+    if not numpy.allclose(ref_output_raw, single_output_raw, atol=1e-5, rtol=1e-5):
+        print(f"  Batched:\n{ref_output_raw}")
+        print(f"  Single:\n{single_output_raw}")
+        raise CompareResultsMismatchSingleVsMultiBatchException(f"output {key!r} differs")

     # Recover original data.
     extern_data.reset_content()
@@ -411,6 +411,46 @@ def test_concat():
     run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step)


+def test_concat_partly_dyn_dim():
+    time_static_dim = Dim(5, name="time_static")
+    time_dim = Dim(Tensor("time", [batch_dim], dtype="int32"))
+    in_dim = Dim(7, name="in")
+    extern_data = TensorDict(
+        {
+            "left": Tensor("left", [batch_dim, time_static_dim, in_dim], dtype="float32"),
+            "right": Tensor("right", [batch_dim, time_dim, in_dim], dtype="float32"),
+        }
+    )
+
+    # noinspection PyShadowingNames
+    def _forward_step(*, extern_data: TensorDict, **_kwargs):
+        left, right = extern_data["left"], extern_data["right"]
+        out, out_time_dim = rf.concat((left, time_static_dim), (right, time_dim))
+        out.mark_as_default_output(shape=(batch_dim, out_time_dim, in_dim))
+
+    run_model(extern_data, lambda **_: rf.Module(), _forward_step)
+
+
+def test_concat_dyn_time():
+    time1_dim = Dim(Tensor("time1", [batch_dim], dtype="int32"))
+    time2_dim = Dim(Tensor("time2", [batch_dim], dtype="int32"))
+    extern_data = TensorDict(
+        {
+            "left": Tensor("left", [batch_dim, time1_dim], dtype="float32"),
+            "right": Tensor("right", [batch_dim, time2_dim], dtype="float32"),
+        }
+    )
+
+    # noinspection PyShadowingNames
+    def _forward_step(*, extern_data: TensorDict, **_kwargs):
+        left, right = extern_data["left"], extern_data["right"]
+        out, out_time_dim = rf.concat((left, time1_dim), (right, time2_dim))
+        out.mark_as_default_output(shape=(batch_dim, out_time_dim))
+
+    # test_single_batch_entry should test the interesting case.
+    run_model(extern_data, lambda **_: rf.Module(), _forward_step, test_tensorflow=False)
+
+
 def test_pad():
     time_dim = Dim(Tensor("time", [batch_dim], dtype="int32"))
     in_dim = Dim(7, name="in")
@@ -38,7 +38,7 @@ def test_dot_attention():

     class _Net(rf.Module):
         def __call__(self, q: Tensor, k: Tensor, v: Tensor) -> Tensor:
-            kv_axis = Dim(None, name=f"kv-axis")
+            kv_axis = Dim(None, name="kv-axis")
             k, _ = rf.replace_dim(k, in_dim=time_dim, out_dim=kv_axis)
             v, _ = rf.replace_dim(v, in_dim=time_dim, out_dim=kv_axis)
             return rf.dot_attention(q, k, v, axis=kv_axis, key_dim=key_dim)
@@ -604,7 +604,7 @@ def test_rel_pos_self_attention():
         x_b = rf.gather(x, axis=batch_dim, indices=b)
         assert batch_dim in axis.dyn_size_ext.dims  # current assumption...
         seq_len = rf.gather(axis.dyn_size_ext, axis=batch_dim, indices=b)
-        axis_b = Dim(seq_len)
+        axis_b = Dim(seq_len, name=f"time_b{b}")
         # Note: The current order (replace_dim and then slice) is somewhat dependent
         # on the current internal behavior of gather and replace_dim,
         # which might change at some point...
@@ -1,2 +0,0 @@
-version = '1.20250826.155029'
-long_version = '1.20250826.155029+git.cca4212'