returnn 1.20250828.2732.tar.gz → 1.20250829.151916.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of returnn might be problematic.

Files changed (476)
  1. {returnn-1.20250828.2732/returnn.egg-info → returnn-1.20250829.151916}/PKG-INFO +1 -1
  2. returnn-1.20250829.151916/_setup_info_generated.py +2 -0
  3. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/_cache.py +4 -2
  4. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/array_.py +76 -34
  5. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/conv.py +7 -4
  6. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/dims.py +26 -10
  7. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/hooks.py +3 -3
  8. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/normalization.py +1 -1
  9. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/signal.py +1 -1
  10. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tensor/_dim_extra.py +34 -6
  11. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/util/basic.py +8 -6
  12. {returnn-1.20250828.2732 → returnn-1.20250829.151916/returnn.egg-info}/PKG-INFO +1 -1
  13. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/rf_utils.py +26 -5
  14. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_rf_array.py +40 -0
  15. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_rf_attention.py +2 -2
  16. returnn-1.20250828.2732/_setup_info_generated.py +0 -2
  17. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/.editorconfig +0 -0
  18. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/.gitignore +0 -0
  19. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/.gitmodules +0 -0
  20. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/.kateconfig +0 -0
  21. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/CHANGELOG.md +0 -0
  22. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/CODEOWNERS +0 -0
  23. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/CONTRIBUTING.md +0 -0
  24. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/LICENSE +0 -0
  25. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/MANIFEST.in +0 -0
  26. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/README.rst +0 -0
  27. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/__init__.py +0 -0
  28. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/12AX.cluster_map +0 -0
  29. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/_setup_returnn_env.py +0 -0
  30. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-fwd.config +0 -0
  31. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-horovod-mpi.py +0 -0
  32. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-horovod-mpi.py.sh +0 -0
  33. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-horovod-mpi.sh +0 -0
  34. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-hyper-param-tuning.config +0 -0
  35. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-iter-dataset.py +0 -0
  36. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-list-devices.py +0 -0
  37. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-lua-torch-layer.config +0 -0
  38. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-pretrain.config +0 -0
  39. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-record-and-push-to-webserver.py +0 -0
  40. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-returnn-as-framework.py +0 -0
  41. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-rf-pt-benchmark.py +0 -0
  42. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-rf.config +0 -0
  43. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-rhn-enwik8.config +0 -0
  44. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-sprint-interface.py +0 -0
  45. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-tf-att-copy.config +0 -0
  46. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-tf-attention.config +0 -0
  47. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
  48. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
  49. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-tf-enc-dec.config +0 -0
  50. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-tf-hard-att-copy.config +0 -0
  51. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-tf-lstm-benchmark.py +0 -0
  52. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
  53. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
  54. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-tf-native-lstm.12ax.config +0 -0
  55. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-tf-native-lstm2.12ax.config +0 -0
  56. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
  57. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-tf-neural-transducer.12ax.config +0 -0
  58. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-tf-rec-explicit-lstm.config +0 -0
  59. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-tf-rec-explicit-rnn.config +0 -0
  60. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-tf-rec-self-att.config +0 -0
  61. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-tf-search-compiled-graph.py +0 -0
  62. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
  63. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-timit-lstm-ctc.config +0 -0
  64. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-torch.config +0 -0
  65. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
  66. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/demo.sh +0 -0
  67. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
  68. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
  69. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
  70. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/mdlstm/IAM/README.txt +0 -0
  71. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/mdlstm/IAM/chars.txt +0 -0
  72. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/mdlstm/IAM/config_demo +0 -0
  73. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/mdlstm/IAM/config_fwd +0 -0
  74. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/mdlstm/IAM/config_real +0 -0
  75. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
  76. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/mdlstm/IAM/decode.py +0 -0
  77. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
  78. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/mdlstm/IAM/go.sh +0 -0
  79. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/mdlstm/IAM/lines.txt +0 -0
  80. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/mdlstm/IAM/split/eval.txt +0 -0
  81. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/mdlstm/IAM/split/train.txt +0 -0
  82. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/mdlstm/IAM/split/valid.txt +0 -0
  83. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/mdlstm/README.md +0 -0
  84. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/mdlstm/artificial/create_test_h5.py +0 -0
  85. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/mdlstm/artificial/forwardconfig +0 -0
  86. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/mdlstm/artificial/go.sh +0 -0
  87. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/mdlstm/artificial/trainconfig +0 -0
  88. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
  89. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
  90. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/mdlstm/artificial_rgb/go.sh +0 -0
  91. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
  92. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/pyproject.toml +0 -0
  93. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/requirements.txt +0 -0
  94. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/__init__.py +0 -0
  95. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/__main__.py +0 -0
  96. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/__old_mod_loader__.py +0 -0
  97. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/__setup__.py +0 -0
  98. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/config.py +0 -0
  99. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/datasets/__init__.py +0 -0
  100. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/datasets/audio.py +0 -0
  101. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/datasets/basic.py +0 -0
  102. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/datasets/bundle_file.py +0 -0
  103. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/datasets/cached.py +0 -0
  104. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/datasets/cached2.py +0 -0
  105. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/datasets/distrib_files.py +0 -0
  106. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/datasets/generating.py +0 -0
  107. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/datasets/hdf.py +0 -0
  108. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/datasets/lm.py +0 -0
  109. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/datasets/map.py +0 -0
  110. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/datasets/meta.py +0 -0
  111. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/datasets/multi_proc.py +0 -0
  112. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/datasets/normalization_data.py +0 -0
  113. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/datasets/numpy_dump.py +0 -0
  114. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/datasets/postprocessing.py +0 -0
  115. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/datasets/raw_wav.py +0 -0
  116. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/datasets/sprint.py +0 -0
  117. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/datasets/stereo.py +0 -0
  118. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/datasets/text_dict.py +0 -0
  119. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/datasets/util/__init__.py +0 -0
  120. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/datasets/util/feature_extraction.py +0 -0
  121. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/datasets/util/strings.py +0 -0
  122. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/datasets/util/vocabulary.py +0 -0
  123. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/engine/__init__.py +0 -0
  124. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/engine/base.py +0 -0
  125. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/engine/batch.py +0 -0
  126. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/__init__.py +0 -0
  127. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/__main__.py +0 -0
  128. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/warp-rna/.git +0 -0
  129. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
  130. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
  131. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
  132. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
  133. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
  134. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
  135. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
  136. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
  137. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
  138. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
  139. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
  140. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
  141. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
  142. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
  143. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
  144. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
  145. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
  146. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
  147. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
  148. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
  149. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
  150. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
  151. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
  152. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
  153. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/__init__.py +0 -0
  154. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/graph_editor/README.md +0 -0
  155. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/graph_editor/__init__.py +0 -0
  156. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/graph_editor/edit.py +0 -0
  157. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/graph_editor/reroute.py +0 -0
  158. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/graph_editor/select.py +0 -0
  159. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/graph_editor/subgraph.py +0 -0
  160. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/graph_editor/transform.py +0 -0
  161. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/extern/graph_editor/util.py +0 -0
  162. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/forward_iface.py +0 -0
  163. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/__init__.py +0 -0
  164. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/_backend.py +0 -0
  165. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/_native/__init__.py +0 -0
  166. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/_native/backend.cpp +0 -0
  167. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/_native/backend.hpp +0 -0
  168. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/_native/module.cpp +0 -0
  169. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/_native/module.hpp +0 -0
  170. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/_native/py_utils.hpp +0 -0
  171. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/_native/tensor_ops.cpp +0 -0
  172. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/_native/tensor_ops.hpp +0 -0
  173. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/_numpy_backend.py +0 -0
  174. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/_random_journal.py +0 -0
  175. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/_utils.py +0 -0
  176. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/attention.py +0 -0
  177. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/audio/__init__.py +0 -0
  178. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/audio/mel.py +0 -0
  179. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/audio/specaugment.py +0 -0
  180. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/backend.py +0 -0
  181. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/build_from_dict.py +0 -0
  182. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/cond.py +0 -0
  183. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/const.py +0 -0
  184. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/container.py +0 -0
  185. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/control_flow_ctx.py +0 -0
  186. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/conversions/__init__.py +0 -0
  187. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/conversions/espnet_e_branchformer.py +0 -0
  188. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/conversions/hf_llama.py +0 -0
  189. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/conversions/torch_nn.py +0 -0
  190. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/decoder/__init__.py +0 -0
  191. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/decoder/transformer.py +0 -0
  192. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/device.py +0 -0
  193. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/dropout.py +0 -0
  194. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/dtype.py +0 -0
  195. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/encoder/__init__.py +0 -0
  196. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/encoder/base.py +0 -0
  197. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/encoder/conformer.py +0 -0
  198. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/encoder/conformer_v2.py +0 -0
  199. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/encoder/e_branchformer.py +0 -0
  200. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/encoder/transformer.py +0 -0
  201. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/gradient.py +0 -0
  202. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/graph.py +0 -0
  203. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/init.py +0 -0
  204. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/label_smoothing.py +0 -0
  205. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/linear.py +0 -0
  206. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/loop.py +0 -0
  207. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/loss.py +0 -0
  208. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/math_.py +0 -0
  209. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/matmul.py +0 -0
  210. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/module.py +0 -0
  211. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/nested.py +0 -0
  212. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/parameter.py +0 -0
  213. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/parametrizations.py +0 -0
  214. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/parametrize.py +0 -0
  215. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/piecewise_linear.py +0 -0
  216. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/rand.py +0 -0
  217. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/rec.py +0 -0
  218. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/reduce.py +0 -0
  219. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/run_ctx.py +0 -0
  220. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/state.py +0 -0
  221. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/stepwise_scheduler.py +0 -0
  222. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/tensor_array.py +0 -0
  223. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/frontend/types.py +0 -0
  224. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/import_/__init__.py +0 -0
  225. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/import_/common.py +0 -0
  226. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/import_/git.py +0 -0
  227. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/import_/import_.py +0 -0
  228. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/learning_rate_control.py +0 -0
  229. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/log.py +0 -0
  230. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/native_op.cpp +0 -0
  231. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/native_op.py +0 -0
  232. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/pretrain.py +0 -0
  233. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/sprint/__init__.py +0 -0
  234. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/sprint/cache.py +0 -0
  235. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/sprint/control.py +0 -0
  236. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/sprint/error_signals.py +0 -0
  237. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/sprint/extern_interface.py +0 -0
  238. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/sprint/interface.py +0 -0
  239. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tensor/README.md +0 -0
  240. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tensor/__init__.py +0 -0
  241. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tensor/_tensor_extra.py +0 -0
  242. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tensor/_tensor_mixin_base.py +0 -0
  243. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tensor/_tensor_op_overloads.py +0 -0
  244. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tensor/control_flow_ctx.py +0 -0
  245. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tensor/dim.py +0 -0
  246. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tensor/marked_dim.py +0 -0
  247. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tensor/tensor.py +0 -0
  248. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tensor/tensor_dict.py +0 -0
  249. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tensor/utils.py +0 -0
  250. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/__init__.py +0 -0
  251. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/compat.py +0 -0
  252. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/data_pipeline.py +0 -0
  253. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/distributed.py +0 -0
  254. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/engine.py +0 -0
  255. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/frontend_layers/README.md +0 -0
  256. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/frontend_layers/__init__.py +0 -0
  257. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/frontend_layers/_backend.py +0 -0
  258. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/frontend_layers/_utils.py +0 -0
  259. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/frontend_layers/cond.py +0 -0
  260. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
  261. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
  262. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/frontend_layers/dims.py +0 -0
  263. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/frontend_layers/layer.py +0 -0
  264. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/frontend_layers/loop.py +0 -0
  265. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/frontend_layers/make_layer.py +0 -0
  266. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/frontend_layers/masked_computation.py +0 -0
  267. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
  268. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
  269. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/frontend_low_level/__init__.py +0 -0
  270. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/frontend_low_level/_backend.py +0 -0
  271. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/horovod.py +0 -0
  272. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/hyper_param_tuning.py +0 -0
  273. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/layers/__init__.py +0 -0
  274. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/layers/base.py +0 -0
  275. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/layers/basic.py +0 -0
  276. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/layers/rec.py +0 -0
  277. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/layers/segmental_model.py +0 -0
  278. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/layers/signal_processing.py +0 -0
  279. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/layers/variable.py +0 -0
  280. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/native_op.py +0 -0
  281. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/network.py +0 -0
  282. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/sprint.py +0 -0
  283. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/updater.py +0 -0
  284. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/util/__init__.py +0 -0
  285. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/util/basic.py +0 -0
  286. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/util/data.py +0 -0
  287. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/util/gradient_checkpoint.py +0 -0
  288. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/util/ken_lm.py +0 -0
  289. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/tf/util/open_fst.py +0 -0
  290. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/README.md +0 -0
  291. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/__init__.py +0 -0
  292. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/data/__init__.py +0 -0
  293. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/data/extern_data.py +0 -0
  294. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/data/pipeline.py +0 -0
  295. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/data/queued_data_iter.py +0 -0
  296. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
  297. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/data/tensor_utils.py +0 -0
  298. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/distributed.py +0 -0
  299. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/engine.py +0 -0
  300. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/frontend/__init__.py +0 -0
  301. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/frontend/_backend.py +0 -0
  302. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/frontend/_rand.py +0 -0
  303. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/frontend/bridge.py +0 -0
  304. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/frontend/raw_ops.py +0 -0
  305. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/optim/README.md +0 -0
  306. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/optim/__init__.py +0 -0
  307. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/optim/lion.py +0 -0
  308. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/updater.py +0 -0
  309. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/util/README.md +0 -0
  310. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/util/__init__.py +0 -0
  311. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/util/array_.py +0 -0
  312. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/util/debug_inf_nan.py +0 -0
  313. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/util/diagnose_gpu.py +0 -0
  314. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/util/exception_helper.py +0 -0
  315. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/util/gradient_checkpoint.py +0 -0
  316. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/util/module.py +0 -0
  317. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/torch/util/scaled_gradient.py +0 -0
  318. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/util/__init__.py +0 -0
  319. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/util/better_exchook.py +0 -0
  320. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/util/bpe.py +0 -0
  321. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/util/debug.py +0 -0
  322. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/util/debug_helpers.py +0 -0
  323. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/util/file_cache.py +0 -0
  324. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/util/fsa.py +0 -0
  325. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/util/literal_py_to_pickle.py +0 -0
  326. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/util/lru_cache.py +0 -0
  327. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/util/math.py +0 -0
  328. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
  329. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/util/native_code_compiler.py +0 -0
  330. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/util/pprint.py +0 -0
  331. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/util/py-to-pickle.cpp +0 -0
  332. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/util/py_ext_mod_compiler.py +0 -0
  333. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/util/result_with_reason.py +0 -0
  334. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/util/sig_proc.py +0 -0
  335. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/util/task_system.py +0 -0
  336. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/util/train_proc_manager.py +0 -0
  337. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn/util/watch_memory.py +0 -0
  338. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn.egg-info/SOURCES.txt +0 -0
  339. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn.egg-info/dependency_links.txt +0 -0
  340. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn.egg-info/requires.txt +0 -0
  341. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/returnn.egg-info/top_level.txt +0 -0
  342. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/rnn.py +0 -0
  343. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/setup.cfg +0 -0
  344. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/setup.py +0 -0
  345. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/DummySprintExec.py +0 -0
  346. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/PyCharm-inspection-profile.xml +0 -0
  347. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/PyCharm.idea/.gitignore +0 -0
  348. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/PyCharm.idea/.name +0 -0
  349. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
  350. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
  351. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
  352. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
  353. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
  354. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/PyCharm.idea/misc.xml +0 -0
  355. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/PyCharm.idea/modules.xml +0 -0
  356. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/PyCharm.idea/returnn.iml +0 -0
  357. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
  358. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/_set_num_threads1.py +0 -0
  359. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/_setup_returnn_env.py +0 -0
  360. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/_setup_test_env.py +0 -0
  361. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/bpe-unicode-demo.codes +0 -0
  362. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/bpe-unicode-demo.vocab +0 -0
  363. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/lexicon_opt.fst +0 -0
  364. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/lexicon_opt.isyms +0 -0
  365. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/lexicon_opt.jpg +0 -0
  366. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/lexicon_opt.osyms +0 -0
  367. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/lint_common.py +0 -0
  368. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/pycharm-inspect.py +0 -0
  369. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/pylint.py +0 -0
  370. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/returnn-as-framework.py +0 -0
  371. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/spelling.dic +0 -0
  372. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_Config.py +0 -0
  373. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_Dataset.py +0 -0
  374. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_Fsa.py +0 -0
  375. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_GeneratingDataset.py +0 -0
  376. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_HDFDataset.py +0 -0
  377. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_LearningRateControl.py +0 -0
  378. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_Log.py +0 -0
  379. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_MultiProcDataset.py +0 -0
  380. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_Pretrain.py +0 -0
  381. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_ResNet.py +0 -0
  382. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_SprintDataset.py +0 -0
  383. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_SprintInterface.py +0 -0
  384. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_TFEngine.py +0 -0
  385. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_TFNativeOp.py +0 -0
  386. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_TFNetworkLayer.py +0 -0
  387. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_TFNetworkRecLayer.py +0 -0
  388. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_TFNetworkSigProcLayer.py +0 -0
  389. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_TFUpdater.py +0 -0
  390. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_TFUtil.py +0 -0
  391. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_TF_determinism.py +0 -0
  392. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_TaskSystem.py +0 -0
  393. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_TaskSystem_SharedMem.py +0 -0
  394. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_TranslationDataset.py +0 -0
  395. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_Util.py +0 -0
  396. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_demos.py +0 -0
  397. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_fork_exec.py +0 -0
  398. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_hdf_dump.py +0 -0
  399. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_rf_base.py +0 -0
  400. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_rf_cond.py +0 -0
  401. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_rf_const.py +0 -0
  402. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_rf_container.py +0 -0
  403. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_rf_conv.py +0 -0
  404. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_rf_decoder_transformer.py +0 -0
  405. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_rf_encoder_conformer.py +0 -0
  406. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_rf_gradient.py +0 -0
  407. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_rf_label_smoothing.py +0 -0
  408. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_rf_loop.py +0 -0
  409. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_rf_math.py +0 -0
  410. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_rf_normalization.py +0 -0
  411. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_rf_piecewise_linear.py +0 -0
  412. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_rf_rec.py +0 -0
  413. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_rf_reduce.py +0 -0
  414. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_rf_signal.py +0 -0
  415. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_tensor.py +0 -0
  416. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_threading.py +0 -0
  417. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_tools.py +0 -0
  418. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_torch_dataset.py +0 -0
  419. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_torch_engine.py +0 -0
  420. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_torch_frontend.py +0 -0
  421. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_torch_internal_frontend.py +0 -0
  422. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/test_torch_util.py +0 -0
  423. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tests/torch_utils.py +0 -0
  424. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/_setup_returnn_env.py +0 -0
  425. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/analyze-dataset-batches.py +0 -0
  426. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/bliss-collect-seq-lens.py +0 -0
  427. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/bliss-dump-text.py +0 -0
  428. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/bliss-get-segment-names.py +0 -0
  429. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/bliss-to-ogg-zip.py +0 -0
  430. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/bpe-create-lexicon.py +0 -0
  431. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/calculate-word-error-rate.py +0 -0
  432. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/cleanup-old-models.py +0 -0
  433. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/collect-orth-symbols.py +0 -0
  434. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/collect-words.py +0 -0
  435. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/compile_native_op.py +0 -0
  436. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/compile_tf_graph.py +0 -0
  437. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/debug-dump-search-scores.py +0 -0
  438. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/debug-plot-search-scores.py +0 -0
  439. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/dump-dataset-raw-strings.py +0 -0
  440. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/dump-dataset.py +0 -0
  441. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/dump-forward-stats.py +0 -0
  442. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/dump-forward.py +0 -0
  443. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/dump-network-json.py +0 -0
  444. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/dump-pickle.py +0 -0
  445. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/extract_state_tying_from_dataset.py +0 -0
  446. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/get-attention-weights.py +0 -0
  447. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/get-best-model-epoch.py +0 -0
  448. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/hdf_dump.py +0 -0
  449. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/hdf_dump_translation_dataset.py +0 -0
  450. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/import-blocks-mt-model.py +0 -0
  451. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/import-t2t-mt-model.py +0 -0
  452. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/lattice_rescorer/.gitignore +0 -0
  453. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/lattice_rescorer/Makefile +0 -0
  454. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/lattice_rescorer/README.md +0 -0
  455. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/lattice_rescorer/example/README.md +0 -0
  456. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/lattice_rescorer/example/libs_list +0 -0
  457. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
  458. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
  459. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
  460. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/lattice_rescorer/example/state_vars_list +0 -0
  461. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/lattice_rescorer/example/tensor_names_list +0 -0
  462. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/lattice_rescorer/file.h +0 -0
  463. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
  464. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
  465. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/lattice_rescorer/main.cc +0 -0
  466. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/lattice_rescorer/rescorer.h +0 -0
  467. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/lattice_rescorer/vocabulary.cc +0 -0
  468. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/lattice_rescorer/vocabulary.h +0 -0
  469. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/tf_avg_checkpoints.py +0 -0
  470. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/tf_inspect_checkpoint.py +0 -0
  471. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/tf_inspect_summary_log.py +0 -0
  472. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/torch_avg_checkpoints.py +0 -0
  473. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/torch_export_to_onnx.py +0 -0
  474. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/torch_inspect_checkpoint.py +0 -0
  475. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/torch_inspect_checkpoint_and_opt.py +0 -0
  476. {returnn-1.20250828.2732 → returnn-1.20250829.151916}/tools/torch_scale_tuning.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250828.2732
+Version: 1.20250829.151916
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer

returnn-1.20250829.151916/_setup_info_generated.py (new file)

@@ -0,0 +1,2 @@
+version = '1.20250829.151916'
+long_version = '1.20250829.151916+git.687fa49'
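
The generated module is just two plain module-level strings, so it can be imported
and inspected directly. A minimal sketch (assumption: run from an unpacked sdist
root, where _setup_info_generated.py is on sys.path; an installed package carries
the same values in its metadata instead):

    import _setup_info_generated as info  # defines only `version` and `long_version`

    print(info.version)       # 1.20250829.151916
    print(info.long_version)  # 1.20250829.151916+git.687fa49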

returnn/frontend/_cache.py

@@ -6,7 +6,7 @@ One use case example is :func:`sinusoidal_positional_encoding` and :func:`relati
 """
 
 from __future__ import annotations
-from typing import Optional, Union, Any, Type, Callable, Tuple, Dict
+from typing import Optional, Union, Any, Type, Callable, Tuple, Dict, List
 from weakref import ref
 import tree
 from returnn.util.lru_cache import lru_cache

@@ -59,6 +59,8 @@ class Cache:
             if isinstance(key_item_orig, DimWrapper):
                 assert isinstance(key_item, DimWrapper)
                 dim_orig = key_item_orig.dim_ref()
+                if dim_orig is None:  # orig dim could be dead. but then it would not be used anyway
+                    continue
                 dim = key_item.dim_ref()
                 assert isinstance(dim_orig, Dim) and isinstance(dim, Dim)
                 dim_map[dim_orig] = dim
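
The added guard covers the case where the weakly referenced original Dim has
already been garbage-collected: dim_ref() is a weakref call (note the
`from weakref import ref` above), which returns None once its referent is gone,
and a dead dim cannot occur in a live cache key anyway. A standalone sketch of
the pattern (plain stdlib, hypothetical Obj class, independent of RETURNN):

    import gc
    import weakref

    class Obj:
        pass

    o = Obj()
    r = weakref.ref(o)   # r() returns o while o is alive
    assert r() is o
    del o                # drop the last strong reference
    gc.collect()         # force collection on non-refcounting interpreters
    assert r() is None   # dead reference: skip it, as the new cache code does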

@@ -103,7 +105,7 @@ def _transform_key(
     key: Any, *, finalize_callback: Optional[Callable] = None, collected_dim_map: Optional[Dict[Dim, DimWrapper]] = None
 ) -> Tuple[Union[Type[Backend], ref[rf.RunCtx], _KeyItemType], ...]:
     backend = _get_backend(key)
-    keys_flat = [backend]
+    keys_flat: List[Any] = [backend]
     if not backend.executing_eagerly():
         # See comment above: If graph-mode, the cached value becomes invalid
         # when the current run ctx goes out of scope.

returnn/frontend/array_.py

@@ -188,22 +188,18 @@ def merge_dims(
             return source, dims[0]
         return rf.replace_dim(source, in_dim=dims[0], out_dim=out_dim)
     if out_dim is None:
-        out_dim = dims[0]
-        reset_dyn_size = False
-        for d in dims[1:]:
-            reset_dyn_size |= d.need_masking() and out_dim.capacity != 1
-            out_dim = out_dim * d
-        if reset_dyn_size:
+        from returnn.util.basic import prod
+
+        if any(d.need_masking() for d in dims[1:]):
             # The dynamic sizes as calculated via dim math would not correctly describe how the tensor looks like.
             # This would then potentially discard some of the data in the tensor in subsequent operations,
             # when masking is applied.
             # Thus, discard the dynamic sizes, and just treat it as a flat dim with scalar dynamic size.
             # https://github.com/rwth-i6/returnn/issues/1694
-            out_dim_size = dims[0].get_dim_value_tensor()
-            for d in dims[1:]:
-                out_dim_size *= d.get_dim_value_tensor()
-            assert isinstance(out_dim_size, Tensor) and out_dim_size.dims == ()  # scalar
-            out_dim.dyn_size_ext = out_dim_size
+            # See also similar logic in :func:`concat`.
+            out_dim = Dim(prod(d.get_dim_value_tensor() for d in dims), name="merged")
+        else:
+            out_dim = prod(dims)
     # noinspection PyProtectedMember
     return source._raw_backend.merge_dims(source, dims=dims, out_dim=out_dim), out_dim
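
Both new branches go through dim math, which Dim overloads; for purely static dims,
prod(dims) is equivalent to the old repeated out_dim * d. A small sketch of that
arithmetic (dim names made up):

    from returnn.tensor import Dim

    a = Dim(3, name="a")
    b = Dim(4, name="b")
    ab = a * b                  # dim-math product, as prod(dims) computes it
    assert ab.dimension == 3 * 4

For dynamic dims, the code now instead builds a fresh flat Dim from the product of
the raw dim value tensors, so no stale per-sequence sizes survive the merge.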

@@ -345,7 +341,9 @@ def window(
     """
     if spatial_dim.need_masking():
         if use_mask is None:
-            use_mask = rf.use_mask_default(default=True, default_false_for_behavior_version_up_to=22)
+            use_mask = rf.use_mask_default(
+                default=True, default_false_for_behavior_version_up_to=22, func_name="window"
+            )
         if use_mask:
             source = source.copy_masked(0, dims=[spatial_dim])
     assert window_dim.dimension is not None
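
func_name threads the caller's name into use_mask_default; the same change is
applied to scatter below and to conv, transposed_conv and pool in conv.py. Judging
from the (truncated) docstring in the dims.py hunk further down, this presumably
allows a per-function config override on top of the global one, but only the
global switch is fully visible in this diff. A config sketch for the visible part:

    # RETURNN config fragment (sketch). rf_use_mask is the global override read by
    # use_mask_default; with behavior_version <= 22 the default would otherwise be False.
    behavior_version = 22
    rf_use_mask = True  # force masking for rf.window, rf.conv, rf.pool, etc.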

@@ -427,28 +425,39 @@ def concat(
         dims = sources[0][0].dims_set - {sources[0][1]}
         for src, dim in sources:
             assert src.dims_set - {dim} == dims, f"concat {sources}, need allow_broadcast=True"
+    need_handle_dynamic_dims = False
+    for src, dim in sources[:-1]:
+        if dim.need_masking():
+            need_handle_dynamic_dims = True
+    if handle_dynamic_dims is None:
+        handle_dynamic_dims = need_handle_dynamic_dims
     if not out_dim:
-        out_dim = sum(d for _, d in sources)
-    # noinspection PyProtectedMember
-    out = sources[0][0]._raw_backend.concat(*sources, allow_broadcast=allow_broadcast, out_dim=out_dim)
-    if handle_dynamic_dims is None or handle_dynamic_dims:
-        need_to_handle = False
-        for src, dim in sources[:-1]:
-            if dim.need_masking():
-                need_to_handle = True
-        if need_to_handle:
-            masks = []
-            for _, dim in sources:
-                masks.append(
-                    dim.get_mask(dim_order=(dim,) + dim.dyn_size_ext.dims, device=out.device)
-                    if dim.need_masking()
-                    else rf.constant(True, dims=[dim], device=out.device)
-                )
-            # noinspection PyProtectedMember
-            mask_concat = sources[0][0]._raw_backend.concat(
-                *[(mask, dim) for (_, dim), mask in zip(sources, masks)], allow_broadcast=True, out_dim=out_dim
+        if handle_dynamic_dims or not need_handle_dynamic_dims:
+            out_dim = sum(d for _, d in sources)
+        else:  # not handle_dynamic_dims but need_handle_dynamic_dims
+            # There are dynamic dims, but we don't want to handle them.
+            # So, summing the dims would be incorrect.
+            # Just add the dim values.
+            out_dim = Dim(sum(d.get_dim_value_tensor() for _, d in sources if d.dimension is not None), name="concat")
+    if handle_dynamic_dims:
+        out_non_masked_dim = Dim(sum(d.get_dim_value_tensor() for _, d in sources))
+        # noinspection PyProtectedMember
+        out = sources[0][0]._raw_backend.concat(*sources, allow_broadcast=allow_broadcast, out_dim=out_non_masked_dim)
+        masks = []
+        for _, dim in sources:
+            masks.append(
+                dim.get_mask(dim_order=(dim,) + dim.dyn_size_ext.dims, device=out.device)
+                if dim.need_masking()
+                else rf.constant(True, dims=[dim], device=out.device)
             )
-            out, out_dim = rf.masked_select(out, mask=mask_concat, dims=[out_dim])
+        # noinspection PyProtectedMember
+        mask_concat = sources[0][0]._raw_backend.concat(
+            *[(mask, dim) for (_, dim), mask in zip(sources, masks)], allow_broadcast=True, out_dim=out_non_masked_dim
+        )
+        out, _ = rf.masked_select(out, mask=mask_concat, dims=[out_non_masked_dim], out_dim=out_dim)
+    else:
+        # noinspection PyProtectedMember
+        out = sources[0][0]._raw_backend.concat(*sources, allow_broadcast=allow_broadcast, out_dim=out_dim)
     return out, out_dim
 
 
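
Net effect: handle_dynamic_dims now defaults to True exactly when some non-final
source dim needs masking, and the masked path concatenates on the padded layout
first, then compacts via masked_select. A hedged usage sketch (tensors a and b with
dynamic time dims time_a and time_b are assumed to exist; their construction is
omitted):

    import returnn.frontend as rf

    # Default: padding frames of a are dropped, so out_time == time_a + time_b.
    out, out_time = rf.concat((a, time_a), (b, time_b))

    # Opt out: the raw (padded) layouts are concatenated unchanged, and out_time2
    # is a fresh flat Dim over the summed raw dim values (name="concat").
    out2, out_time2 = rf.concat((a, time_a), (b, time_b), handle_dynamic_dims=False)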

@@ -494,7 +503,12 @@ def pad(
     if handle_dynamic_dims is None:
         handle_dynamic_dims = _pad_handle_dynamic_dims_default(axes, padding, mode=mode)
     if not out_dims:
-        out_dims = [left + middle + right for middle, (left, right) in zip(axes, padding)]
+        out_dims = [
+            (left + middle + right)
+            if handle_dynamic_dims or not _pad_need_dyn_dim_handling(middle, left, right, mode=mode)
+            else _pad_sum_dims_no_dyn_dim_handling(middle, left, right)
+            for middle, (left, right) in zip(axes, padding)
+        ]
     # noinspection PyProtectedMember
     return (
         source._raw_backend.pad(

@@ -560,6 +574,32 @@ def _pad_need_dyn_dim_handling(
     return True
 
 
+def _pad_sum_dims_no_dyn_dim_handling(
+    middle: Dim, left: Union[Dim, int, Tensor], right: Union[Dim, int, Tensor]
+) -> Dim:
+    """
+    This gets called when we need to handle dyn dims, but handle_dynamic_dims=False.
+    See also the same logic in :func:`concat`.
+    """
+    if isinstance(left, Dim):
+        left = left.get_dim_value_tensor()
+    elif isinstance(left, int):
+        pass
+    elif isinstance(left, Tensor):
+        assert left.dims == ()  # scalar
+    else:
+        raise TypeError(f"invalid left pad {left}")
+    if isinstance(right, Dim):
+        right = right.get_dim_value_tensor()
+    elif isinstance(right, int):
+        pass
+    elif isinstance(right, Tensor):
+        assert right.dims == ()  # scalar
+    else:
+        raise TypeError(f"invalid right pad {right}")
+    return Dim(left + middle.get_dim_value_tensor() + right, name="pad")
+
+
 def cum_concat_step(
     source: Tensor, *, prev_accum: Tensor, axis: Dim, out_spatial_dim: Optional[Dim] = None
 ) -> Tuple[Tensor, Dim]:
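
The helper mirrors the concat logic above: when a padded axis would need
dynamic-dim handling but handle_dynamic_dims=False was requested, the output dim
becomes a flat Dim over raw dim values instead of a dim-math sum. A hedged usage
sketch (a tensor x with a dynamic time_dim is assumed to exist):

    import returnn.frontend as rf

    # With handle_dynamic_dims=False on a dim that needs handling, each out dim is
    # now Dim(left + middle.get_dim_value_tensor() + right, name="pad").
    padded, (padded_time,) = rf.pad(
        x, axes=[time_dim], padding=[(1, 1)], value=0.0, handle_dynamic_dims=False
    )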

@@ -867,7 +907,9 @@ def scatter(
     indices_dim = indices_dim if isinstance(indices_dim, (list, tuple)) else [indices_dim]
     if any(dim.need_masking() for dim in indices_dim):
         if use_mask is None:
-            use_mask = rf.use_mask_default(default=True, default_false_for_behavior_version_up_to=22)
+            use_mask = rf.use_mask_default(
+                default=True, default_false_for_behavior_version_up_to=22, func_name="scatter"
+            )
         if use_mask:
             source = source.copy_masked(fill_value, dims=indices_dim)
         else:

returnn/frontend/conv.py

@@ -223,7 +223,7 @@ def conv(
     """
     if any(in_spatial_dim.need_masking() for in_spatial_dim in in_spatial_dims):
         if use_mask is None:
-            use_mask = rf.use_mask_default(default=True, default_false_for_behavior_version_up_to=22)
+            use_mask = rf.use_mask_default(default=True, default_false_for_behavior_version_up_to=22, func_name="conv")
         if use_mask:
             source = source.copy_masked(0, dims=in_spatial_dims)
     for in_spatial_dim in in_spatial_dims:

@@ -391,7 +391,9 @@ def transposed_conv(
     """transposed conv"""
     if any(in_spatial_dim.need_masking() for in_spatial_dim in in_spatial_dims):
         if use_mask is None:
-            use_mask = rf.use_mask_default(default=True, default_false_for_behavior_version_up_to=22)
+            use_mask = rf.use_mask_default(
+                default=True, default_false_for_behavior_version_up_to=22, func_name="transposed_conv"
+            )
         if use_mask:
             source = source.copy_masked(0, dims=in_spatial_dims)
     if padding == "same" and _any_is_non_default(strides, default=1) and _should_use_consistent_same_padding():
@@ -503,7 +505,7 @@ def pool(
503
505
 
504
506
  if any(in_spatial_dim.need_masking() for in_spatial_dim in in_spatial_dims):
505
507
  if use_mask is None:
506
- use_mask = rf.use_mask_default(default=True, default_false_for_behavior_version_up_to=22)
508
+ use_mask = rf.use_mask_default(default=True, default_false_for_behavior_version_up_to=22, func_name="pool")
507
509
  if use_mask:
508
510
  source = source.copy_masked({"max": float("-inf"), "avg": 0}[mode], dims=in_spatial_dims)
509
511
  else:
@@ -862,8 +864,9 @@ def _consistent_same_padding(
         pad_right = (s - 1) * d - pad_left
         paddings.append((pad_left, pad_right))
     # We expect that masking was already done before (or we don't care about it), thus handle_dynamic_dims=False.
+    out_dims = [(left + middle + right) for middle, (left, right) in zip(in_spatial_dims, paddings)]
     source, in_spatial_dims = rf.pad(
-        source, axes=in_spatial_dims, padding=paddings, value=pad_value, handle_dynamic_dims=False
+        source, axes=in_spatial_dims, padding=paddings, value=pad_value, handle_dynamic_dims=False, out_dims=out_dims
     )
     return source, in_spatial_dims, 0

@@ -3,7 +3,7 @@ Utilities for dimension tags, dimensions, axes.
 """

 from __future__ import annotations
-from typing import Optional, Union, TypeVar, Sequence, Tuple
+from typing import TYPE_CHECKING, Optional, Union, TypeVar, Sequence, Tuple
 from returnn.tensor import Tensor, Dim
 import returnn.frontend as rf
 from ._backend import get_backend_by_tensor, global_backend
@@ -25,6 +25,9 @@ __all__ = [
     "use_mask_default",
 ]

+if TYPE_CHECKING:
+    from returnn.config import Config
+

 def range_over_dim(dim: Dim, *, dtype: Optional[str] = None, device: Optional[str] = None) -> Tensor[T]:
     """
@@ -309,7 +312,10 @@ def last_frame_position_of_dim(


 def use_mask_default(
-    *, default: Optional[bool] = None, default_false_for_behavior_version_up_to: Optional[int] = None
+    *,
+    default: Optional[bool] = None,
+    default_false_for_behavior_version_up_to: Optional[int] = None,
+    func_name: Optional[str] = None,
 ) -> Optional[bool]:
     """
     Check the global RETURNN config for the ``rf_use_mask``
@@ -324,20 +330,20 @@ def use_mask_default(
         and if this is set, and the behavior version is less or equal,
         then return False by default, i.e. do not use the mask by default, if it is not defined in the config.
         This takes precedence over `default`.
+    :param func_name: if specified, also check the config for ``rf_use_mask_{func_name}``
     :return: what to use for the ``use_mask`` argument by default
     """
     from returnn.config import get_global_config

     config = get_global_config(raise_exception=False)
-    config_value = None
     if config:
-        if "rf_use_mask" in config.typed_dict:
-            config_value = config.typed_dict["rf_use_mask"]
-            assert config_value is None or isinstance(config_value, bool)
-        elif "rf_use_mask" in config.dict:
-            config_value = config.bool("rf_use_mask", None)
-    if config_value is not None:
-        return config_value
+        config_value = _get_opt_bool_from_config(config, "rf_use_mask")
+        if config_value is not None:
+            return config_value
+        if func_name:
+            config_value = _get_opt_bool_from_config(config, f"rf_use_mask_{func_name}")
+            if config_value is not None:
+                return config_value

     if default_false_for_behavior_version_up_to is not None:
         from returnn.util.basic import BehaviorVersion
@@ -345,3 +351,13 @@ def use_mask_default(
         if BehaviorVersion.get() <= default_false_for_behavior_version_up_to:
             return False
     return default
+
+
+def _get_opt_bool_from_config(config: Config, key: str) -> Optional[bool]:
+    if key in config.typed_dict:
+        config_value = config.typed_dict[key]
+        assert config_value is None or isinstance(config_value, bool)
+        return config_value
+    elif key in config.dict:
+        return config.bool(key, None)
+    return None
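
Note: for config authors this defines a lookup order: a bool set under the global ``rf_use_mask`` key wins, and the per-function key ``rf_use_mask_<func_name>`` is only consulted when the global key is unset (or None). A hypothetical config snippet:

    # RETURNN config sketch (hypothetical values):
    rf_use_mask_pool = False  # disable masking only for pool()
    # rf_use_mask = True      # if set to a bool, this overrides all per-function keys
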
@@ -16,7 +16,7 @@ T = TypeVar("T")


 def setup_post_hook_on_method(
-    obj: Any,
+    obj: T,
     attr: str,
     hook: Callable[[T, Tuple[Any, ...], Dict[str, Any], Any], Optional[Any]],
     *,
@@ -40,7 +40,7 @@ class MethodWithHooks:
     """

     @classmethod
-    def get(cls, obj: Any, attr: str) -> MethodWithHooks:
+    def get(cls, obj: T, attr: str) -> MethodWithHooks:
         """get existing or init new :class:`MethodWithHooks`"""
         method = getattr(obj, attr)
         if not isinstance(method, MethodWithHooks):
@@ -56,7 +56,7 @@ class MethodWithHooks:
         method.setup()
         return method

-    def __init__(self, obj: Any, attr: str):
+    def __init__(self, obj: T, attr: str):
         """
         :param obj:
         :param attr:
@@ -218,7 +218,7 @@ class BatchNorm(rf.Module):

         if any(d.need_masking() for d in source.dims if d != self.in_dim):
             if self.use_mask is None:
-                use_mask = rf.use_mask_default(default=True)
+                use_mask = rf.use_mask_default(default=True, func_name="BatchNorm")
             else:
                 use_mask = self.use_mask
         else:
@@ -71,7 +71,7 @@ def stft(
     """
     if in_spatial_dim.need_masking():
         if use_mask is None:
-            use_mask = rf.use_mask_default(default=True, default_false_for_behavior_version_up_to=22)
+            use_mask = rf.use_mask_default(default=True, default_false_for_behavior_version_up_to=22, func_name="stft")
         if use_mask:
             x = x.copy_masked(0, dims=[in_spatial_dim])
     fft_length = fft_length or frame_length
@@ -1264,7 +1264,6 @@ class _DimMixin:
             raise TypeError(f"complete_dyn_size: _relu: unexpected type {type(a)}")

         y: Optional[_t.Tensor] = None  # resulting dyn size
-        y_max_value: Optional[_t.Tensor] = None  # resulting dyn size max value
         inputs = list(op.inputs)
         assert inputs
         for x_dim in inputs:
@@ -1275,8 +1274,6 @@ class _DimMixin:
             if x_dim.dyn_size_ext is None and x_dim.dimension is None:
                 return
             y = _bin_op(y, x_dim.dimension if x_dim.dimension is not None else x_dim.dyn_size_ext)
-            if not template_only and y.raw_tensor is not None:
-                y_max_value = _bin_op(y_max_value, x_dim.get_dim_value_tensor())
         assert y is not None, f"op {op}?"
         if self.dyn_size_ext is not None:
             assert self.dyn_size_ext.dim_tags == y.dim_tags
@@ -1286,9 +1283,14 @@ class _DimMixin:
         else:
             self.batch = y.batch
         self.dyn_size_ext = y
-        if not template_only and y_max_value is not None:
-            assert y_max_value is not None and y_max_value.raw_tensor is not None
-            self._dyn_size_max_value = y_max_value
+        if not template_only and y.raw_tensor is not None:
+            # Note: Earlier, we had this wrong.
+            # It is not correct to replicate the same math (bin ops)
+            # on the dim values (_dyn_size_max_value of each dim).
+            # Consider sizes1=[2,3], sizes2=[5,4], and the op is "add".
+            # Then the result sizes would be [7,7], thus its max is 7,
+            # but max(sizes1)+max(sizes2)=3+5=8.
+            self._dyn_size_max_value = rf.reduce_max(y, axis=y.dims) if y.dims else y
         if tf and y.placeholder is not None:
             self.set_tag_on_size_tensor(y.placeholder)
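
Note: the counterexample from the comment above, run as plain numpy (not RETURNN code):

    import numpy as np

    sizes1 = np.array([2, 3])
    sizes2 = np.array([5, 4])
    summed = sizes1 + sizes2             # combined per-sequence sizes: [7, 7]
    print(summed.max())                  # 7 -- the correct max dim value
    print(sizes1.max() + sizes2.max())   # 8 -- what the old per-dim-value math gave
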
@@ -2080,6 +2082,8 @@ class _DimMixin:
         :return: self + other. note that this is not commutative, i.e. different from other + self.
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 0:
+            return self
         cache_key = ("add", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2098,6 +2102,8 @@ class _DimMixin:
         :return: other + self
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 0:
+            return self
         cache_key = ("add_left", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2115,6 +2121,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 0:
+            return self
         return self.sub_right(other)

     def sub_right(self: Dim, other):
@@ -2123,6 +2131,8 @@ class _DimMixin:
         :return: self - other
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 0:
+            return self
         cache_key = ("sub", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2141,6 +2151,8 @@ class _DimMixin:
         :return: (-other) + self
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 0:
+            return self
         cache_key = ("sub_left", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2158,6 +2170,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("mul", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2175,6 +2189,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("mul_left", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2192,6 +2208,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("floordiv", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2209,6 +2227,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         return self.div_right(other)

     def div_left(self: Dim, other):
@@ -2216,6 +2236,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("truediv_left", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2233,6 +2255,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("truediv", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2250,6 +2274,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("ceildiv_left", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2267,6 +2293,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("ceildiv", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
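
Note: with these shortcuts, identity operations no longer create (and cache) derived dims. A sketch of the intended behavior (assuming the `+`/`-`/`*` operator dunders route through these helpers):

    from returnn.tensor import Dim

    time_dim = Dim(None, name="time")
    assert (time_dim + 0) is time_dim  # returns self, no derived "time+0" dim
    assert (time_dim - 0) is time_dim
    assert (time_dim * 1) is time_dim
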
@@ -1693,15 +1693,17 @@ def inplace_increment(x: numpy.ndarray, idx: numpy.ndarray, y: Union[numpy.ndarr
     raise NotImplementedError("This feature was removed with dropped Theano support")


-def prod(ls):
+def prod(ls: Union[Iterable[T], numpy.ndarray]) -> Union[int, T, float]:
     """
-    :param list[T]|tuple[T]|numpy.ndarray ls:
-    :rtype: T|int|float
+    :param ls:
+    :return: ls[0] * ls[1] * ...
     """
-    if len(ls) == 0:
+    it = iter(ls)
+    try:
+        x = next(it)
+    except StopIteration:
         return 1
-    x = ls[0]
-    for y in ls[1:]:
+    for y in it:
         x = x * y  # *= doesn't work because x might be a tensor, and for e.g. torch.Tensor this op is in-place
     return x
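
Note: the iterator-based version also accepts generators, which the old `len()`/slicing implementation rejected with a TypeError. Quick usage check (hypothetical session):

    from returnn.util.basic import prod

    assert prod([2, 3, 4]) == 24
    assert prod(d for d in (2, 3, 4)) == 24  # generator: previously a TypeError
    assert prod(()) == 1                     # empty input -> multiplicative identity
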
1707
1709
 
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250828.2732
+Version: 1.20250829.151916
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
@@ -48,6 +48,26 @@ def tf_scope():
     yield session


+class RunModelException(Exception):
+    """run model exception"""
+
+
+class NonFiniteValuesException(RunModelException):
+    """non-finite values exception"""
+
+
+class CompareResultsMismatchException(RunModelException):
+    """compare results exception"""
+
+
+class CompareResultsMismatchTfVsPtException(CompareResultsMismatchException):
+    """compare results TF vs PT exception"""
+
+
+class CompareResultsMismatchSingleVsMultiBatchException(CompareResultsMismatchException):
+    """compare results single vs multi batch exception"""
+
+
 def run_model(
     extern_data: TensorDict,
     get_model: rf.GetModelFunc,
@@ -85,7 +105,7 @@ def run_model(
             lambda: (_run_model_torch(extern_data, get_model, forward_step), None)[-1],
             stop_reporting_after_first_inf_nan=False,
         )
-        raise Exception(f"Non-finite values in output: {non_finite_outputs}. See log above.")
+        raise NonFiniteValuesException(f"Non-finite values in output: {non_finite_outputs}. See log above.")

     if test_single_batch_entry and batch_dim in extern_data_dims:
         dyn_dims = [
@@ -146,7 +166,7 @@ def run_model(
         if not numpy.allclose(v_pt, v_tf, atol=1e-5, rtol=1e-5):
            print(f" PT:\n{v_pt}")
            print(f" TF:\n{v_tf}")
-            raise Exception(f"output {k!r} differs")
+            raise CompareResultsMismatchTfVsPtException(f"output {k!r} differs")
     return out_pt


@@ -300,9 +320,10 @@ def _run_model_torch_single_batch(
            # Slice the raw ref output to be able to match it to the raw single output.
            ref_output_raw = ref_output_.raw_tensor[_get_slices(output_)]
            single_output_raw = output_.raw_tensor
-            numpy.testing.assert_allclose(
-                ref_output_raw, single_output_raw, atol=1e-5, rtol=1e-5, err_msg=f"output {key!r} differs"
-            )
+            if not numpy.allclose(ref_output_raw, single_output_raw, atol=1e-5, rtol=1e-5):
+                print(f" Batched:\n{ref_output_raw}")
+                print(f" Single:\n{single_output_raw}")
+                raise CompareResultsMismatchSingleVsMultiBatchException(f"output {key!r} differs")

     # Recover original data.
     extern_data.reset_content()
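
Note: the hierarchy lets test code distinguish failure modes. A hypothetical usage sketch:

    try:
        run_model(extern_data, get_model, forward_step)
    except NonFiniteValuesException:
        ...  # inf/nan in the outputs
    except CompareResultsMismatchException:
        ...  # TF-vs-PT or single-vs-multi-batch mismatch (shared base class)
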
@@ -411,6 +411,46 @@ def test_concat():
     run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step)


+def test_concat_partly_dyn_dim():
+    time_static_dim = Dim(5, name="time_static")
+    time_dim = Dim(Tensor("time", [batch_dim], dtype="int32"))
+    in_dim = Dim(7, name="in")
+    extern_data = TensorDict(
+        {
+            "left": Tensor("left", [batch_dim, time_static_dim, in_dim], dtype="float32"),
+            "right": Tensor("right", [batch_dim, time_dim, in_dim], dtype="float32"),
+        }
+    )
+
+    # noinspection PyShadowingNames
+    def _forward_step(*, extern_data: TensorDict, **_kwargs):
+        left, right = extern_data["left"], extern_data["right"]
+        out, out_time_dim = rf.concat((left, time_static_dim), (right, time_dim))
+        out.mark_as_default_output(shape=(batch_dim, out_time_dim, in_dim))
+
+    run_model(extern_data, lambda **_: rf.Module(), _forward_step)
+
+
+def test_concat_dyn_time():
+    time1_dim = Dim(Tensor("time1", [batch_dim], dtype="int32"))
+    time2_dim = Dim(Tensor("time2", [batch_dim], dtype="int32"))
+    extern_data = TensorDict(
+        {
+            "left": Tensor("left", [batch_dim, time1_dim], dtype="float32"),
+            "right": Tensor("right", [batch_dim, time2_dim], dtype="float32"),
+        }
+    )
+
+    # noinspection PyShadowingNames
+    def _forward_step(*, extern_data: TensorDict, **_kwargs):
+        left, right = extern_data["left"], extern_data["right"]
+        out, out_time_dim = rf.concat((left, time1_dim), (right, time2_dim))
+        out.mark_as_default_output(shape=(batch_dim, out_time_dim))
+
+    # test_single_batch_entry should test the interesting case.
+    run_model(extern_data, lambda **_: rf.Module(), _forward_step, test_tensorflow=False)
+
+
 def test_pad():
     time_dim = Dim(Tensor("time", [batch_dim], dtype="int32"))
     in_dim = Dim(7, name="in")
@@ -38,7 +38,7 @@ def test_dot_attention():

     class _Net(rf.Module):
         def __call__(self, q: Tensor, k: Tensor, v: Tensor) -> Tensor:
-            kv_axis = Dim(None, name=f"kv-axis")
+            kv_axis = Dim(None, name="kv-axis")
             k, _ = rf.replace_dim(k, in_dim=time_dim, out_dim=kv_axis)
             v, _ = rf.replace_dim(v, in_dim=time_dim, out_dim=kv_axis)
             return rf.dot_attention(q, k, v, axis=kv_axis, key_dim=key_dim)
@@ -604,7 +604,7 @@ def test_rel_pos_self_attention():
         x_b = rf.gather(x, axis=batch_dim, indices=b)
         assert batch_dim in axis.dyn_size_ext.dims  # current assumption...
         seq_len = rf.gather(axis.dyn_size_ext, axis=batch_dim, indices=b)
-        axis_b = Dim(seq_len)
+        axis_b = Dim(seq_len, name=f"time_b{b}")
         # Note: The current order (replace_dim and then slice) is somewhat dependent
         # on the current internal behavior of gather and replace_dim,
         # which might change at some point...