PyPI - returnn - Versions diffs - 1.20250228.104237__tar.gz → 1.20250304.101951__tar.gz - Mend

returnn 1.20250228.104237__tar.gz → 1.20250304.101951__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of returnn might be problematic. Click here for more details.

Files changed (476) hide show

{returnn-1.20250228.104237/returnn.egg-info → returnn-1.20250304.101951}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250228.104237
+Version: 1.20250304.101951
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer

returnn-1.20250304.101951/_setup_info_generated.py ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ version = '1.20250304.101951'
2	+ long_version = '1.20250304.101951+git.0fa434e'

{returnn-1.20250228.104237 → returnn-1.20250304.101951}/returnn/frontend/_backend.py RENAMED Viewed

@@ -1223,7 +1223,7 @@ class Backend(Generic[T]):
         out_spatial_dims: Optional[Sequence[Dim]] = None,
         filter: Tensor,
         filter_size: Sequence[Dim],  # to have the order well-defined
-        padding: str,
+        padding: Union[str, int, Sequence[int]],
         strides: Optional[Union[int, Sequence[int]]] = None,
         dilation_rate: Optional[Union[int, Sequence[int]]] = None,
         groups: Optional[int] = None,
@@ -1258,7 +1258,7 @@ class Backend(Generic[T]):
         *,
         mode: str,
         pool_size: Sequence[int],
-        padding: str = "valid",
+        padding: Union[str, int, Sequence[int]] = "valid",
         dilation_rate: Union[Sequence[int], int] = 1,
         strides: Sequence[int],
         in_spatial_dims: Sequence[Dim],

{returnn-1.20250228.104237 → returnn-1.20250304.101951}/returnn/frontend/conv.py RENAMED Viewed

@@ -181,15 +181,46 @@ def conv(
     in_spatial_dims: Sequence[Dim],
     out_spatial_dims: Optional[Sequence[Dim]] = None,
     filter: Tensor,
-    filter_size: Sequence[Dim],  # to have the order well-defined
-    padding: str,
+    filter_size: Sequence[Dim],
+    padding: Union[str, int, Sequence[int]],
     strides: Optional[Union[int, Sequence[int]]] = None,
     dilation_rate: Optional[Union[int, Sequence[int]]] = None,
     groups: Optional[int] = None,
     bias: Optional[Tensor] = None,
     use_mask: Optional[bool] = None,
 ) -> Tuple[Tensor, Sequence[Dim]]:
-    """convolution"""
+    """
+    Generic N-D convolution.
+    :param source:
+    :param in_dim: input channels
+    :param out_dim: output channels
+    :param in_spatial_dims: On what dimensions to operate on.
+        The number of specified dims (1, 2 or 3) specifies whether this is 1D, 2D or 3D convolution.
+        The order is consistent with the order of the ``filter_size``, ``strides``, etc.
+    :param out_spatial_dims:
+    :param filter:
+    :param filter_size: defines the order of dims in ``filter``
+        such that it matches the order of ``in_spatial_dims``.
+    :param padding: "valid" or "same" or int. "valid" is like padding=0.
+        padding="same" will pad such that the output has the same spatial dimensions as the input
+        (in case of stride=1), or otherwise ceildiv(input, stride).
+        The specific padding in padding="same" with stride>1 has changed with behavior version >=24
+        (or global config option ``rf_use_consistent_same_padding``)
+        and is now consistent independent of dimension size.
+        See :func:`_consistent_same_padding` for more details.
+    :param strides: the default (if it is None) is 1
+    :param dilation_rate:
+    :param groups:
+    :param bias:
+    :param use_mask: Whether to mask the input tensor based on seq lengths
+        such that the padding in the padded tensor is ignored
+        (it will mask with 0).
+        With behavior version >=23, this is enabled by default,
+        or configured with global config option ``rf_use_mask``.
+        (Also see :func:`use_mask_default`).
+    :return: out, out_spatial_dims
+    """
     if any(in_spatial_dim.need_masking() for in_spatial_dim in in_spatial_dims):
         if use_mask is None:
             use_mask = rf.use_mask_default(default=True, default_false_for_behavior_version_up_to=22)
@@ -198,6 +229,10 @@ def conv(
     for in_spatial_dim in in_spatial_dims:
         if in_spatial_dim not in source.dims:
             raise ValueError(f"conv: source {source} does not have spatial dim {in_spatial_dim}")
+    if padding == "same" and _any_is_non_default(strides, default=1) and _should_use_consistent_same_padding():
+        source, in_spatial_dims, padding = _consistent_same_padding(
+            source, in_spatial_dims=in_spatial_dims, filter_size=filter_size, dilation_rate=dilation_rate, pad_value=0
+        )
     # noinspection PyProtectedMember
     out, out_spatial_dims = source._raw_backend.conv(
         source,
@@ -359,6 +394,9 @@ def transposed_conv(
             use_mask = rf.use_mask_default(default=True, default_false_for_behavior_version_up_to=22)
         if use_mask:
             source = source.copy_masked(0, dims=in_spatial_dims)
+    if padding == "same" and _any_is_non_default(strides, default=1) and _should_use_consistent_same_padding():
+        # I don't really know what this should mean here... Investigate this further...
+        raise NotImplementedError("consistent same padding not implemented for transposed conv")
     # noinspection PyProtectedMember
     out, out_spatial_dims = source._raw_backend.transposed_conv(
         source=source,
@@ -409,7 +447,7 @@ def pool(
     nd: Optional[int] = None,
     mode: str,
     pool_size: Union[Sequence[int], int],
-    padding: str = "valid",
+    padding: Union[str, int, Sequence[int]] = "valid",
     dilation_rate: Union[Sequence[int], int] = 1,
     strides: Optional[Union[Sequence[int], int]] = None,
     in_spatial_dims: Union[Sequence[Dim], Dim],
@@ -417,19 +455,29 @@ def pool(
     use_mask: Optional[bool] = None,
 ) -> Tuple[Tensor, Sequence[Dim]]:
     """
-    A generic N-D pooling layer.
-    This would usually be done after a convolution for down-sampling.
+    Generic N-D pooling.
     :param source:
     :param nd:
     :param mode: "max" or "avg"
     :param pool_size: shape of the window of each reduce
-    :param padding: "valid" or "same"
+    :param padding: "valid" or "same" or int. "valid" is like padding=0.
+        padding="same" will pad such that the output has the same spatial dimensions as the input
+        (in case of stride=1), or otherwise ceildiv(input, stride).
+        The specific padding in padding="same" with stride>1 has changed with behavior version >=24
+        (or global config option ``rf_use_consistent_same_padding``)
+        and is now consistent independent of dimension size.
+        See :func:`_consistent_same_padding` for more details.
     :param dilation_rate:
-    :param strides: in contrast to tf.nn.pool, the default (if it is None) will be set to pool_size
+    :param strides: the default (if it is None) will be set to pool_size (in contrast to :func:`conv`)
     :param in_spatial_dims:
     :param out_spatial_dims:
-    :param use_mask:
+    :param use_mask: Whether to mask the input tensor based on seq lengths
+        such that the padding in the padded tensor is ignored
+        (for max-pooling, it will mask with -inf, for avg-pooling with 0).
+        With behavior version >=23, this is enabled by default,
+        or configured with global config option ``rf_use_mask``.
+        (Also see :func:`use_mask_default`).
     :return: out, out_spatial_dims
     """
     if isinstance(in_spatial_dims, Dim):
@@ -451,8 +499,7 @@ def pool(
         strides = pool_size
     elif isinstance(strides, int):
         strides = [strides] * nd
-    assert isinstance(strides, (list, tuple))
-    assert len(strides) == nd
+    assert isinstance(strides, (list, tuple)) and len(strides) == nd and all(isinstance(s, int) for s in strides)
     if any(in_spatial_dim.need_masking() for in_spatial_dim in in_spatial_dims):
         if use_mask is None:
@@ -462,6 +509,15 @@ def pool(
     else:
         use_mask = False
+    if padding == "same" and _any_is_non_default(strides, default=1) and _should_use_consistent_same_padding():
+        source, in_spatial_dims, padding = _consistent_same_padding(
+            source,
+            in_spatial_dims=in_spatial_dims,
+            filter_size=pool_size,
+            dilation_rate=dilation_rate,
+            pad_value={"max": float("-inf"), "avg": 0}[mode],
+        )
     # noinspection PyProtectedMember
     out, out_spatial_dims = source._raw_backend.pool(
         source=source,
@@ -642,7 +698,7 @@ def make_conv_out_spatial_dims(
     in_spatial_dims: Sequence[Dim],
     *,
     filter_size: Union[Sequence[Union[int, Dim]], int, Dim],
-    padding: str,
+    padding: Union[str, int, Sequence[int]],
     strides: Union[Sequence[int], int] = 1,
     dilation_rate: Union[Sequence[int], int] = 1,
     description_prefix: Optional[str] = None,
@@ -658,11 +714,15 @@ def make_conv_out_spatial_dims(
     if isinstance(dilation_rate, int):
         dilation_rate = [dilation_rate] * nd
     assert nd == len(in_spatial_dims) == len(filter_size) == len(strides) == len(dilation_rate)
-    assert padding.lower() in ("valid", "same")
+    if isinstance(padding, (int, str)):
+        padding = [padding] * nd
+    padding = [p.lower() if isinstance(p, str) else p for p in padding]
     out_spatial_dims = []
     for i in range(nd):
         in_spatial_dim = in_spatial_dims[i]
-        if filter_size[i] == strides[i] == 1 or (strides[i] == 1 and padding.lower() == "same"):
+        if (filter_size[i] == strides[i] == 1 and padding[i] in ("valid", "same", 0)) or (
+            strides[i] == 1 and padding[i] == "same"
+        ):
             out_spatial_dims.append(in_spatial_dim)
         else:
             out_spatial_dim = _calc_out_dim(
@@ -670,7 +730,7 @@ def make_conv_out_spatial_dims(
                 filter_size=filter_size[i],
                 stride=strides[i],
                 dilation_rate=dilation_rate[i],
-                padding=padding,
+                padding=padding[i],
             )
             assert isinstance(out_spatial_dim, Dim)
             if description_prefix and out_spatial_dim != in_spatial_dim:
@@ -681,7 +741,7 @@ def make_conv_out_spatial_dims(
                     filter_size=filter_size[i],
                     stride=strides[i],
                     dilation_rate=dilation_rate[i],
-                    padding=padding,
+                    padding=padding[i],
                 )
             out_spatial_dims.append(out_spatial_dim)
     return out_spatial_dims
@@ -695,7 +755,7 @@ def _calc_out_dim(in_dim, filter_size, stride, padding, dilation_rate=1):
     :param int filter_size: e.g. 2, for the corresponding axis
     :param int stride: e.g. 1, for the corresponding axis
     :param int dilation_rate: e.g. 1
-    :param str padding: "valid" or "same"
+    :param str|int padding: "valid" or "same" or int
     :return: the output dimension
     :rtype: T
     """
@@ -712,13 +772,16 @@ def _calc_out_dim(in_dim, filter_size, stride, padding, dilation_rate=1):
             return rf.ceil_divide(a, b)
         return -(-a // b)
-    padding = padding.upper()
+    padding = padding.lower() if isinstance(padding, str) else padding
     # See tf.compat.v1.nn.convolution() documentation for more.
-    if padding == "SAME":
+    if padding == "same":
         if isinstance(in_dim, Dim):
             return in_dim.ceildiv_right(stride)
         return ceildiv(in_dim, stride)
-    elif padding == "VALID":
+    elif padding == "valid" or isinstance(padding, int):
+        if isinstance(padding, int) and padding != 0:
+            assert padding > 0
+            in_dim = padding + in_dim + padding
         if isinstance(in_dim, Dim):
             filter_left_dilated = (filter_size - 1) * dilation_rate // 2
             filter_right_dilated = (filter_size - 1) * dilation_rate - filter_left_dilated
@@ -726,4 +789,95 @@ def _calc_out_dim(in_dim, filter_size, stride, padding, dilation_rate=1):
             return valid_part.ceildiv_right(stride)
         return ceildiv(in_dim - (filter_size - 1) * dilation_rate, stride)
     else:
-        raise Exception("invalid padding %r" % padding)
+        raise ValueError(f"invalid padding {padding!r} (type {type(padding).__name__})")
+def _should_use_consistent_same_padding() -> bool:
+    """
+    :return: whether to use the new consistent same padding with :func:`_consistent_same_padding`.
+    This is only needed for the case when we have striding and padding="same".
+    See :func:`_consistent_same_padding` for more details.
+    Check the global RETURNN config for the ``rf_use_consistent_same_padding``
+    on how we should handle the ``padding="same"`` case for convolution/pooling when there is striding.
+    If that is not specified, with behavior version >=24, we will use the new consistent same padding,
+    with behavior version <=23, we will not use it.
+    See issue `#1693 <https://github.com/rwth-i6/returnn/issues/1693>`__.
+    """
+    from returnn.config import get_global_config
+    config = get_global_config(raise_exception=False)
+    config_value = None
+    if config:
+        if "rf_use_consistent_same_padding" in config.typed_dict:
+            config_value = config.typed_dict["rf_use_consistent_same_padding"]
+            assert config_value is None or isinstance(config_value, bool)
+        elif "rf_use_consistent_same_padding" in config.dict:
+            config_value = config.bool("rf_use_consistent_same_padding", None)
+    if config_value is not None:
+        return config_value
+    from returnn.util.basic import BehaviorVersion
+    return BehaviorVersion.get() >= 24
+def _consistent_same_padding(
+    source: Tensor,
+    *,
+    in_spatial_dims: Sequence[Dim],
+    filter_size: Optional[Union[int, Dim, Sequence[int], Sequence[Dim]]],
+    dilation_rate: Optional[Union[int, Sequence[int]]] = None,
+    pad_value: Union[int, float],
+) -> Tuple[Tensor, Sequence[Dim], Union[int, Sequence[int]]]:
+    """
+    In case of striding and padding="same", the standard padding that we do (following TensorFlow)
+    depends on the current dimension size.
+    It adds padding left and right such that the first and last window
+    will have the same amount of padding (+-1).
+    With stride=1, this is the standard (filter_size-1)/2 left and right padding,
+    but with stride>1, this is not the case anymore.
+    (See also the explanation and calculation of padding in :func:`returnn.torch.frontend._backend.TorchBackend.conv`.)
+    However, the problem with this behavior is with batching:
+    The padding now depends on the longest sequence in the batch,
+    and thus is arbitrary for any of the other sequences.
+    The new consistent same padding adds padding independent of the current dimension size (largest seq in batch).
+    We just do the same as with stride=1, i.e. (filter_size-1)/2 left and right padding.
+    :return: source or padded source, in_spatial_dims or new in_spatial_dims, new padding on top of the output
+    """
+    filter_size = _make_sequence(filter_size or 1, nd=len(in_spatial_dims))
+    dilation_rate = _make_sequence(dilation_rate or 1, nd=len(in_spatial_dims))
+    filter_size_ints = [s.dimension if isinstance(s, Dim) else s for s in filter_size]
+    if all(s % 2 == 1 for s in filter_size_ints):
+        # In this case, we can pass padding as integer to the backend, so that it adds the same padding left/right.
+        return source, in_spatial_dims, [(s // 2) * d for s, d in zip(filter_size_ints, dilation_rate)]
+    # Need to use the custom padding here.
+    paddings = []
+    for s, d in zip(filter_size, dilation_rate):
+        pad_left = (s - 1) * d // 2
+        pad_right = (s - 1) * d - pad_left
+        paddings.append((pad_left, pad_right))
+    # We expect that masking was already done before (or we don't care about it), thus handle_dynamic_dims=False.
+    source, in_spatial_dims = rf.pad(
+        source, axes=in_spatial_dims, padding=paddings, value=pad_value, handle_dynamic_dims=False
+    )
+    return source, in_spatial_dims, 0
+def _make_sequence(value: Union[int, Sequence[int]], *, nd: int) -> Sequence[int]:
+    if isinstance(value, int):
+        return [value] * nd
+    assert len(value) == nd
+    return value
+def _any_is_non_default(single_or_seq: Optional[Union[int, Sequence[int]]], *, default: int) -> bool:
+    if single_or_seq is None:
+        return False
+    if isinstance(single_or_seq, int):
+        return single_or_seq != default
+    return any(i != default for i in single_or_seq)

{returnn-1.20250228.104237 → returnn-1.20250304.101951}/returnn/tensor/_dim_extra.py RENAMED Viewed

@@ -1271,9 +1271,9 @@ class _DimMixin:
             if self.batch:
                 x_dim = x_dim.get_for_batch_ctx(self.batch, self.control_flow_ctx)
             x_dim.complete_dyn_size(template_only=template_only, _backend=backend)
-            if x_dim.dyn_size_ext is None and not x_dim.dimension:
+            if x_dim.dyn_size_ext is None and x_dim.dimension is None:
                 return
-            y = _bin_op(y, x_dim.dimension or x_dim.dyn_size_ext)
+            y = _bin_op(y, x_dim.dimension if x_dim.dimension is not None else x_dim.dyn_size_ext)
             if not template_only and y.raw_tensor is not None:
                 y_max_value = _bin_op(y_max_value, x_dim.get_dim_value_tensor())
         assert y is not None, f"op {op}?"

{returnn-1.20250228.104237 → returnn-1.20250304.101951}/returnn/tf/frontend_layers/_backend.py RENAMED Viewed

@@ -998,7 +998,7 @@ class ReturnnLayersBackend(Backend[Layer]):
         out_spatial_dims: Optional[Sequence[Dim]] = None,
         filter: Tensor,
         filter_size: Sequence[Dim],  # to have the order well-defined
-        padding: str,
+        padding: Union[str, int, Sequence[int]],
         strides: Optional[Union[int, Sequence[int]]] = None,
         dilation_rate: Optional[Union[int, Sequence[int]]] = None,
         groups: Optional[int] = None,
@@ -1088,7 +1088,7 @@ class ReturnnLayersBackend(Backend[Layer]):
         *,
         mode: str,
         pool_size: Sequence[int],
-        padding: str = "valid",
+        padding: Union[str, int, Sequence[int]] = "valid",
         dilation_rate: Union[Sequence[int], int] = 1,
         strides: Sequence[int],
         in_spatial_dims: Sequence[Dim],

returnn 1.20250228.104237__tar.gz → 1.20250304.101951__tar.gz

Potentially problematic release.

returnn 1.20250228.104237__tar.gz → 1.20250304.101951__tar.gz