returnn 1.20240924.10718.tar.gz → 1.20240925.224259.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of returnn might be problematic.
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/PKG-INFO +1 -1
- returnn-1.20240925.224259/_setup_info_generated.py +2 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/datasets/postprocessing.py +18 -3
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/_backend.py +9 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/array_.py +96 -3
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/decoder/transformer.py +9 -1
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/math_.py +7 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/frontend_low_level/_backend.py +8 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/torch/frontend/_backend.py +42 -4
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn.egg-info/PKG-INFO +1 -1
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_rf_array.py +32 -0
- returnn-1.20240924.10718/_setup_info_generated.py +0 -2
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/.editorconfig +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/.gitignore +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/.gitmodules +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/.kateconfig +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/CHANGELOG.md +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/CODEOWNERS +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/CONTRIBUTING.md +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/LICENSE +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/MANIFEST.in +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/README.rst +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/12AX.cluster_map +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/_setup_returnn_env.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-fwd.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-horovod-mpi.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-horovod-mpi.py.sh +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-horovod-mpi.sh +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-hyper-param-tuning.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-iter-dataset.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-list-devices.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-lua-torch-layer.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-pretrain.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-record-and-push-to-webserver.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-returnn-as-framework.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-rf-pt-benchmark.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-rf.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-rhn-enwik8.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-sprint-interface.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-att-copy.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-attention.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-enc-dec.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-hard-att-copy.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-lstm-benchmark.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-native-lstm.12ax.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-native-lstm2.12ax.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-neural-transducer.12ax.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-rec-explicit-lstm.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-rec-explicit-rnn.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-rec-self-att.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-search-compiled-graph.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-timit-lstm-ctc.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-torch.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo.sh +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/IAM/README.txt +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/IAM/chars.txt +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/IAM/config_demo +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/IAM/config_fwd +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/IAM/config_real +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/IAM/decode.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/IAM/go.sh +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/IAM/lines.txt +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/IAM/split/eval.txt +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/IAM/split/train.txt +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/IAM/split/valid.txt +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/README.md +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/artificial/create_test_h5.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/artificial/forwardconfig +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/artificial/go.sh +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/artificial/trainconfig +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/artificial_rgb/go.sh +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/pyproject.toml +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/requirements.txt +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/__main__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/__old_mod_loader__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/__setup__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/config.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/datasets/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/datasets/audio.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/datasets/basic.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/datasets/bundle_file.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/datasets/cached.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/datasets/cached2.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/datasets/distrib_files.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/datasets/generating.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/datasets/hdf.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/datasets/lm.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/datasets/map.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/datasets/meta.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/datasets/multi_proc.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/datasets/normalization_data.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/datasets/numpy_dump.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/datasets/raw_wav.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/datasets/sprint.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/datasets/stereo.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/datasets/util/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/datasets/util/feature_extraction.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/datasets/util/strings.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/datasets/util/vocabulary.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/engine/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/engine/base.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/engine/batch.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/__main__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/warp-rna/.git +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/graph_editor/README.md +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/graph_editor/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/graph_editor/edit.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/graph_editor/reroute.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/graph_editor/select.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/graph_editor/subgraph.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/graph_editor/transform.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/extern/graph_editor/util.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/forward_iface.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/_native/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/_native/backend.cpp +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/_native/backend.hpp +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/_native/module.cpp +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/_native/module.hpp +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/_native/py_utils.hpp +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/_native/tensor_ops.cpp +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/_native/tensor_ops.hpp +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/_numpy_backend.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/_random_journal.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/_utils.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/attention.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/audio/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/audio/mel.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/audio/specaugment.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/backend.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/build_from_dict.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/cond.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/const.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/container.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/control_flow_ctx.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/conv.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/conversions/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/conversions/espnet_e_branchformer.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/conversions/hf_llama.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/conversions/torch_nn.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/decoder/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/device.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/dims.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/dropout.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/dtype.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/encoder/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/encoder/base.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/encoder/conformer.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/encoder/e_branchformer.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/encoder/transformer.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/gradient.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/graph.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/hooks.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/init.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/label_smoothing.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/linear.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/loop.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/loss.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/matmul.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/module.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/normalization.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/parameter.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/parametrizations.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/parametrize.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/piecewise_linear.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/rand.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/rec.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/reduce.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/run_ctx.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/signal.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/state.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/stepwise_scheduler.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/tensor_array.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/types.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/import_/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/import_/common.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/import_/git.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/import_/import_.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/learning_rate_control.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/log.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/native_op.cpp +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/native_op.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/pretrain.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/sprint/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/sprint/cache.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/sprint/control.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/sprint/error_signals.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/sprint/extern_interface.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/sprint/interface.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tensor/README.md +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tensor/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tensor/_dim_extra.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tensor/_tensor_extra.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tensor/_tensor_mixin_base.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tensor/_tensor_op_overloads.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tensor/control_flow_ctx.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tensor/dim.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tensor/marked_dim.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tensor/tensor.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tensor/tensor_dict.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tensor/utils.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/compat.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/data_pipeline.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/distributed.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/engine.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/frontend_layers/README.md +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/frontend_layers/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/frontend_layers/_backend.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/frontend_layers/_utils.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/frontend_layers/cond.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/frontend_layers/dims.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/frontend_layers/layer.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/frontend_layers/loop.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/frontend_layers/make_layer.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/frontend_layers/masked_computation.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/frontend_low_level/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/horovod.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/hyper_param_tuning.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/layers/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/layers/base.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/layers/basic.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/layers/rec.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/layers/segmental_model.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/layers/signal_processing.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/layers/variable.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/native_op.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/network.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/sprint.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/updater.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/util/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/util/basic.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/util/data.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/util/gradient_checkpoint.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/util/ken_lm.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/util/open_fst.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/torch/README.md +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/torch/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/torch/data/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/torch/data/extern_data.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/torch/data/pipeline.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/torch/data/queued_data_iter.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/torch/data/tensor_utils.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/torch/distributed.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/torch/engine.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/torch/frontend/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/torch/frontend/_rand.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/torch/frontend/bridge.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/torch/frontend/raw_ops.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/torch/updater.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/torch/util/README.md +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/torch/util/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/torch/util/array_.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/torch/util/diagnose_gpu.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/torch/util/gradient_checkpoint.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/torch/util/module.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/torch/util/scaled_gradient.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/util/__init__.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/util/basic.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/util/better_exchook.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/util/bpe.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/util/debug.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/util/debug_helpers.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/util/file_cache.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/util/fsa.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/util/literal_py_to_pickle.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/util/math.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/util/native_code_compiler.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/util/pprint.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/util/py-to-pickle.cpp +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/util/py_compat.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/util/py_ext_mod_compiler.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/util/result_with_reason.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/util/sig_proc.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/util/task_system.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/util/train_proc_manager.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/util/watch_memory.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn.egg-info/SOURCES.txt +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn.egg-info/dependency_links.txt +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn.egg-info/top_level.txt +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/rnn.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/setup.cfg +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/setup.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/DummySprintExec.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/PyCharm-inspection-profile.xml +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/PyCharm.idea/.gitignore +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/PyCharm.idea/.name +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/PyCharm.idea/misc.xml +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/PyCharm.idea/modules.xml +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/PyCharm.idea/returnn.iml +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/_set_num_threads1.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/_setup_returnn_env.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/_setup_test_env.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/bpe-unicode-demo.codes +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/bpe-unicode-demo.vocab +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/lexicon_opt.fst +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/lexicon_opt.isyms +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/lexicon_opt.jpg +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/lexicon_opt.osyms +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/lint_common.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/pycharm-inspect.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/pylint.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/returnn-as-framework.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/rf_utils.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/spelling.dic +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_Config.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_Dataset.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_Fsa.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_GeneratingDataset.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_HDFDataset.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_LearningRateControl.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_Log.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_MultiProcDataset.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_Pretrain.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_ResNet.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_SprintDataset.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_SprintInterface.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_TFEngine.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_TFNativeOp.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_TFNetworkLayer.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_TFNetworkRecLayer.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_TFNetworkSigProcLayer.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_TFUpdater.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_TFUtil.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_TF_determinism.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_TaskSystem.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_TaskSystem_SharedMem.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_TranslationDataset.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_Util.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_demos.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_fork_exec.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_hdf_dump.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_rf_attention.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_rf_base.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_rf_cond.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_rf_const.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_rf_container.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_rf_conv.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_rf_decoder_transformer.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_rf_encoder_conformer.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_rf_gradient.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_rf_label_smoothing.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_rf_loop.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_rf_math.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_rf_normalization.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_rf_piecewise_linear.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_rf_rec.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_rf_reduce.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_rf_signal.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_tensor.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_tools.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_torch_dataset.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_torch_engine.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_torch_frontend.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_torch_internal_frontend.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_torch_util.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/torch_utils.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/_setup_returnn_env.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/analyze-dataset-batches.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/bliss-collect-seq-lens.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/bliss-dump-text.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/bliss-get-segment-names.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/bliss-to-ogg-zip.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/bpe-create-lexicon.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/calculate-word-error-rate.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/cleanup-old-models.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/collect-orth-symbols.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/collect-words.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/compile_native_op.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/compile_tf_graph.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/debug-dump-search-scores.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/debug-plot-search-scores.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/dump-dataset-raw-strings.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/dump-dataset.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/dump-forward-stats.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/dump-forward.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/dump-network-json.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/dump-pickle.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/extract_state_tying_from_dataset.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/get-attention-weights.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/get-best-model-epoch.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/hdf_dump.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/hdf_dump_translation_dataset.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/import-blocks-mt-model.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/import-t2t-mt-model.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/lattice_rescorer/.gitignore +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/lattice_rescorer/Makefile +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/lattice_rescorer/README.md +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/lattice_rescorer/example/README.md +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/lattice_rescorer/example/libs_list +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/lattice_rescorer/example/state_vars_list +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/lattice_rescorer/example/tensor_names_list +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/lattice_rescorer/file.h +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/lattice_rescorer/main.cc +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/lattice_rescorer/rescorer.h +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/lattice_rescorer/vocabulary.cc +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/lattice_rescorer/vocabulary.h +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/tf_avg_checkpoints.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/tf_inspect_checkpoint.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/tf_inspect_summary_log.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/torch_avg_checkpoints.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/torch_export_to_onnx.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/torch_inspect_checkpoint.py +0 -0
- {returnn-1.20240924.10718 → returnn-1.20240925.224259}/tools/torch_inspect_checkpoint_and_opt.py +0 -0
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/datasets/postprocessing.py
RENAMED
@@ -304,16 +304,31 @@ class LaplaceOrdering(Callable[[Iterator[TensorDict]], Iterator[TensorDict]]):
         iterator = iter(iterator)
         is_down_phase = False
 
+        seq_buffer = list(islice(iterator, self.num_seqs_per_bin))
         while True:
-            seq_buffer = list(islice(iterator, self.num_seqs_per_bin))
             seq_buffer.sort(key=self._get_seq_len, reverse=is_down_phase)
-            yield from seq_buffer
 
-            is_down_phase = not is_down_phase
+            next_seq_buffer = []
+            has_ended = False
+
+            # Yield items to trainer while gradually pulling more data from PP function.
+            # This optimizes CPU load when multiple workers are used.
+            for item in seq_buffer:
+                yield item
+
+                try:
+                    if not has_ended:
+                        next_seq_buffer.append(next(iterator))
+                except StopIteration:
+                    has_ended = True
 
             if len(seq_buffer) < self.num_seqs_per_bin:
+                assert has_ended and not next_seq_buffer
                 break
 
+            is_down_phase = not is_down_phase
+            seq_buffer = next_seq_buffer
+
     def _get_seq_len(self, tdict: TensorDict) -> int:
         """
         :return: segment length of the segment in `tdict` as measured by `self.length_key` for comparison.
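For reference, the new LaplaceOrdering loop interleaves yielding the already-sorted bin with prefetching the next bin from the upstream iterator, one item per yielded item. A minimal standalone sketch of that buffering pattern (hypothetical names, not part of the package):

from itertools import islice
from typing import Callable, Iterator, List, TypeVar

T = TypeVar("T")


def laplace_iter(items: Iterator[T], bin_size: int, key: Callable[[T], int]) -> Iterator[T]:
    """Yield items bin by bin, alternating ascending/descending order by key,
    while pulling the next bin one item at a time as the current bin is consumed."""
    items = iter(items)
    is_down_phase = False
    seq_buffer: List[T] = list(islice(items, bin_size))
    while True:
        seq_buffer.sort(key=key, reverse=is_down_phase)
        next_seq_buffer: List[T] = []
        has_ended = False
        for item in seq_buffer:
            yield item
            try:
                if not has_ended:
                    next_seq_buffer.append(next(items))
            except StopIteration:
                has_ended = True
        if len(seq_buffer) < bin_size:
            break
        is_down_phase = not is_down_phase
        seq_buffer = next_seq_buffer


# e.g. list(laplace_iter(iter([5, 3, 8, 1, 9, 2, 7]), bin_size=3, key=lambda x: x))
# -> [3, 5, 8, 9, 2, 1, 7]  (first bin ascending, second descending, then the remainder)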
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/_backend.py
RENAMED
@@ -921,6 +921,8 @@ class Backend(Generic[T]):
         *,
         indices: Tensor,
         indices_dim: Union[Dim, Sequence[Dim]],
+        mode: str,
+        fill_value: Union[int, float],
         out_dim: Union[Dim, Sequence[Dim]],
     ) -> Tensor:
         """
@@ -932,6 +934,8 @@ class Backend(Generic[T]):
         :param source: [batch_dims..., indices_dim(s)..., feature_dims...]
         :param indices: [batch_dims..., indices_dim(s)...] -> out_dim
         :param indices_dim:
+        :param mode: "sum" or "max" or "min"
+        :param fill_value:
         :param out_dim:
         :return: [batch_dims..., out_dim, feature_dims...]
         """
@@ -983,6 +987,11 @@ class Backend(Generic[T]):
         """
         raise NotImplementedError
 
+    @staticmethod
+    def is_finite(x: Tensor) -> Tensor:
+        """is finite"""
+        raise NotImplementedError
+
     @staticmethod
     def clip_by_value(
         x: Tensor,
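To make the extended scatter contract concrete: out[i] aggregates all source[j] with indices[j] == i according to mode, and slots that receive no value keep fill_value. A rough NumPy reference for the 1-D case (illustrative only, not part of the package; it mirrors the include_self=False behaviour used by the PyTorch backend further below):

import numpy as np


def scatter_1d_reference(source: np.ndarray, indices: np.ndarray, out_size: int,
                         mode: str = "sum", fill_value=0) -> np.ndarray:
    """out[i] reduces all source[j] with indices[j] == i; untouched slots keep fill_value."""
    out = np.full((out_size,), fill_value, dtype=source.dtype)
    touched = np.zeros((out_size,), dtype=bool)
    reduce_fn = {"sum": np.add, "max": np.maximum, "min": np.minimum}[mode]
    for j, i in enumerate(indices):
        # the first value written to a slot replaces the fill value (include_self=False semantics)
        out[i] = source[j] if not touched[i] else reduce_fn(out[i], source[j])
        touched[i] = True
    return out


# scatter_1d_reference(np.array([1., 4., 2.]), np.array([0, 1, 1]), out_size=3,
#                      mode="max", fill_value=float("-inf"))  -> array([  1.,   4., -inf])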
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/array_.py
RENAMED
@@ -24,6 +24,7 @@ __all__ = [
     "reshape",
     "split",
     "expand_dim",
+    "expand_dims",
     "squeeze",
     "window",
     "concat",
@@ -37,6 +38,7 @@ __all__ = [
     "pack_padded",
     "gather",
     "scatter",
+    "scatter_argmax",
     "slice",
     "shift_right",
     "reverse_sequence",
@@ -48,7 +50,7 @@ __all__ = [
 
 
 def convert_to_tensor(
-    value: Union[Tensor, T, RawTensorTypes],
+    value: Union[Tensor, T, RawTensorTypes, list, tuple],
     *,
     dims: Sequence[Dim] = None,
     dtype: Optional[str] = None,
@@ -73,6 +75,8 @@ def convert_to_tensor(
     """
     if isinstance(value, Tensor):  # fast path
         return value
+    if isinstance(value, (tuple, list)):
+        value = numpy.array(value, dtype=dtype)
     if dims is None and shape is not None:
         dims = shape  # old code
     if isinstance(value, (int, float, complex, bool, str, numpy.number)):
@@ -257,6 +261,15 @@ def expand_dim(source: Tensor, dim: Dim) -> Tensor:
     return source._raw_backend.expand_dim(source, dim=dim)
 
 
+def expand_dims(source: Tensor, dims: Sequence[Dim]) -> Tensor:
+    """
+    Expand multiple dims, via :func:`expand_dim`.
+    """
+    for dim in dims:
+        source = expand_dim(source, dim)
+    return source
+
+
 def squeeze(source: Tensor, axis: Dim) -> Tensor:
     """
     Removes the axis with dimension of extend 1 from the source.
@@ -680,17 +693,24 @@ def scatter(
     *,
     indices: Tensor,
     indices_dim: Union[Dim, Sequence[Dim]],
+    mode: str = "sum",
+    fill_value: Optional[Union[int, float]] = None,
     out_dim: Optional[Union[Dim, Sequence[Dim]]] = None,
 ) -> Tensor:
     """
     Scatters into new zero-tensor.
     If entries in indices are duplicated, the corresponding values in source will be added together
-    (scatter_add in PyTorch)
+        (scatter_add in PyTorch)
+        with mode=="sum",
+        or otherwise it will take the max/min.
+
     (TF segment_sum can be implemented via this.)
 
     :param source: [batch_dims..., indices_dim(s)..., feature_dims...]
     :param indices: [batch_dims..., indices_dim(s)...] -> out_dim
     :param indices_dim:
+    :param mode: "sum" or "max" or "min". also see :func:`scatter_argmax`.
+    :param fill_value:
     :param out_dim: The indices target dim.
         If not given, will be automatically determined as the sparse_dim from indices.
         If multiple out dims, use indices into the merged out dims,
@@ -700,8 +720,81 @@ def scatter(
     if not out_dim:
         assert isinstance(indices, Tensor) and indices.sparse_dim
         out_dim = indices.sparse_dim
+    if fill_value is None:
+        if mode == "sum":
+            fill_value = 0
+        elif mode == "max":
+            if "int" in source.dtype:
+                import numpy
+
+                fill_value = numpy.iinfo(source.raw_tensor.dtype).min
+            else:
+                fill_value = float("-inf")
+        elif mode == "min":
+            if "int" in source.dtype:
+                import numpy
+
+                fill_value = numpy.iinfo(source.raw_tensor.dtype).max
+            else:
+                fill_value = float("inf")
+        else:
+            raise ValueError(f"scatter: invalid mode {mode!r}")
     # noinspection PyProtectedMember
-    return source._raw_backend.scatter(source, indices=indices, indices_dim=indices_dim, out_dim=out_dim)
+    return source._raw_backend.scatter(
+        source, indices=indices, indices_dim=indices_dim, mode=mode, fill_value=fill_value, out_dim=out_dim
+    )
+
+
+def scatter_argmax(
+    source: Tensor,
+    *,
+    indices: Tensor,
+    indices_dim: Union[Dim, Sequence[Dim]],
+    invalid_idx: int = -1,
+    out_dim: Optional[Union[Dim, Sequence[Dim]]] = None,
+) -> Tensor:
+    """
+    Get the index in src which has the max value for each index in index.
+
+    This is like :func:`scatter` with ``mode="argmax"``.
+
+    :param source: [batch_dims..., indices_dim(s)..., feature_dims...]
+    :param indices: [batch_dims..., indices_dim(s)...] -> out_dim
+    :param indices_dim:
+    :param invalid_idx: in case some of the output entries are never set (via ``indices``),
+        this will be used as the value.
+    :param out_dim: The indices target dim.
+    :return: [batch_dims..., out_dim(s)..., feature_dims...]
+    """
+    import numpy
+
+    if not out_dim:
+        assert isinstance(indices, Tensor) and indices.sparse_dim
+        out_dim = indices.sparse_dim
+
+    # For the shape comments, use [B,I,F] for shorter source, [B,O,F] for shorter output.
+    # use scatter to get the max value for each index
+    out_max = rf.scatter(source, indices=indices, indices_dim=indices_dim, mode="max", out_dim=out_dim)  # [B,O,F]
+    src_max = rf.gather(out_max, indices=indices, axis=out_dim)  # [B,I,F] -> max value or invalid_value
+
+    max_invalid_idx = numpy.iinfo(indices.dtype).max
+
+    # then use gather to get the max value back to src.
+    # then mask the src with the max value.
+    src_max_mask = src_max == source
+    src_max_mask = src_max_mask.copy_masked(False)
+    src_indices = rf.where(
+        src_max_mask, rf.range_over_dim(indices_dim, dtype=indices.dtype, device=source.device), max_invalid_idx
+    )  # [B,I,F] -> I
+
+    # now scatter the min of src_indices into tensor
+    out = rf.scatter(
+        src_indices, indices=indices, indices_dim=indices_dim, mode="min", fill_value=invalid_idx, out_dim=out_dim
+    )  # [B,O,F] -> I or invalid_idx or max_invalid_idx
+
+    if max_invalid_idx != invalid_idx:
+        out = rf.where(out != max_invalid_idx, out, invalid_idx)  # [B,O,F] -> I or invalid_idx
+    return out
 
 
 # noinspection PyShadowingBuiltins
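A hedged usage sketch for the additions above (not taken from the package; it assumes a PyTorch install, rf.select_backend_torch(), and illustrative dim/tensor names; out_dim is passed explicitly instead of relying on a sparse_dim on the indices):

import returnn.frontend as rf
from returnn.tensor import Dim

rf.select_backend_torch()

in_dim = Dim(5, name="in")
classes_dim = Dim(3, name="classes")

# convert_to_tensor now also accepts plain lists/tuples
values = rf.convert_to_tensor([1.0, 4.0, 2.0, 3.0, 5.0], dims=[in_dim], dtype="float32")
indices = rf.convert_to_tensor([0, 1, 1, 2, 2], dims=[in_dim], dtype="int32")

pooled_sum = rf.scatter(values, indices=indices, indices_dim=in_dim, out_dim=classes_dim)  # default mode="sum"
pooled_max = rf.scatter(values, indices=indices, indices_dim=in_dim, mode="max", out_dim=classes_dim)
best_pos = rf.scatter_argmax(values, indices=indices, indices_dim=in_dim, out_dim=classes_dim)  # index into in_dim

batch_dim = Dim(2, name="batch")
values_ext = rf.expand_dims(values, dims=[batch_dim])  # add a broadcasted batch dim via repeated expand_dim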
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/decoder/transformer.py
RENAMED
@@ -441,6 +441,7 @@ class FeedForwardGated(rf.Module):
         ff_dim: Optional[Union[Dim, int]] = NotSpecified,
         dropout: float = 0.1,
         activation: Union[Callable[[Tensor], Tensor], Dict[str, Any], rf.Module] = rf.swish,
+        gate_activation: Union[Callable[[Tensor], Tensor], Dict[str, Any], rf.Module] = rf.identity,
         with_bias: bool = False,
     ):
         """
@@ -474,11 +475,18 @@ class FeedForwardGated(rf.Module):
             activation = rf.build_from_dict(activation)
         elif not callable(activation):
             raise TypeError(f"{self}: unexpected activation type {activation!r}")
+        if gate_activation is NotSpecified:
+            gate_activation = rf.identity
+        elif isinstance(gate_activation, dict):
+            gate_activation = rf.build_from_dict(gate_activation)
+        elif not callable(gate_activation):
+            raise TypeError(f"{self}: unexpected gate_activation type {gate_activation!r}")
 
         self.out_dim = out_dim
         self.dropout = dropout
         self.dropout_broadcast = rf.dropout_broadcast_default()
         self.activation = activation
+        self.gate_activation = gate_activation
 
         # Factor 2 because we concatenate the two paths.
         self.linear_ff = rf.Linear(out_dim, 2 * ff_dim, with_bias=with_bias)
@@ -488,7 +496,7 @@ class FeedForwardGated(rf.Module):
         """forward"""
         x_ff1 = self.linear_ff(inp)
         x_ff1a, x_ff1b = rf.split(x_ff1, axis=self.linear_ff.out_dim, out_dims=[self.linear_out.in_dim] * 2)
-        x_act = self.activation(x_ff1a) * x_ff1b
+        x_act = self.activation(x_ff1a) * self.gate_activation(x_ff1b)
         x_drop = rf.dropout(x_act, self.dropout, axis=self.dropout_broadcast and self.linear_out.in_dim)
         x_ff2 = self.linear_out(x_drop)
         return x_ff2
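A hedged construction sketch for the new gate_activation option (illustrative, not from the package; the dim size and the sigmoid-gated variant are arbitrary choices, and a backend must already be selected):

import returnn.frontend as rf
from returnn.tensor import Dim
from returnn.frontend.decoder.transformer import FeedForwardGated

rf.select_backend_torch()
model_dim = Dim(512, name="model")

# default behaviour is unchanged: swish(x_a) * identity(x_b)
ff_default = FeedForwardGated(model_dim)

# e.g. a GLU-style variant with a sigmoid gate on the second path
ff_glu = FeedForwardGated(model_dim, activation=rf.identity, gate_activation=rf.sigmoid)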
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/frontend/math_.py
RENAMED
@@ -37,6 +37,7 @@ __all__ = [
     "logical_not",
     "opt_logical_or",
     "opt_logical_and",
+    "is_finite",
    "maximum",
     "minimum",
     "clip_by_value",
@@ -361,6 +362,12 @@ def opt_logical_and(a: Union[Tensor, bool], b: Union[Tensor, bool]) -> Union[Tensor, bool]:
     return combine(a, "logical_and", b)
 
 
+def is_finite(a: Tensor) -> Tensor:
+    """is finite"""
+    # noinspection PyProtectedMember
+    return a._raw_backend.is_finite(a)
+
+
 def maximum(a: Tensor, b: Union[Tensor, _RawTensorTypes], *other_tensors) -> Tensor:
     """maximum"""
     if not other_tensors:
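Typical use of the new rf.is_finite, e.g. to mask out non-finite values (a hedged sketch, assuming a selected backend; names are illustrative):

import returnn.frontend as rf
from returnn.tensor import Dim

rf.select_backend_torch()

time_dim = Dim(4, name="time")
x = rf.convert_to_tensor([0.0, float("inf"), float("nan"), -1.5], dims=[time_dim], dtype="float32")

mask = rf.is_finite(x)           # bool tensor: [True, False, False, True]
x_safe = rf.where(mask, x, 0.0)  # replace inf/nan entries by 0.0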
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/tf/frontend_low_level/_backend.py
RENAMED
@@ -554,6 +554,14 @@ class TFBackend(Backend[tf.Tensor]):
         out_data.raw_tensor = y
         return out_data
 
+    @staticmethod
+    def is_finite(x: Tensor) -> Tensor:
+        """is finite"""
+        out = x.copy_template("is_finite", dtype="bool")
+        with tf_util.same_control_flow_ctx(x):
+            out.raw_tensor = tf.math.is_finite(x.raw_tensor)
+        return out
+
     @staticmethod
     def clip_by_value(
         x: Tensor,
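For reference, the TF backend simply wraps tf.math.is_finite on the raw tensor (inside the tensor's control-flow context); the underlying raw-tensor behavior is:

import tensorflow as tf

x = tf.constant([0.0, float("inf"), float("-inf"), float("nan")])
print(tf.math.is_finite(x))  # [True, False, False, False]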
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/returnn/torch/frontend/_backend.py
RENAMED
@@ -1015,6 +1015,8 @@ class TorchBackend(Backend[torch.Tensor]):
         *,
         indices: Tensor,
         indices_dim: Union[Dim, Sequence[Dim]],
+        mode: str,
+        fill_value: Union[int, float],
         out_dim: Union[Dim, Sequence[Dim]],
     ) -> Tensor:
         """
@@ -1026,6 +1028,8 @@ class TorchBackend(Backend[torch.Tensor]):
         :param source: [batch_dims..., indices_dim(s)..., feature_dims...]
         :param indices: [batch_dims..., indices_dim(s)...] -> out_dim
         :param indices_dim:
+        :param mode: "sum", "max", "min"
+        :param fill_value:
         :param out_dim:
         :return: [batch_dims..., out_dim, feature_dims...]
         """
@@ -1065,8 +1069,29 @@ class TorchBackend(Backend[torch.Tensor]):
         )
         out_dims = batch_dims + [out_flat_dim] + feature_dims
         out_shape = [d.get_dim_value() for d in out_dims]
-
-
+        if mode == "sum" and isinstance(fill_value, (int, float)) and fill_value == 0:
+            out_raw = torch.zeros(out_shape, dtype=source.raw_tensor.dtype, device=source.raw_tensor.device)
+            out_raw.scatter_add_(dim=len(batch_dims), index=indices.raw_tensor.to(torch.int64), src=source.raw_tensor)
+        elif mode == "sum":
+            out_raw = torch.full(out_shape, fill_value, dtype=source.raw_tensor.dtype, device=source.raw_tensor.device)
+            out_raw.scatter_reduce_(
+                dim=len(batch_dims),
+                index=indices.raw_tensor.to(torch.int64),
+                src=source.raw_tensor,
+                reduce="sum",
+                include_self=False,
+            )
+        elif mode in ("max", "min"):
+            out_raw = torch.full(out_shape, fill_value, dtype=source.raw_tensor.dtype, device=source.raw_tensor.device)
+            out_raw.scatter_reduce_(
+                dim=len(batch_dims),
+                index=indices.raw_tensor.to(torch.int64),
+                src=source.raw_tensor,
+                reduce="a" + mode,
+                include_self=False,
+            )
+        else:
+            raise ValueError(f"scatter: mode {mode!r} not supported")
         res = Tensor(
             "scatter",
             dims=out_dims,
@@ -1128,8 +1153,14 @@ class TorchBackend(Backend[torch.Tensor]):
         allow_broadcast_all_sources: bool = False,
     ) -> Tensor:
         """where"""
-
-
+        if isinstance(true_, Tensor):
+            dtype = true_.dtype
+        elif isinstance(false_, Tensor):
+            dtype = false_.dtype
+        else:
+            dtype = None
+        true_ = rf.convert_to_tensor(true_, _backend=TorchBackend, dtype=dtype, device=cond.device)
+        false_ = rf.convert_to_tensor(false_, _backend=TorchBackend, dtype=dtype, device=cond.device)
         out = Tensor.get_common_data(
             [true_, false_, cond], allow_broadcast_all_sources=allow_broadcast_all_sources, name="where"
         )
@@ -1174,6 +1205,13 @@ class TorchBackend(Backend[torch.Tensor]):
         out.raw_tensor = out_raw
         return out
 
+    @staticmethod
+    def is_finite(x: Tensor) -> Tensor:
+        """is finite"""
+        out = x.copy_template("is_finite", dtype="bool")
+        out.raw_tensor = torch.isfinite(x.raw_tensor)
+        return out
+
     @staticmethod
     def clip_by_value(
         x: Tensor,
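To see what the new mode / fill_value handling does at the raw-tensor level, here is a plain-PyTorch sketch of the "max"/"min" branch (standalone, values chosen only for illustration):

import torch

# Scatter 3 source values into 7 output slots; slots that receive nothing
# keep fill_value, mirroring the "max"/"min" branch above (reduce="a" + mode).
src = torch.tensor([0.0, 0.0, 0.0])
index = torch.tensor([2, 4, 5])
out = torch.full((7,), float("-inf"))
out.scatter_reduce_(dim=0, index=index, src=src, reduce="amax", include_self=False)
print(out)  # positions 2, 4, 5 become 0.0, the rest stay -inf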
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/tests/test_rf_array.py
RENAMED
@@ -373,6 +373,38 @@ def test_gather_time_static_clip_to_valid():
     run_model(extern_data_template, lambda *, epoch, step: rf.Module(), _forward_step)
 
 
+def test_scatter_fill_inf():
+    batch_dim_ = Dim(3, name="batch")
+    states_dim = Dim(7, name="states")
+
+    def _forward_step(**_kwargs):
+        start_states = rf.convert_to_tensor(
+            [2, 4, 5], name="start_states", dims=[batch_dim_], sparse_dim=states_dim, dtype="int32"
+        )
+        batch_dim_.get_size_tensor().mark_as_output("batch_size", shape=[])
+        start_states.mark_as_output("start_states", shape=[batch_dim_])
+        scores = rf.scatter(
+            rf.zeros([batch_dim_]),
+            indices=start_states,
+            indices_dim=[batch_dim_],
+            fill_value=float("-inf"),
+        )  # [S], per state
+        scores.mark_as_default_output(shape=[states_dim])
+
+    res = run_model(TensorDict(), lambda *, epoch, step: rf.Module(), _forward_step, test_tensorflow=False)
+    batch_size = res["batch_size"].raw_tensor.item()
+    assert res["start_states"].raw_tensor.shape == (batch_size,)
+    assert res["output"].raw_tensor.shape == (states_dim.dimension,)
+    assert res["output"].raw_tensor.tolist().count(0.0) == batch_size
+    assert res["output"].raw_tensor.tolist().count(float("-inf")) == states_dim.dimension - batch_size
+    assert states_dim.dimension > batch_size
+    for i in range(states_dim.dimension):
+        if i in res["start_states"].raw_tensor:
+            assert res["output"].raw_tensor[i] == 0.0
+        else:
+            assert res["output"].raw_tensor[i] == float("-inf")
+
+
 def test_slice():
     time_dim = Dim(Tensor("time", [batch_dim], dtype="int32"))
     in_dim = Dim(7, name="in")
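Concretely, with the values used in this test (3 sequences, start states [2, 4, 5], 7 states, zeros as source, fill_value -inf), the default output asserted above is:

# index:     0             1             2    3             4    5    6
expected = [float("-inf"), float("-inf"), 0.0, float("-inf"), 0.0, 0.0, float("-inf")]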
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-record-and-push-to-webserver.py
RENAMED (file without changes)
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-chunking-blstm.12ax.config
RENAMED (file without changes)
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-contribrnn-lstm.12ax.config
RENAMED (file without changes)
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-maxgradnorm-lstm.12ax.config
RENAMED (file without changes)
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-native-lstm-lowmem.12ax.config
RENAMED (file without changes)
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-native-lstm.12ax.config
RENAMED (file without changes)
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-native-lstm2.12ax.config
RENAMED (file without changes)
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-native-lstm2.12ax.tuned.config
RENAMED (file without changes)
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-neural-transducer.12ax.config
RENAMED (file without changes)
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-rec-explicit-lstm.config
RENAMED (file without changes)
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-rec-explicit-rnn.config
RENAMED (file without changes)
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-search-compiled-graph.py
RENAMED (file without changes)
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-tf-vanilla-lstm.12ax.config
RENAMED (file without changes)
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/demo-upd-mult-model.lstm.12ax.config
RENAMED (file without changes)
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png
RENAMED (file without changes)
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png
RENAMED (file without changes)
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png
RENAMED (file without changes)
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/IAM/create_IAM_dataset.py
RENAMED (file without changes)
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/IAM/features/raw/demo.h5
RENAMED (file without changes)
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/artificial/create_test_h5.py
RENAMED (file without changes)
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/artificial/forwardconfig
RENAMED (file without changes)
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/artificial_rgb/create_test_h5.py
RENAMED (file without changes)
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/artificial_rgb/forwardconfig
RENAMED (file without changes)
{returnn-1.20240924.10718 → returnn-1.20240925.224259}/demos/mdlstm/artificial_rgb/trainconfig
RENAMED (file without changes)