PyPI - returnn - Versions diffs - 1.20240925.152757__tar.gz → 1.20240926.134856__tar.gz - Mend

returnn 1.20240925.152757 (tar.gz) → 1.20240926.134856 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of returnn might be problematic. Click here for more details.

Files changed (464)

{returnn-1.20240925.152757 → returnn-1.20240926.134856}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20240925.152757
+Version: 1.20240926.134856
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer

returnn-1.20240926.134856/_setup_info_generated.py ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ version = '1.20240926.134856'
2	+ long_version = '1.20240926.134856+git.1fcaadd'

{returnn-1.20240925.152757 → returnn-1.20240926.134856}/returnn/datasets/audio.py RENAMED Viewed

@@ -63,7 +63,9 @@ class OggZipDataset(CachedDataset2):
         :param str|list[str]|((str)->str)|None targets_post_process: :func:`get_post_processor_function`,
             applied on orth
         :param bool use_cache_manager: uses :func:`returnn.util.basic.cf`
-        :param str|None segment_file: .txt or .gz text file containing sequence tags that will be used as whitelist
+        :param str|None segment_file: .txt or .gz text file containing sequence tags that will be used as whitelist.
+            Note: This is somewhat deprecated, as we also support ``seq_list_filter_file`` (via the base class),
+            which does the same but more universally.
         :param bool zip_audio_files_have_name_as_prefix:
         :param float|int|None fixed_random_subset:
           Value in [0,1] to specify the fraction, or integer >=1 which specifies number of seqs.

{returnn-1.20240925.152757 → returnn-1.20240926.134856}/returnn/frontend/_backend.py RENAMED Viewed

@@ -921,6 +921,8 @@ class Backend(Generic[T]):
         *,
         indices: Tensor,
         indices_dim: Union[Dim, Sequence[Dim]],
+        mode: str,
+        fill_value: Union[int, float],
         out_dim: Union[Dim, Sequence[Dim]],
     ) -> Tensor:
         """
@@ -932,6 +934,8 @@ class Backend(Generic[T]):
         :param source: [batch_dims..., indices_dim(s)..., feature_dims...]
         :param indices: [batch_dims..., indices_dim(s)...] -> out_dim
         :param indices_dim:
+        :param mode: "sum" or "max" or "min"
+        :param fill_value:
         :param out_dim:
         :return: [batch_dims..., out_dim, feature_dims...]
         """
@@ -983,6 +987,11 @@ class Backend(Generic[T]):
         """
         raise NotImplementedError
+    @staticmethod
+    def is_finite(x: Tensor) -> Tensor:
+        """is finite"""
+        raise NotImplementedError
     @staticmethod
     def clip_by_value(
         x: Tensor,

{returnn-1.20240925.152757 → returnn-1.20240926.134856}/returnn/frontend/array_.py RENAMED Viewed

@@ -24,6 +24,7 @@ __all__ = [
     "reshape",
     "split",
     "expand_dim",
+    "expand_dims",
     "squeeze",
     "window",
     "concat",
@@ -37,6 +38,7 @@ __all__ = [
     "pack_padded",
     "gather",
     "scatter",
+    "scatter_argmax",
     "slice",
     "shift_right",
     "reverse_sequence",
@@ -48,7 +50,7 @@ __all__ = [
 def convert_to_tensor(
-    value: Union[Tensor, T, RawTensorTypes],
+    value: Union[Tensor, T, RawTensorTypes, list, tuple],
     *,
     dims: Sequence[Dim] = None,
     dtype: Optional[str] = None,
@@ -73,6 +75,8 @@ def convert_to_tensor(
     """
     if isinstance(value, Tensor):  # fast path
         return value
+    if isinstance(value, (tuple, list)):
+        value = numpy.array(value, dtype=dtype)
     if dims is None and shape is not None:
         dims = shape  # old code
     if isinstance(value, (int, float, complex, bool, str, numpy.number)):
@@ -257,6 +261,15 @@ def expand_dim(source: Tensor, dim: Dim) -> Tensor:
     return source._raw_backend.expand_dim(source, dim=dim)
+def expand_dims(source: Tensor, dims: Sequence[Dim]) -> Tensor:
+    """
+    Expand multiple dims, via :func:`expand_dim`.
+    """
+    for dim in dims:
+        source = expand_dim(source, dim)
+    return source
 def squeeze(source: Tensor, axis: Dim) -> Tensor:
     """
     Removes the axis with dimension of extend 1 from the source.
@@ -680,17 +693,24 @@ def scatter(
     *,
     indices: Tensor,
     indices_dim: Union[Dim, Sequence[Dim]],
+    mode: str = "sum",
+    fill_value: Optional[Union[int, float]] = None,
     out_dim: Optional[Union[Dim, Sequence[Dim]]] = None,
 ) -> Tensor:
     """
     Scatters into new zero-tensor.
     If entries in indices are duplicated, the corresponding values in source will be added together
-    (scatter_add in PyTorch).
+    (scatter_add in PyTorch)
+    with mode=="sum",
+    or otherwise it will take the max/min.
     (TF segment_sum can be implemented via this.)
     :param source: [batch_dims..., indices_dim(s)..., feature_dims...]
     :param indices: [batch_dims..., indices_dim(s)...] -> out_dim
     :param indices_dim:
+    :param mode: "sum" or "max" or "min". also see :func:`scatter_argmax`.
+    :param fill_value:
     :param out_dim: The indices target dim.
         If not given, will be automatically determined as the sparse_dim from indices.
         If multiple out dims, use indices into the merged out dims,
@@ -700,8 +720,81 @@ def scatter(
     if not out_dim:
         assert isinstance(indices, Tensor) and indices.sparse_dim
         out_dim = indices.sparse_dim
+    if fill_value is None:
+        if mode == "sum":
+            fill_value = 0
+        elif mode == "max":
+            if "int" in source.dtype:
+                import numpy
+                fill_value = numpy.iinfo(source.raw_tensor.dtype).min
+            else:
+                fill_value = float("-inf")
+        elif mode == "min":
+            if "int" in source.dtype:
+                import numpy
+                fill_value = numpy.iinfo(source.raw_tensor.dtype).max
+            else:
+                fill_value = float("inf")
+        else:
+            raise ValueError(f"scatter: invalid mode {mode!r}")
     # noinspection PyProtectedMember
-    return source._raw_backend.scatter(source, indices=indices, indices_dim=indices_dim, out_dim=out_dim)
+    return source._raw_backend.scatter(
+        source, indices=indices, indices_dim=indices_dim, mode=mode, fill_value=fill_value, out_dim=out_dim
+    )
+def scatter_argmax(
+    source: Tensor,
+    *,
+    indices: Tensor,
+    indices_dim: Union[Dim, Sequence[Dim]],
+    invalid_idx: int = -1,
+    out_dim: Optional[Union[Dim, Sequence[Dim]]] = None,
+) -> Tensor:
+    """
+    Get the index in src which has the max value for each index in index.
+    This is like :func:`scatter` with ``mode="argmax"``.
+    :param source: [batch_dims..., indices_dim(s)..., feature_dims...]
+    :param indices: [batch_dims..., indices_dim(s)...] -> out_dim
+    :param indices_dim:
+    :param invalid_idx: in case some of the output entries are never set (via ``indices``),
+        this will be used as the value.
+    :param out_dim: The indices target dim.
+    :return: [batch_dims..., out_dim(s)..., feature_dims...]
+    """
+    import numpy
+    if not out_dim:
+        assert isinstance(indices, Tensor) and indices.sparse_dim
+        out_dim = indices.sparse_dim
+    # For the shape comments, use [B,I,F] for shorter source, [B,O,F] for shorter output.
+    # use scatter to get the max value for each index
+    out_max = rf.scatter(source, indices=indices, indices_dim=indices_dim, mode="max", out_dim=out_dim)  # [B,O,F]
+    src_max = rf.gather(out_max, indices=indices, axis=out_dim)  # [B,I,F] -> max value or invalid_value
+    max_invalid_idx = numpy.iinfo(indices.dtype).max
+    # then use gather to get the max value back to src.
+    # then mask the src with the max value.
+    src_max_mask = src_max == source
+    src_max_mask = src_max_mask.copy_masked(False)
+    src_indices = rf.where(
+        src_max_mask, rf.range_over_dim(indices_dim, dtype=indices.dtype, device=source.device), max_invalid_idx
+    )  # [B,I,F] -> I
+    # now scatter the min of src_indices into tensor
+    out = rf.scatter(
+        src_indices, indices=indices, indices_dim=indices_dim, mode="min", fill_value=invalid_idx, out_dim=out_dim
+    )  # [B,O,F] -> I or invalid_idx or max_invalid_idx
+    if max_invalid_idx != invalid_idx:
+        out = rf.where(out != max_invalid_idx, out, invalid_idx)  # [B,O,F] -> I or invalid_idx
+    return out
 # noinspection PyShadowingBuiltins

{returnn-1.20240925.152757 → returnn-1.20240926.134856}/returnn/frontend/decoder/transformer.py RENAMED Viewed

@@ -441,6 +441,7 @@ class FeedForwardGated(rf.Module):
         ff_dim: Optional[Union[Dim, int]] = NotSpecified,
         dropout: float = 0.1,
         activation: Union[Callable[[Tensor], Tensor], Dict[str, Any], rf.Module] = rf.swish,
+        gate_activation: Union[Callable[[Tensor], Tensor], Dict[str, Any], rf.Module] = rf.identity,
         with_bias: bool = False,
     ):
         """
@@ -474,11 +475,18 @@ class FeedForwardGated(rf.Module):
             activation = rf.build_from_dict(activation)
         elif not callable(activation):
             raise TypeError(f"{self}: unexpected activation type {activation!r}")
+        if gate_activation is NotSpecified:
+            gate_activation = rf.identity
+        elif isinstance(gate_activation, dict):
+            gate_activation = rf.build_from_dict(gate_activation)
+        elif not callable(gate_activation):
+            raise TypeError(f"{self}: unexpected gate_activation type {gate_activation!r}")
         self.out_dim = out_dim
         self.dropout = dropout
         self.dropout_broadcast = rf.dropout_broadcast_default()
         self.activation = activation
+        self.gate_activation = gate_activation
         # Factor 2 because we concatenate the two paths.
         self.linear_ff = rf.Linear(out_dim, 2 * ff_dim, with_bias=with_bias)
@@ -488,7 +496,7 @@ class FeedForwardGated(rf.Module):
         """forward"""
         x_ff1 = self.linear_ff(inp)
         x_ff1a, x_ff1b = rf.split(x_ff1, axis=self.linear_ff.out_dim, out_dims=[self.linear_out.in_dim] * 2)
-        x_act = self.activation(x_ff1a) * x_ff1b
+        x_act = self.activation(x_ff1a) * self.gate_activation(x_ff1b)
         x_drop = rf.dropout(x_act, self.dropout, axis=self.dropout_broadcast and self.linear_out.in_dim)
         x_ff2 = self.linear_out(x_drop)
         return x_ff2

{returnn-1.20240925.152757 → returnn-1.20240926.134856}/returnn/frontend/math_.py RENAMED Viewed

@@ -37,6 +37,7 @@ __all__ = [
     "logical_not",
     "opt_logical_or",
     "opt_logical_and",
+    "is_finite",
     "maximum",
     "minimum",
     "clip_by_value",
@@ -361,6 +362,12 @@ def opt_logical_and(a: Union[Tensor, bool], b: Union[Tensor, bool]) -> Union[Ten
     return combine(a, "logical_and", b)
+def is_finite(a: Tensor) -> Tensor:
+    """is finite"""
+    # noinspection PyProtectedMember
+    return a._raw_backend.is_finite(a)
 def maximum(a: Tensor, b: Union[Tensor, _RawTensorTypes], *other_tensors) -> Tensor:
     """maximum"""
     if not other_tensors:

{returnn-1.20240925.152757 → returnn-1.20240926.134856}/returnn/frontend/run_ctx.py RENAMED Viewed

@@ -275,8 +275,8 @@ class RunCtx:
         if dims is None and expected_output:
             dims = expected_output.dims
         if dims is not None and expected_output:
-            assert (
-                expected_output.dims == dims
+            assert expected_output.dims == tuple(
+                dims
             ), f"mark_as_output: {name!r} dims mismatch from expected output, given {dims}, expected {expected_output}"
         if not isinstance(tensor, Tensor):

{returnn-1.20240925.152757 → returnn-1.20240926.134856}/returnn/tensor/_dim_extra.py RENAMED Viewed

@@ -2301,7 +2301,6 @@ class _DimMixin:
         value = {"dim": self.dimension}
         if self.kind is not None:
             value["kind"] = self.kind.name
-        assert self.derived_from_op is None  # not handled yet for hashing...
         return value

{returnn-1.20240925.152757 → returnn-1.20240926.134856}/returnn/tf/frontend_low_level/_backend.py RENAMED Viewed

@@ -554,6 +554,14 @@ class TFBackend(Backend[tf.Tensor]):
             out_data.raw_tensor = y
             return out_data
+    @staticmethod
+    def is_finite(x: Tensor) -> Tensor:
+        """is finite"""
+        out = x.copy_template("is_finite", dtype="bool")
+        with tf_util.same_control_flow_ctx(x):
+            out.raw_tensor = tf.math.is_finite(x.raw_tensor)
+        return out
     @staticmethod
     def clip_by_value(
         x: Tensor,

{returnn-1.20240925.152757 → returnn-1.20240926.134856}/returnn/torch/frontend/_backend.py RENAMED Viewed

@@ -1015,6 +1015,8 @@ class TorchBackend(Backend[torch.Tensor]):
         *,
         indices: Tensor,
         indices_dim: Union[Dim, Sequence[Dim]],
+        mode: str,
+        fill_value: Union[int, float],
         out_dim: Union[Dim, Sequence[Dim]],
     ) -> Tensor:
         """
@@ -1026,6 +1028,8 @@ class TorchBackend(Backend[torch.Tensor]):
         :param source: [batch_dims..., indices_dim(s)..., feature_dims...]
         :param indices: [batch_dims..., indices_dim(s)...] -> out_dim
         :param indices_dim:
+        :param mode: "sum", "max", "min"
+        :param fill_value:
         :param out_dim:
         :return: [batch_dims..., out_dim, feature_dims...]
         """
@@ -1065,8 +1069,29 @@ class TorchBackend(Backend[torch.Tensor]):
         )
         out_dims = batch_dims + [out_flat_dim] + feature_dims
         out_shape = [d.get_dim_value() for d in out_dims]
-        out_raw = torch.zeros(out_shape, dtype=source.raw_tensor.dtype, device=source.raw_tensor.device)
-        out_raw.scatter_add_(dim=len(batch_dims), index=indices.raw_tensor.to(torch.int64), src=source.raw_tensor)
+        if mode == "sum" and isinstance(fill_value, (int, float)) and fill_value == 0:
+            out_raw = torch.zeros(out_shape, dtype=source.raw_tensor.dtype, device=source.raw_tensor.device)
+            out_raw.scatter_add_(dim=len(batch_dims), index=indices.raw_tensor.to(torch.int64), src=source.raw_tensor)
+        elif mode == "sum":
+            out_raw = torch.full(out_shape, fill_value, dtype=source.raw_tensor.dtype, device=source.raw_tensor.device)
+            out_raw.scatter_reduce_(
+                dim=len(batch_dims),
+                index=indices.raw_tensor.to(torch.int64),
+                src=source.raw_tensor,
+                reduce="sum",
+                include_self=False,
+            )
+        elif mode in ("max", "min"):
+            out_raw = torch.full(out_shape, fill_value, dtype=source.raw_tensor.dtype, device=source.raw_tensor.device)
+            out_raw.scatter_reduce_(
+                dim=len(batch_dims),
+                index=indices.raw_tensor.to(torch.int64),
+                src=source.raw_tensor,
+                reduce="a" + mode,
+                include_self=False,
+            )
+        else:
+            raise ValueError(f"scatter: mode {mode!r} not supported")
         res = Tensor(
             "scatter",
             dims=out_dims,
@@ -1128,8 +1153,14 @@ class TorchBackend(Backend[torch.Tensor]):
         allow_broadcast_all_sources: bool = False,
     ) -> Tensor:
         """where"""
-        true_ = rf.convert_to_tensor(true_, _backend=TorchBackend, device=cond.device)
-        false_ = rf.convert_to_tensor(false_, _backend=TorchBackend, device=cond.device)
+        if isinstance(true_, Tensor):
+            dtype = true_.dtype
+        elif isinstance(false_, Tensor):
+            dtype = false_.dtype
+        else:
+            dtype = None
+        true_ = rf.convert_to_tensor(true_, _backend=TorchBackend, dtype=dtype, device=cond.device)
+        false_ = rf.convert_to_tensor(false_, _backend=TorchBackend, dtype=dtype, device=cond.device)
         out = Tensor.get_common_data(
             [true_, false_, cond], allow_broadcast_all_sources=allow_broadcast_all_sources, name="where"
         )
@@ -1174,6 +1205,13 @@ class TorchBackend(Backend[torch.Tensor]):
         out.raw_tensor = out_raw
         return out
+    @staticmethod
+    def is_finite(x: Tensor) -> Tensor:
+        """is finite"""
+        out = x.copy_template("is_finite", dtype="bool")
+        out.raw_tensor = torch.isfinite(x.raw_tensor)
+        return out
     @staticmethod
     def clip_by_value(
         x: Tensor,

{returnn-1.20240925.152757 → returnn-1.20240926.134856}/returnn.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20240925.152757
+Version: 1.20240926.134856
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer

{returnn-1.20240925.152757 → returnn-1.20240926.134856}/tests/test_rf_array.py RENAMED Viewed

@@ -373,6 +373,38 @@ def test_gather_time_static_clip_to_valid():
     run_model(extern_data_template, lambda *, epoch, step: rf.Module(), _forward_step)
+def test_scatter_fill_inf():
+    batch_dim_ = Dim(3, name="batch")
+    states_dim = Dim(7, name="states")
+    def _forward_step(**_kwargs):
+        start_states = rf.convert_to_tensor(
+            [2, 4, 5], name="start_states", dims=[batch_dim_], sparse_dim=states_dim, dtype="int32"
+        )
+        batch_dim_.get_size_tensor().mark_as_output("batch_size", shape=[])
+        start_states.mark_as_output("start_states", shape=[batch_dim_])
+        scores = rf.scatter(
+            rf.zeros([batch_dim_]),
+            indices=start_states,
+            indices_dim=[batch_dim_],
+            fill_value=float("-inf"),
+        )  # [S], per state
+        scores.mark_as_default_output(shape=[states_dim])
+    res = run_model(TensorDict(), lambda *, epoch, step: rf.Module(), _forward_step, test_tensorflow=False)
+    batch_size = res["batch_size"].raw_tensor.item()
+    assert res["start_states"].raw_tensor.shape == (batch_size,)
+    assert res["output"].raw_tensor.shape == (states_dim.dimension,)
+    assert res["output"].raw_tensor.tolist().count(0.0) == batch_size
+    assert res["output"].raw_tensor.tolist().count(float("-inf")) == states_dim.dimension - batch_size
+    assert states_dim.dimension > batch_size
+    for i in range(states_dim.dimension):
+        if i in res["start_states"].raw_tensor:
+            assert res["output"].raw_tensor[i] == 0.0
+        else:
+            assert res["output"].raw_tensor[i] == float("-inf")
 def test_slice():
     time_dim = Dim(Tensor("time", [batch_dim], dtype="int32"))
     in_dim = Dim(7, name="in")