PyPI - returnn - Versions diffs - 1.20250220.174943__tar.gz → 1.20250220.200053__tar.gz - Mend

returnn 1.20250220.174943 (tar.gz) → 1.20250220.200053 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of returnn might be problematic. Click here for more details.

Files changed (476)

{returnn-1.20250220.174943/returnn.egg-info → returnn-1.20250220.200053}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250220.174943
+Version: 1.20250220.200053
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer

returnn-1.20250220.200053/_setup_info_generated.py ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ version = '1.20250220.200053'
2	+ long_version = '1.20250220.200053+git.bb5c0aa'

{returnn-1.20250220.174943 → returnn-1.20250220.200053}/returnn/frontend/_backend.py RENAMED Viewed

@@ -784,18 +784,11 @@ class Backend(Generic[T]):
         dims: Sequence[Dim],
         dtype: str,
         sparse_dim: Optional[Dim] = None,
+        feature_dim: Optional[Dim] = None,
         device: Optional[str] = None,
         name: Optional[str] = None,
     ) -> Tensor[T]:
-        """
-        :param value: tensor, or scalar raw tensor or some other scalar value
-        :param dims:
-        :param dtype:
-        :param sparse_dim:
-        :param device:
-        :param name:
-        :return: tensor
-        """
+        """convert (raw/any) tensor to tensor"""
         raise NotImplementedError
     @staticmethod
@@ -956,6 +949,11 @@ class Backend(Generic[T]):
         """where"""
         raise NotImplementedError
+    @staticmethod
+    def sort(source: Tensor, *, axis: Dim, descending: bool, stable: bool) -> Tuple[Tensor, Tensor, Dim]:
+        """sort. return values and indices"""
+        raise NotImplementedError
     @staticmethod
     def search_sorted(
         sorted_seq: Tensor, values: Tensor, *, axis: Dim, side: str = "left", out_dtype: str = "int32"

{returnn-1.20250220.174943 → returnn-1.20250220.200053}/returnn/frontend/_native/module.cpp RENAMED Viewed

@@ -312,8 +312,9 @@ bool PyModuleState::_cachedOpInitTorch() {
     AddOp(TOp_FloorDiv, "floor_divide");
     AddOp(TOp_Mod, "remainder");
     AddOp(TOp_Pow, "pow");
-    AddOp(TOp_Maximum, "maximum");
-    AddOp(TOp_Minimum, "minimum");
+    // Use clamp_min/clamp_max instead of maximum/minimum because the former allow number arguments.
+    AddOp(TOp_Maximum, "clamp_min");
+    AddOp(TOp_Minimum, "clamp_max");
     AddOpAlt(TOp_SquaredDifference, "squared_difference");
     AddOp(TOp_And, "logical_and");
     AddOp(TOp_Or, "logical_or");

{returnn-1.20250220.174943 → returnn-1.20250220.200053}/returnn/frontend/_native/tensor_ops.cpp RENAMED Viewed

@@ -1368,6 +1368,14 @@ static PyObject* compareOrCombineViaCached(
         case TOp_FloorDiv:
         case TOp_Mod:
         case TOp_Pow:
+        case TOp_Maximum:
+        case TOp_Minimum:
+        case TOp_Eq:
+        case TOp_Ne:
+        case TOp_Lt:
+        case TOp_Le:
+        case TOp_Gt:
+        case TOp_Ge:
             needConvertToTensor = false;
         default:
             break;

{returnn-1.20250220.174943 → returnn-1.20250220.200053}/returnn/frontend/_numpy_backend.py RENAMED Viewed

@@ -83,6 +83,7 @@ class NumpyBackend(Backend[numpy.ndarray]):
         dims: Sequence[Dim],
         dtype: str,
         sparse_dim: Optional[Dim] = None,
+        feature_dim: Optional[Dim] = None,
         device: Optional[str] = None,
         name: Optional[str] = None,
     ) -> Tensor[numpy.ndarray]:
@@ -95,7 +96,7 @@ class NumpyBackend(Backend[numpy.ndarray]):
             name = name or "const"
             value = numpy.array(value, dtype=NumpyBackend.as_dtype_raw(dtype))
         assert isinstance(value, numpy.ndarray)
-        return Tensor(name, dims=dims, dtype=dtype, sparse_dim=sparse_dim, raw_tensor=value)
+        return Tensor(name, dims=dims, dtype=dtype, sparse_dim=sparse_dim, feature_dim=feature_dim, raw_tensor=value)
     @staticmethod
     def expand_dims_raw(raw_tensor: numpy.ndarray, axis: int) -> numpy.ndarray:

{returnn-1.20250220.174943 → returnn-1.20250220.200053}/returnn/frontend/array_.py RENAMED Viewed

@@ -45,9 +45,12 @@ __all__ = [
     "shift_left",
     "reverse_sequence",
     "where",
+    "sort",
     "search_sorted",
     "sparse_to_dense",
     "one_hot",
+    "top_k_mask",
+    "top_p_mask",
 ]
@@ -57,6 +60,7 @@ def convert_to_tensor(
     dims: Sequence[Dim] = None,
     dtype: Optional[str] = None,
     sparse_dim: Optional[Dim] = None,
+    feature_dim: Optional[Dim] = None,
     shape: Sequence[Dim] = None,
     device: Optional[str] = None,
     keep_scalar_on_cpu: bool = False,
@@ -68,6 +72,7 @@ def convert_to_tensor(
     :param dims:
     :param dtype:
     :param sparse_dim:
+    :param feature_dim:
     :param shape: alias for dims, for some older code
     :param name:
     :param device:
@@ -121,7 +126,7 @@ def convert_to_tensor(
         if dtype is None:
             dtype = value_backend.get_dtype_name_raw(value)
     return _backend.convert_to_tensor(
-        value=value, dims=dims, dtype=dtype, sparse_dim=sparse_dim, device=device, name=name
+        value=value, dims=dims, dtype=dtype, sparse_dim=sparse_dim, feature_dim=feature_dim, device=device, name=name
     )
@@ -996,6 +1001,27 @@ def where(
     return cond._raw_backend.where(cond, true_, false_, allow_broadcast_all_sources=allow_broadcast_all_sources)
+def sort(source: Tensor, *, axis: Dim, descending: bool = False, stable: bool = True) -> Tuple[Tensor, Tensor, Dim]:
+    """
+    Sorts the source tensor along the given axis.
+    See also :func:`top_k`.
+    :func:`top_k` with ``k=axis.get_size_tensor()`` is equivalent to this function.
+    :param source: {other_dims..., axis}
+    :param axis: The axis to sort along.
+    :param descending: If True, sort in descending order, otherwise in ascending order.
+    :param stable: If True, use a stable sorting algorithm (not reordering equal elements).
+        Note that many frameworks (Torch, TensorFlow) have ``stable=False`` by default.
+        ``stable=False`` can be faster.
+    :return: sorted tensor, indices tensor, out_dim. both tensors have the shape {other_dims..., out_dim},
+        i.e. ``axis`` replaced by ``out_dim``.
+        indices tensor has sparse_dim set to ``axis``.
+    """
+    # noinspection PyProtectedMember
+    return source._raw_backend.sort(source, axis=axis, descending=descending, stable=stable)
 def search_sorted(
     sorted_seq: Tensor, values: Tensor, *, axis: Dim, side: str = "left", out_dtype: str = "int32"
 ) -> Tensor:
@@ -1044,3 +1070,49 @@ def one_hot(source: Tensor) -> Tensor:
     and much more efficiently than they would be with dense tensors.
     """
     return sparse_to_dense(source, label_value=1.0, other_value=0.0)
+def top_k_mask(values: Tensor, *, axis: Dim, k: Union[int, Tensor]) -> Tensor:
+    """
+    Top-k filtering.
+    :param values: {other_dims..., axis}
+    :param axis:
+    :param k: the number of top values to keep
+    :return: mask {other_dims..., axis} of the top-k values
+    """
+    _, indices, k_dim = rf.top_k(values, axis=axis, k=k)
+    mask = rf.scatter(rf.full(dims=indices.dims, fill_value=True), indices=indices, indices_dim=k_dim, fill_value=False)
+    return mask
+def top_p_mask(
+    probs: Tensor,
+    *,
+    axis: Dim,
+    p: Union[float, Tensor],
+    one_more: bool = True,
+) -> Tensor:
+    """
+    Top-p filtering, e.g. as used in Nucleus sampling (https://arxiv.org/abs/1904.09751).
+    :param probs: {probs_dims..., axis}
+    :param axis:
+    :param p: the probability mass to keep
+    :param one_more: if True (default), keep also the first token above the threshold.
+        (It's enabled by default to follow the behavior of the original implementation.)
+    :return: mask {probs_dims..., axis} of the top-p tokens.
+        ``sum(probs[mask]) <= p``, or slightly more if ``one_more`` is True.
+    """
+    assert 0.0 <= p <= 1.0
+    if isinstance(p, Tensor):
+        assert axis not in p.dims
+    # https://github.com/ari-holtzman/degen/blob/master/gen.py
+    sorted_probs, sorted_indices, sorted_dim = rf.sort(probs, axis=axis, descending=True)
+    cum_probs = rf.cumsum(sorted_probs, spatial_dim=sorted_dim)
+    mask = cum_probs <= p  # {probs_dims..., sorted_dim}
+    if one_more:
+        # keep also the first token above the threshold
+        mask = rf.shift_right(mask, axis=sorted_dim, pad_value=True)
+    mask = rf.scatter(mask, indices=sorted_indices, indices_dim=sorted_dim)
+    return mask

{returnn-1.20250220.174943 → returnn-1.20250220.200053}/returnn/frontend/dims.py RENAMED Viewed

@@ -14,6 +14,7 @@ __all__ = [
     "range_over_dim",
     "range_over_dim_strided",
     "range_over_merged_dims",
+    "linspace_over_dim",
     "replace_dim",
     "replace_dim_v2",
     "set_sparse_dim",
@@ -81,6 +82,36 @@ def range_over_merged_dims(
     return indices
+def linspace_over_dim(
+    dim: Dim,
+    start: Union[float, Tensor] = 0.0,
+    end: Union[float, Tensor] = 1.0,
+    *,
+    dtype: Optional[str] = None,
+    device: Optional[str] = None,
+) -> Tensor:
+    """
+    Linearly spaced values over a dim.
+    :param dim: dim to range over
+    :param start: start value
+    :param end: end value
+    :param dtype: dtype of the output tensor
+    :param device: device of the output tensor
+    :return: tensor with shape [dim] containing linearly spaced values between start and end
+    """
+    if dtype is None:
+        dtype = rf.get_default_float_dtype()
+    indices = rf.range_over_dim(dim, dtype=dtype, device=device)
+    linspace = indices / rf.cast(rf.maximum(dim.get_size_tensor(device=indices.device), 1), dtype=indices.dtype)
+    space_len = end - start
+    if not isinstance(space_len, (int, float)) or space_len != 1:
+        linspace *= space_len
+    if not isinstance(start, (int, float)) or start != 0:
+        linspace += start
+    return linspace
 def replace_dim(source: Tensor, *, in_dim: Dim, out_dim: Optional[Dim] = None) -> Tuple[Tensor, Dim]:
     """
     Also see: :func:`replace_dim_v2`, :func:`rf.merge_dims`, :func:`rf.split_dims`.

{returnn-1.20250220.174943 → returnn-1.20250220.200053}/returnn/frontend/rand.py RENAMED Viewed

@@ -64,6 +64,7 @@ __all__ = [
     "random_uniform",
     "random_normal",
     "random_truncated_normal",
+    "random_choice_without_replacement",
 ]
@@ -349,3 +350,32 @@ def random_truncated_normal(
         static=static,
         out=out,
     )
+def random_choice_without_replacement(
+    *,
+    log_probs: Tensor,
+    axis: Union[Dim, Sequence[Dim]],
+    num_samples_dim: Dim,
+    noise_scale: Union[float, Tensor] = 1.0,
+) -> Union[Tensor, Sequence[Tensor]]:
+    """
+    Randomly sample without replacement.
+    :param log_probs: {log_probs_dims..., axis}
+    :param axis: same as in :func:`top_k`
+    :param num_samples_dim: how many samples to draw
+    :param noise_scale: scale the noise. with scale=0, you get :func:`top_k`.
+    :return: random indices shape {log_probs_dims..., num_samples_dim} -> axis.
+        if axis was a sequence, will return a sequence of tensors.
+    """
+    # https://github.com/tensorflow/tensorflow/issues/9260
+    # https://timvieira.github.io/blog/post/2014/08/01/gumbel-max-trick-and-weighted-reservoir-sampling/
+    scores_random_sample = -rf.log(
+        -rf.log(random_uniform(log_probs.dims, dtype=log_probs.dtype, device=log_probs.device))
+    )
+    if not isinstance(noise_scale, (int, float)) or noise_scale != 1.0:
+        scores_random_sample *= noise_scale
+    scores = log_probs + scores_random_sample
+    _, indices, _ = rf.top_k(scores, k_dim=num_samples_dim, axis=axis)
+    return indices

{returnn-1.20250220.174943 → returnn-1.20250220.200053}/returnn/tf/frontend_layers/_backend.py RENAMED Viewed

@@ -559,6 +559,7 @@ class ReturnnLayersBackend(Backend[Layer]):
         dims: Sequence[Dim],
         dtype: str,
         sparse_dim: Optional[Dim] = None,
+        feature_dim: Optional[Dim] = None,
         device: Optional[str] = None,
         name: Optional[str] = None,
     ) -> Tensor[Layer]:
@@ -568,6 +569,8 @@ class ReturnnLayersBackend(Backend[Layer]):
         kwargs = {}
         if sparse_dim:
             kwargs["sparse_dim"] = sparse_dim
+        if feature_dim:
+            kwargs["feature_dim"] = feature_dim
         dim_deps = _dims.get_dim_deps(dims)
         if dim_deps:
             kwargs["shape_deps"] = dim_deps

{returnn-1.20250220.174943 → returnn-1.20250220.200053}/returnn/tf/frontend_low_level/_backend.py RENAMED Viewed

@@ -411,24 +411,19 @@ class TFBackend(Backend[tf.Tensor]):
         dims: Sequence[Dim],
         dtype: str,
         sparse_dim: Optional[Dim] = None,
+        feature_dim: Optional[Dim] = None,
         device: Optional[str] = None,
         name: Optional[str] = None,
     ) -> _TT:
-        """
-        :param value:
-        :param dims:
-        :param dtype:
-        :param sparse_dim:
-        :param device:
-        :param name:
-        :return: tensor
-        """
+        """convert to tensor"""
         if isinstance(value, Tensor):
             return value
         with tf.control_dependencies(None):
             value = tf.convert_to_tensor(value, dtype=dtype)
         assert isinstance(value, tf.Tensor)
-        return Tensor(name or "const", raw_tensor=value, dims=dims, dtype=dtype, sparse_dim=sparse_dim)
+        return Tensor(
+            name or "const", raw_tensor=value, dims=dims, dtype=dtype, sparse_dim=sparse_dim, feature_dim=feature_dim
+        )
     @staticmethod
     def range_over_dim(dim: Dim, *, dtype: Optional[str] = None, device: Optional[str] = None) -> _TT:

{returnn-1.20250220.174943 → returnn-1.20250220.200053}/returnn/torch/frontend/_backend.py RENAMED Viewed

@@ -895,18 +895,11 @@ class TorchBackend(Backend[torch.Tensor]):
         dims: Sequence[Dim],
         dtype: str,
         sparse_dim: Optional[Dim] = None,
+        feature_dim: Optional[Dim] = None,
         device: Optional[str] = None,
         name: Optional[str] = None,
     ) -> Tensor[torch.Tensor]:
-        """
-        :param value:
-        :param dims:
-        :param dtype:
-        :param sparse_dim:
-        :param device:
-        :param name:
-        :return: tensor
-        """
+        """convert to tensor"""
         if isinstance(value, Tensor):
             return value
         if isinstance(value, torch.Tensor):
@@ -926,7 +919,7 @@ class TorchBackend(Backend[torch.Tensor]):
                     device=device or rf.get_default_device(),
                 )
         assert isinstance(value, torch.Tensor)
-        return Tensor(name, dims=dims, dtype=dtype, sparse_dim=sparse_dim, raw_tensor=value)
+        return Tensor(name, dims=dims, dtype=dtype, sparse_dim=sparse_dim, feature_dim=feature_dim, raw_tensor=value)
     @staticmethod
     def full(
@@ -1223,6 +1216,21 @@ class TorchBackend(Backend[torch.Tensor]):
         out.raw_tensor = torch.where(cond_bc_raw, true_bc_raw, false_bc_raw)
         return out
+    @staticmethod
+    def sort(source: Tensor, *, axis: Dim, descending: bool, stable: bool) -> Tuple[Tensor, Tensor, Dim]:
+        """sort. return values and indices"""
+        axis_int = source.get_axis_from_description(axis, allow_int=False)
+        # Move to last axis. Should be more efficient.
+        source = source.copy_move_axis(axis_int, -1)
+        axis_int = source.batch_ndim - 1
+        values_raw, indices_raw = torch.sort(source.raw_tensor, dim=axis_int, descending=descending, stable=stable)
+        out_dims = list(source.dims)
+        out_dim = axis.copy(same_as_self=False, description=f"{axis.description}:sorted")
+        out_dims[axis_int] = out_dim
+        values = rf.convert_to_tensor(values_raw, dims=out_dims, feature_dim={axis: out_dim}.get(source.feature_dim))
+        indices = rf.convert_to_tensor(indices_raw, dims=out_dims, sparse_dim=axis)
+        return values, indices, out_dim
     @staticmethod
     def search_sorted(
         sorted_seq: Tensor, values: Tensor, *, axis: Dim, side: str = "left", out_dtype: str = "int32"
@@ -1566,6 +1574,9 @@ class TorchBackend(Backend[torch.Tensor]):
             return values, indices_out, k_dim
         assert isinstance(axis, Dim)
         axis_int = source.get_axis_from_description(axis, allow_int=False)
+        # Move to last axis. Should be more efficient.
+        source = source.copy_move_axis(axis_int, -1)
+        axis_int = source.batch_ndim - 1
         values_raw, indices_raw = torch.topk(
             source.raw_tensor, k=k_dim.get_dim_value(), dim=axis_int, largest=True, sorted=sorted
         )

{returnn-1.20250220.174943 → returnn-1.20250220.200053/returnn.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250220.174943
+Version: 1.20250220.200053
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer