returnn 1.20250221.114352__py3-none-any.whl → 1.20250223.154045__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of returnn might be problematic.
returnn/PKG-INFO CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250221.114352
+Version: 1.20250223.154045
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
@@ -1,2 +1,2 @@
-version = '1.20250221.114352'
-long_version = '1.20250221.114352+git.650b638'
+version = '1.20250223.154045'
+long_version = '1.20250223.154045+git.354cf31'
@@ -350,6 +350,11 @@ class Backend(Generic[T]):
         """
         raise NotImplementedError
 
+    @staticmethod
+    def stop_gradient_scope() -> Any:
+        """stop gradient scope"""
+        raise NotImplementedError
+
     @staticmethod
     def scaled_gradient(tensor: Tensor, scale: Union[float, Tensor]) -> Tensor:
         """
@@ -905,9 +910,9 @@ class Backend(Generic[T]):
     ) -> Tensor:
         """
         Scatters into new zero-tensor.
-        If entries in indices are duplicated, the corresponding values in source will be added together
-        (scatter_add in PyTorch).
-        (TF segment_sum can be implemented via this.)
+        If entries in indices are duplicated, with mode="sum", the corresponding values in source will be added together
+        (``scatter_add`` in PyTorch), otherwise min/max.
+        (segment_sum can be implemented via this.)
 
         :param source: [batch_dims..., indices_dim(s)..., feature_dims...]
         :param indices: [batch_dims..., indices_dim(s)...] -> out_dim
@@ -40,6 +40,8 @@ __all__ = [
     "gather",
     "scatter",
     "scatter_argmax",
+    "scatter_logsumexp",
+    "scatter_logmeanexp",
     "slice",
     "shift_right",
     "shift_left",
@@ -758,19 +760,20 @@ def scatter(
 ) -> Tensor:
     """
     Scatters into new zero-tensor.
-    If entries in indices are duplicated, the corresponding values in source will be added together
-    (scatter_add in PyTorch)
-    with mode=="sum",
-    or otherwise it will take the max/min.
+    If entries in indices are duplicated, with ``mode=="sum"``,
+    the corresponding values in source will be added together
+    (``scatter_add`` in PyTorch),
+    or otherwise it will take the respective reduction.
 
     ``scatter`` is the inverse of :func:`gather`.
 
-    (TF segment_sum can be implemented via this.)
+    (segment_sum can be implemented via this.)
 
     :param source: [batch_dims..., indices_dim(s)..., feature_dims...]
     :param indices: [batch_dims..., indices_dim(s)...] -> out_dim
     :param indices_dim:
-    :param mode: "sum" or "max" or "min". also see :func:`scatter_argmax`.
+    :param mode: "sum", "max", "min", "logsumexp", "logmeanexp", "argmax".
+        (Note: If you ever need mean, argmin, etc, please open an issue/PR.)
     :param fill_value:
     :param out_dim: The indices target dim.
         If not given, will be automatically determined as the sparse_dim from indices.
@@ -778,6 +781,16 @@ def scatter(
         and then we use :func:`rf.split_dims` afterwards.
     :return: [batch_dims..., out_dim(s)..., feature_dims...]
     """
+    if mode == "logsumexp":
+        return scatter_logsumexp(
+            source, indices=indices, indices_dim=indices_dim, fill_value=fill_value, out_dim=out_dim
+        )
+    if mode == "logmeanexp":
+        return scatter_logmeanexp(
+            source, indices=indices, indices_dim=indices_dim, fill_value=fill_value, out_dim=out_dim
+        )
+    if mode == "argmax":
+        return scatter_argmax(source, indices=indices, indices_dim=indices_dim, invalid_idx=fill_value, out_dim=out_dim)
     if not out_dim:
         assert isinstance(indices, Tensor) and indices.sparse_dim
         out_dim = indices.sparse_dim
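The new dispatch above routes mode="logsumexp", "logmeanexp" and "argmax" to dedicated helpers while "sum"/"max"/"min" keep the existing path. As a backend-agnostic illustration of what these duplicated-index reductions compute, here is a minimal NumPy sketch (not RETURNN code; the arrays, the output size and the per-group loop are made up for illustration):

    # Illustration of scatter semantics: values whose target index coincides
    # are reduced into the same output slot.
    import numpy as np

    source = np.array([1.0, 2.0, 3.0, 4.0])   # values to scatter
    indices = np.array([0, 1, 0, 1])          # duplicated target indices
    out_size = 2

    def scatter_reduce(mode):
        out = np.zeros(out_size)
        for i in range(out_size):
            group = source[indices == i]      # all values mapped to slot i
            if mode == "sum":
                out[i] = group.sum()
            elif mode == "max":
                out[i] = group.max()
            elif mode == "logsumexp":
                out[i] = np.log(np.exp(group).sum())
        return out

    print(scatter_reduce("sum"))        # [4. 6.]
    print(scatter_reduce("max"))        # [3. 4.]
    print(scatter_reduce("logsumexp"))  # [log(e^1 + e^3), log(e^2 + e^4)]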
@@ -858,6 +871,76 @@ def scatter_argmax(
     return out
 
 
+def scatter_logsumexp(
+    source: Tensor,
+    *,
+    indices: Tensor,
+    indices_dim: Union[Dim, Sequence[Dim]],
+    fill_value: Optional[Union[int, float]] = None,
+    out_dim: Optional[Union[Dim, Sequence[Dim]]] = None,
+) -> Tensor:
+    """
+    Scatters into new zero-tensor.
+    If entries in indices are duplicated, the corresponding values in source will be log-sum-exp'ed together.
+    This is like :func:`scatter` with ``mode="logsumexp"``.
+
+    :param source: [batch_dims..., indices_dim(s)..., feature_dims...]
+    :param indices: [batch_dims..., indices_dim(s)...] -> out_dim
+    :param indices_dim:
+    :param fill_value:
+    :param out_dim: The indices target dim.
+        If not given, will be automatically determined as the sparse_dim from indices.
+        If multiple out dims, use indices into the merged out dims,
+        and then we use :func:`rf.split_dims` afterwards.
+    :return: [batch_dims..., out_dim(s)..., feature_dims...]
+    """
+    if not out_dim:
+        assert isinstance(indices, Tensor) and indices.sparse_dim
+        out_dim = indices.sparse_dim
+    with rf.stop_gradient_scope():
+        max_x = rf.scatter(source, indices=indices, indices_dim=indices_dim, mode="max", out_dim=out_dim)  # [D_out,...]
+        max_x_ = rf.gather(max_x, indices=indices, axis=out_dim)  # [D_src,...]
+    src_ = rf.exp(source - max_x_)
+    if fill_value is not None:
+        fill_value = rf.exp(fill_value - max_x_)
+    tensor = rf.scatter(
+        src_, indices=indices, indices_dim=indices_dim, mode="sum", fill_value=fill_value, out_dim=out_dim
+    )
+    tensor = rf.log(tensor)
+    tensor = rf.where(rf.is_neg_infinite(max_x), rf.zeros((), dtype=source.dtype, device=source.device), tensor)
+    tensor += max_x
+    return tensor
+
+
+def scatter_logmeanexp(
+    source: Tensor,
+    *,
+    indices: Tensor,
+    indices_dim: Union[Dim, Sequence[Dim]],
+    fill_value: Optional[Union[int, float]] = None,
+    out_dim: Optional[Union[Dim, Sequence[Dim]]] = None,
+) -> Tensor:
+    """
+    Scatters into new zero-tensor.
+    If entries in indices are duplicated, the corresponding values in source will be log-mean-exp'ed together.
+    This is like :func:`scatter` with ``mode="logmeanexp"``.
+
+    :param source: [batch_dims..., indices_dim(s)..., feature_dims...]
+    :param indices: [batch_dims..., indices_dim(s)...] -> out_dim
+    :param indices_dim:
+    :param fill_value:
+    :param out_dim: The indices target dim.
+        If not given, will be automatically determined as the sparse_dim from indices.
+        If multiple out dims, use indices into the merged out dims,
+        and then we use :func:`rf.split_dims` afterwards.
+    :return: [batch_dims..., out_dim(s)..., feature_dims...]
+    """
+    ones = rf.ones(dims=indices.dims, dtype=source.dtype, device=source.device)
+    counts = rf.scatter(ones, indices=indices, indices_dim=indices_dim, fill_value=1, out_dim=out_dim)
+    y = scatter_logsumexp(source, indices=indices, indices_dim=indices_dim, fill_value=fill_value, out_dim=out_dim)
+    return y - rf.log(counts)
+
+
 # noinspection PyShadowingBuiltins
 def slice(
     source: Tensor,
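The new scatter_logsumexp uses the usual max-shift trick: the per-slot maximum is computed without gradient, subtracted before exponentiation, and added back after the log (the rf.where guards slots whose maximum is -inf), while scatter_logmeanexp subtracts the log of the scattered counts. A minimal NumPy sketch of that identity, with illustrative values only (not RETURNN code):

    import numpy as np

    x = np.array([1000.0, 1001.0, 999.0])     # naive exp(x) overflows in float64

    naive = np.log(np.exp(x).sum())           # -> inf (overflow warning)
    m = x.max()                               # the "detached" maximum, as in scatter_logsumexp
    stable = m + np.log(np.exp(x - m).sum())  # ~1001.41, finite

    # logmeanexp is logsumexp shifted by the group size, which is what
    # scatter_logmeanexp computes via the scattered counts:
    logmeanexp = stable - np.log(len(x))
    print(naive, stable, logmeanexp)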
@@ -11,6 +11,7 @@ __all__ = [
     "set_requires_gradient",
     "gradient",
     "stop_gradient",
+    "stop_gradient_scope",
     "scaled_gradient",
     "scaled_gradient_ext",
     "gradient_checkpoint_scope",
@@ -42,6 +43,28 @@ def stop_gradient(source: Tensor) -> Tensor:
     return source._raw_backend.stop_gradient(source)
 
 
+def stop_gradient_scope():
+    """
+    Create a stop gradient scope.
+    All tensors created within this scope will have their gradient stopped.
+
+    Example::
+
+        a = ...
+        b = ...
+        with stop_gradient_scope():
+            x = a + b
+        y = x * c
+
+    In this example, the tensor ``x`` will have its gradient stopped,
+    i.e. the gradient of ``x`` w.r.t. ``a`` and ``b`` will be zero.
+
+    :return: context manager which enables stopping the gradient. It supports __enter__ and __exit__,
+        and the intended usage is with the `with` statement.
+    """
+    return global_backend.stop_gradient_scope()
+
+
 def scaled_gradient(source: Tensor, scale: Union[float, Tensor]) -> Tensor:
     """
     :param source:
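In the Torch backend further down, stop_gradient_scope() is implemented as torch.no_grad(), so the docstring example behaves like the following plain-PyTorch sketch (variable names and values are illustrative, not from RETURNN):

    import torch

    a = torch.tensor(2.0, requires_grad=True)
    b = torch.tensor(3.0, requires_grad=True)
    c = torch.tensor(4.0, requires_grad=True)

    with torch.no_grad():    # what stop_gradient_scope() returns in the Torch backend
        x = a + b            # x is created without a grad history

    y = x * c                # gradient still flows into c, but not through x into a or b
    y.backward()

    print(a.grad, b.grad)    # None, None -- the gradient w.r.t. a and b is stopped
    print(c.grad)            # tensor(5.), i.e. the value of x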
@@ -3,7 +3,7 @@ Backend for exposing PyTorch-specific functionality.
 """
 
 from __future__ import annotations
-from typing import Optional, Union, Sequence, Tuple, List, Dict, Generator
+from typing import Optional, Union, Any, Sequence, Tuple, List, Dict, Generator
 import contextlib
 import torch
 import numpy
@@ -212,6 +212,11 @@ class TorchBackend(Backend[torch.Tensor]):
         out.raw_tensor = out.raw_tensor.detach()
         return out
 
+    @staticmethod
+    def stop_gradient_scope() -> Any:
+        """stop gradient scope"""
+        return torch.no_grad()
+
     @staticmethod
     def scaled_gradient(tensor: Tensor, scale: Union[float, Tensor]) -> Tensor:
         """scaled gradient"""
@@ -1061,9 +1066,9 @@ class TorchBackend(Backend[torch.Tensor]):
     ) -> Tensor:
         """
         Scatters into new zero-tensor.
-        If entries in indices are duplicated, the corresponding values in source will be added together
-        (scatter_add in PyTorch).
-        (TF segment_sum can be implemented via this.)
+        If entries in indices are duplicated, with mode="sum", the corresponding values in source will be added together
+        (``scatter_add`` in PyTorch), otherwise min/max.
+        (segment_sum can be implemented via this.)
 
         :param source: [batch_dims..., indices_dim(s)..., feature_dims...]
         :param indices: [batch_dims..., indices_dim(s)...] -> out_dim
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250221.114352
+Version: 1.20250223.154045
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
@@ -1,9 +1,9 @@
-returnn/PKG-INFO,sha256=VL6JCmrRQYGi4OdodEygiq3hJyM1C-73zvMTSusYpOs,5215
+returnn/PKG-INFO,sha256=osW4TLxe1IbJ9J1E7K31tPJAgyAAEoVhILW6twz1beg,5215
 returnn/__init__.py,sha256=biBtRsM0WZ406vShaeH-9WFoqJ8XwTbn6g0EeFJ7l8E,1012
 returnn/__main__.py,sha256=qBFbuB1yN3adgVM5pXt2-Yq9vorjRNchNPL8kDKx44M,31752
 returnn/__old_mod_loader__.py,sha256=nvsNY-xELdS_IPNkv66Q9Rmvg4dbGW0-EBRDcCmctos,7654
 returnn/__setup__.py,sha256=22kQn2fh11iPM0hLb2Fy5sLmoU1JGvmDxXRYuRgQkwU,4659
-returnn/_setup_info_generated.py,sha256=LlW75YDQH_DvPCMrSeF0bY52JGq9l4tJNA5mGTT5MQA,77
+returnn/_setup_info_generated.py,sha256=ZkDu5IsuZj9TiGVIFC5h52LbiTy4KoJCJD-MAi4ZWmc,77
 returnn/config.py,sha256=3tmKhB6FnQZaNdtcYsiB61JnEY--iZ2qmJ4yq0b6tE0,29140
 returnn/forward_iface.py,sha256=A_OJiaXsX4MlXQRzST86ylyxSUZbC402PQL1REcqHjM,911
 returnn/learning_rate_control.py,sha256=ZvWryAn_tv9DhV8sh1LV3eE34Yltl3On3mYZAG4hR9s,34684
@@ -75,12 +75,12 @@ returnn/extern/graph_editor/subgraph.py,sha256=R3uIFqWgiL7L5S4YATm9o9a3wfEa_mSb4
 returnn/extern/graph_editor/transform.py,sha256=d9fEgu0JC342q0g9niVxRWMKzkQQA9mrrajBGcU1o_s,29349
 returnn/extern/graph_editor/util.py,sha256=QMrQeQZ7lJwsrNQub9tof0h3quEaoHiGJaZmogQ7jXE,18707
 returnn/frontend/__init__.py,sha256=2aS7nbxXniIrBp2DODl0xN0f3IJ_dX4Bi9ZlR7W5_DE,1472
-returnn/frontend/_backend.py,sha256=W3J3ZSOxonX6wk-wY2dX_aokXHpm1VQ1V0qSjllQxUM,50165
+returnn/frontend/_backend.py,sha256=lRAtOT0oAkgc_WGYBUviGbgIH3Yet6D17sjlEJH56Pg,50327
 returnn/frontend/_cache.py,sha256=JAhi7L-raQ3A-NC3JUYDtdRTwT3BGJJGGZxrZ8MfEWQ,8403
 returnn/frontend/_numpy_backend.py,sha256=2oCtG0YCWL_89v4cD_jDj8em1O_Fp-_YWl5EblGi_yo,7858
 returnn/frontend/_random_journal.py,sha256=_ktP_mjgx8vtQQGX_DofdhewJj0aPiczefTWeemPkmo,5457
 returnn/frontend/_utils.py,sha256=4A3MSRM0i86J77550uR_AjcBEPu6nymLUZ9Xd1V3Fkc,12073
-returnn/frontend/array_.py,sha256=UHTQmb_cFsjVStAELcCqMkCbQNQiBiwN4gQZu6CloIA,44126
+returnn/frontend/array_.py,sha256=x_OSKQ_WyUFqKWEJdf3dHc6bfifvkV_aiVsmaZVCEv0,47816
 returnn/frontend/attention.py,sha256=GKt-Xqnz8sIyXVrE0i4VCS7J2Wu7dmoH_BA0Cu8CrXQ,45769
 returnn/frontend/backend.py,sha256=iQ9w4xl8Ea7bgpb0VUaCKq50rV5Bl2E5J8Rhd-oqD_c,883
 returnn/frontend/build_from_dict.py,sha256=rfWa2rjjhIR_kIQED_nMrygrQBunS6unegzWTLVbC98,3017
@@ -93,7 +93,7 @@ returnn/frontend/device.py,sha256=K7Y1qoQcO4GIHgLkPLQWK-GVT8gKL8GwyQrmPo8LgBE,14
 returnn/frontend/dims.py,sha256=hKA7IQRB0DbohN1ngNw31W44BsyjdHCtYAccxOcumzQ,10872
 returnn/frontend/dropout.py,sha256=rsx3p5b0NblBfXXSQZTQFJ8jUUS3fj4Qzc39iffBMCA,5006
 returnn/frontend/dtype.py,sha256=Ooc5BrcNrTp6XShuFEV9g5V6-niuy4ImP_Lt_Qgq3jE,1886
-returnn/frontend/gradient.py,sha256=dOUvLqN-vxsvjKQfpfIvEYlx4TlpHkOk-p9hsB680iA,3376
+returnn/frontend/gradient.py,sha256=G-Qv4gKGHYEeB92Zwco9ao4qjd6umZPUzQC4J-fbYWo,4033
 returnn/frontend/graph.py,sha256=PIv901WZ1rfTV0QGkyzBv6UxfWk9NsLGxdoJ5x9-8Xg,1818
 returnn/frontend/hooks.py,sha256=jYPbsb4gy5HORRZvKTEJbLcoJri5hOt5ADbhnTCytQo,5507
 returnn/frontend/init.py,sha256=bVB7bpghaY8DI_HL0mkB_9z95onWnIX2zlW4hlMYnRw,7494
@@ -216,7 +216,7 @@ returnn/torch/data/queued_data_iter.py,sha256=PoOsGHdHVZjTmcyfq_ZOw--P6hyfTdmAWI
 returnn/torch/data/returnn_dataset_wrapper.py,sha256=1Bw82-Ge_8m_DSDXZNqQ3zGDic2HQlp6jysELL0NVK0,7369
 returnn/torch/data/tensor_utils.py,sha256=-Teqi--LLbt6q_5mDRdoHZHmPgSdC83W706ukif_YiU,1284
 returnn/torch/frontend/__init__.py,sha256=AA48HZnC17ASuKA0EWy8loZ-Bib_yUtqF4T1wYvjst4,62
-returnn/torch/frontend/_backend.py,sha256=mjR6Ilt2zlnIO4_CpVPCLQ0XVJa_QmW3HsZtR2KT8yk,101110
+returnn/torch/frontend/_backend.py,sha256=ZHeE5A9nPo6i2KShRRNkiqpIrz4DmA0g3QhWddzFikg,101274
 returnn/torch/frontend/_rand.py,sha256=1JgIkV2XmpgJD86zXZ-NCAe-QuoP2swr6NaS1oz3Qa8,1830
 returnn/torch/frontend/bridge.py,sha256=Z2_UW8AagezC7zsXDc5PKcd8G9WwisV7j9SWGHU0m4U,7840
 returnn/torch/frontend/raw_ops.py,sha256=lF0h-KtYYsdaaqQADylVZp9qzPskOOXA4MfmYDyx5IU,296
@@ -253,8 +253,8 @@ returnn/util/sig_proc.py,sha256=Tjz0VOAVyqu2qDCF5HZ1JjALjcFsHcNkcd96WgZeKfE,7265
 returnn/util/task_system.py,sha256=y4sMVXQ25Qd2z0rx03uOlXlkE-jbCYC1Sjfn-XlraVU,26003
 returnn/util/train_proc_manager.py,sha256=Pjht28k6uz6BNQ47uW6Gf880iyq5q4wx7P_K2tmoAM8,3266
 returnn/util/watch_memory.py,sha256=BR5P2kvBN6UI81cE0_1WAA6Hd1SByLbBaiDxvLhPOew,4213
-returnn-1.20250221.114352.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
-returnn-1.20250221.114352.dist-info/METADATA,sha256=VL6JCmrRQYGi4OdodEygiq3hJyM1C-73zvMTSusYpOs,5215
-returnn-1.20250221.114352.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
-returnn-1.20250221.114352.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
-returnn-1.20250221.114352.dist-info/RECORD,,
+returnn-1.20250223.154045.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
+returnn-1.20250223.154045.dist-info/METADATA,sha256=osW4TLxe1IbJ9J1E7K31tPJAgyAAEoVhILW6twz1beg,5215
+returnn-1.20250223.154045.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
+returnn-1.20250223.154045.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
+returnn-1.20250223.154045.dist-info/RECORD,,