returnn 1.20250221.114352__py3-none-any.whl → 1.20250223.154045__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of returnn might be problematic.
returnn/PKG-INFO CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250221.114352
+Version: 1.20250223.154045
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
@@ -1,2 +1,2 @@
-version = '1.20250221.114352'
-long_version = '1.20250221.114352+git.650b638'
+version = '1.20250223.154045'
+long_version = '1.20250223.154045+git.354cf31'
@@ -350,6 +350,11 @@ class Backend(Generic[T]):
         """
         raise NotImplementedError
 
+    @staticmethod
+    def stop_gradient_scope() -> Any:
+        """stop gradient scope"""
+        raise NotImplementedError
+
     @staticmethod
     def scaled_gradient(tensor: Tensor, scale: Union[float, Tensor]) -> Tensor:
         """
@@ -905,9 +910,9 @@ class Backend(Generic[T]):
     ) -> Tensor:
         """
         Scatters into new zero-tensor.
-        If entries in indices are duplicated, the corresponding values in source will be added together
-        (scatter_add in PyTorch).
-        (TF segment_sum can be implemented via this.)
+        If entries in indices are duplicated, with mode="sum", the corresponding values in source will be added together
+        (``scatter_add`` in PyTorch), otherwise min/max.
+        (segment_sum can be implemented via this.)
 
         :param source: [batch_dims..., indices_dim(s)..., feature_dims...]
         :param indices: [batch_dims..., indices_dim(s)...] -> out_dim
@@ -40,6 +40,8 @@ __all__ = [
     "gather",
     "scatter",
     "scatter_argmax",
+    "scatter_logsumexp",
+    "scatter_logmeanexp",
     "slice",
     "shift_right",
     "shift_left",
@@ -758,19 +760,20 @@ def scatter(
 ) -> Tensor:
     """
     Scatters into new zero-tensor.
-    If entries in indices are duplicated, the corresponding values in source will be added together
-    (scatter_add in PyTorch)
-    with mode=="sum",
-    or otherwise it will take the max/min.
+    If entries in indices are duplicated, with ``mode=="sum"``,
+    the corresponding values in source will be added together
+    (``scatter_add`` in PyTorch),
+    or otherwise it will take the respective reduction.
 
     ``scatter`` is the inverse of :func:`gather`.
 
-    (TF segment_sum can be implemented via this.)
+    (segment_sum can be implemented via this.)
 
     :param source: [batch_dims..., indices_dim(s)..., feature_dims...]
     :param indices: [batch_dims..., indices_dim(s)...] -> out_dim
     :param indices_dim:
-    :param mode: "sum" or "max" or "min". also see :func:`scatter_argmax`.
+    :param mode: "sum", "max", "min", "logsumexp", "logmeanexp", "argmax".
+        (Note: If you ever need mean, argmin, etc, please open an issue/PR.)
     :param fill_value:
     :param out_dim: The indices target dim.
         If not given, will be automatically determined as the sparse_dim from indices.
@@ -778,6 +781,16 @@ def scatter(
         and then we use :func:`rf.split_dims` afterwards.
     :return: [batch_dims..., out_dim(s)..., feature_dims...]
     """
+    if mode == "logsumexp":
+        return scatter_logsumexp(
+            source, indices=indices, indices_dim=indices_dim, fill_value=fill_value, out_dim=out_dim
+        )
+    if mode == "logmeanexp":
+        return scatter_logmeanexp(
+            source, indices=indices, indices_dim=indices_dim, fill_value=fill_value, out_dim=out_dim
+        )
+    if mode == "argmax":
+        return scatter_argmax(source, indices=indices, indices_dim=indices_dim, invalid_idx=fill_value, out_dim=out_dim)
     if not out_dim:
         assert isinstance(indices, Tensor) and indices.sparse_dim
         out_dim = indices.sparse_dim
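The new dispatch above routes mode="logsumexp", "logmeanexp" and "argmax" to dedicated helpers while "sum"/"max"/"min" keep the existing path. As a backend-agnostic illustration of what these duplicated-index reductions compute, here is a minimal NumPy sketch (not RETURNN code; the arrays, the output size and the per-group loop are made up for illustration):

    # Illustration of scatter semantics: values whose target index coincides
    # are reduced into the same output slot.
    import numpy as np

    source = np.array([1.0, 2.0, 3.0, 4.0])   # values to scatter
    indices = np.array([0, 1, 0, 1])          # duplicated target indices
    out_size = 2

    def scatter_reduce(mode):
        out = np.zeros(out_size)
        for i in range(out_size):
            group = source[indices == i]      # all values mapped to slot i
            if mode == "sum":
                out[i] = group.sum()
            elif mode == "max":
                out[i] = group.max()
            elif mode == "logsumexp":
                out[i] = np.log(np.exp(group).sum())
        return out

    print(scatter_reduce("sum"))        # [4. 6.]
    print(scatter_reduce("max"))        # [3. 4.]
    print(scatter_reduce("logsumexp"))  # [log(e^1 + e^3), log(e^2 + e^4)]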
@@ -858,6 +871,76 @@ def scatter_argmax(
     return out
 
 
+def scatter_logsumexp(
+    source: Tensor,
+    *,
+    indices: Tensor,
+    indices_dim: Union[Dim, Sequence[Dim]],
+    fill_value: Optional[Union[int, float]] = None,
+    out_dim: Optional[Union[Dim, Sequence[Dim]]] = None,
+) -> Tensor:
+    """
+    Scatters into new zero-tensor.
+    If entries in indices are duplicated, the corresponding values in source will be log-sum-exp'ed together.
+    This is like :func:`scatter` with ``mode="logsumexp"``.
+
+    :param source: [batch_dims..., indices_dim(s)..., feature_dims...]
+    :param indices: [batch_dims..., indices_dim(s)...] -> out_dim
+    :param indices_dim:
+    :param fill_value:
+    :param out_dim: The indices target dim.
+        If not given, will be automatically determined as the sparse_dim from indices.
+        If multiple out dims, use indices into the merged out dims,
+        and then we use :func:`rf.split_dims` afterwards.
+    :return: [batch_dims..., out_dim(s)..., feature_dims...]
+    """
+    if not out_dim:
+        assert isinstance(indices, Tensor) and indices.sparse_dim
+        out_dim = indices.sparse_dim
+    with rf.stop_gradient_scope():
+        max_x = rf.scatter(source, indices=indices, indices_dim=indices_dim, mode="max", out_dim=out_dim)  # [D_out,...]
+        max_x_ = rf.gather(max_x, indices=indices, axis=out_dim)  # [D_src,...]
+    src_ = rf.exp(source - max_x_)
+    if fill_value is not None:
+        fill_value = rf.exp(fill_value - max_x_)
+    tensor = rf.scatter(
+        src_, indices=indices, indices_dim=indices_dim, mode="sum", fill_value=fill_value, out_dim=out_dim
+    )
+    tensor = rf.log(tensor)
+    tensor = rf.where(rf.is_neg_infinite(max_x), rf.zeros((), dtype=source.dtype, device=source.device), tensor)
+    tensor += max_x
+    return tensor
+
+
+def scatter_logmeanexp(
+    source: Tensor,
+    *,
+    indices: Tensor,
+    indices_dim: Union[Dim, Sequence[Dim]],
+    fill_value: Optional[Union[int, float]] = None,
+    out_dim: Optional[Union[Dim, Sequence[Dim]]] = None,
+) -> Tensor:
+    """
+    Scatters into new zero-tensor.
+    If entries in indices are duplicated, the corresponding values in source will be log-mean-exp'ed together.
+    This is like :func:`scatter` with ``mode="logmeanexp"``.
+
+    :param source: [batch_dims..., indices_dim(s)..., feature_dims...]
+    :param indices: [batch_dims..., indices_dim(s)...] -> out_dim
+    :param indices_dim:
+    :param fill_value:
+    :param out_dim: The indices target dim.
+        If not given, will be automatically determined as the sparse_dim from indices.
+        If multiple out dims, use indices into the merged out dims,
+        and then we use :func:`rf.split_dims` afterwards.
+    :return: [batch_dims..., out_dim(s)..., feature_dims...]
+    """
+    ones = rf.ones(dims=indices.dims, dtype=source.dtype, device=source.device)
+    counts = rf.scatter(ones, indices=indices, indices_dim=indices_dim, fill_value=1, out_dim=out_dim)
+    y = scatter_logsumexp(source, indices=indices, indices_dim=indices_dim, fill_value=fill_value, out_dim=out_dim)
+    return y - rf.log(counts)
+
+
 # noinspection PyShadowingBuiltins
 def slice(
     source: Tensor,
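The new scatter_logsumexp uses the usual max-shift trick: the per-slot maximum is computed without gradient, subtracted before exponentiation, and added back after the log (the rf.where guards slots whose maximum is -inf), while scatter_logmeanexp subtracts the log of the scattered counts. A minimal NumPy sketch of that identity, with illustrative values only (not RETURNN code):

    import numpy as np

    x = np.array([1000.0, 1001.0, 999.0])     # naive exp(x) overflows in float64

    naive = np.log(np.exp(x).sum())           # -> inf (overflow warning)
    m = x.max()                               # the "detached" maximum, as in scatter_logsumexp
    stable = m + np.log(np.exp(x - m).sum())  # ~1001.41, finite

    # logmeanexp is logsumexp shifted by the group size, which is what
    # scatter_logmeanexp computes via the scattered counts:
    logmeanexp = stable - np.log(len(x))
    print(naive, stable, logmeanexp)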
@@ -11,6 +11,7 @@ __all__ = [
     "set_requires_gradient",
     "gradient",
     "stop_gradient",
+    "stop_gradient_scope",
     "scaled_gradient",
     "scaled_gradient_ext",
     "gradient_checkpoint_scope",
@@ -42,6 +43,28 @@ def stop_gradient(source: Tensor) -> Tensor:
     return source._raw_backend.stop_gradient(source)
 
 
+def stop_gradient_scope():
+    """
+    Create a stop gradient scope.
+    All tensors created within this scope will have their gradient stopped.
+
+    Example::
+
+        a = ...
+        b = ...
+        with stop_gradient_scope():
+            x = a + b
+        y = x * c
+
+    In this example, the tensor ``x`` will have its gradient stopped,
+    i.e. the gradient of ``x`` w.r.t. ``a`` and ``b`` will be zero.
+
+    :return: context manager which enables stopping the gradient. It supports __enter__ and __exit__,
+        and the intended usage is with the `with` statement.
+    """
+    return global_backend.stop_gradient_scope()
+
+
 def scaled_gradient(source: Tensor, scale: Union[float, Tensor]) -> Tensor:
     """
     :param source:
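In the Torch backend further down, stop_gradient_scope() is implemented as torch.no_grad(), so the docstring example behaves like the following plain-PyTorch sketch (variable names and values are illustrative, not from RETURNN):

    import torch

    a = torch.tensor(2.0, requires_grad=True)
    b = torch.tensor(3.0, requires_grad=True)
    c = torch.tensor(4.0, requires_grad=True)

    with torch.no_grad():    # what stop_gradient_scope() returns in the Torch backend
        x = a + b            # x is created without a grad history

    y = x * c                # gradient still flows into c, but not through x into a or b
    y.backward()

    print(a.grad, b.grad)    # None, None -- the gradient w.r.t. a and b is stopped
    print(c.grad)            # tensor(5.), i.e. the value of x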
@@ -3,7 +3,7 @@ Backend for exposing PyTorch-specific functionality.
 """
 
 from __future__ import annotations
-from typing import Optional, Union, Sequence, Tuple, List, Dict, Generator
+from typing import Optional, Union, Any, Sequence, Tuple, List, Dict, Generator
 import contextlib
 import torch
 import numpy
@@ -212,6 +212,11 @@ class TorchBackend(Backend[torch.Tensor]):
         out.raw_tensor = out.raw_tensor.detach()
         return out
 
+    @staticmethod
+    def stop_gradient_scope() -> Any:
+        """stop gradient scope"""
+        return torch.no_grad()
+
     @staticmethod
     def scaled_gradient(tensor: Tensor, scale: Union[float, Tensor]) -> Tensor:
         """scaled gradient"""
@@ -1061,9 +1066,9 @@ class TorchBackend(Backend[torch.Tensor]):
     ) -> Tensor:
         """
         Scatters into new zero-tensor.
-        If entries in indices are duplicated, the corresponding values in source will be added together
-        (scatter_add in PyTorch).
-        (TF segment_sum can be implemented via this.)
+        If entries in indices are duplicated, with mode="sum", the corresponding values in source will be added together
+        (``scatter_add`` in PyTorch), otherwise min/max.
+        (segment_sum can be implemented via this.)
 
         :param source: [batch_dims..., indices_dim(s)..., feature_dims...]
         :param indices: [batch_dims..., indices_dim(s)...] -> out_dim
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250221.114352
+Version: 1.20250223.154045
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
@@ -1,9 +1,9 @@
-returnn/PKG-INFO,sha256=VL6JCmrRQYGi4OdodEygiq3hJyM1C-73zvMTSusYpOs,5215
+returnn/PKG-INFO,sha256=osW4TLxe1IbJ9J1E7K31tPJAgyAAEoVhILW6twz1beg,5215
 returnn/__init__.py,sha256=biBtRsM0WZ406vShaeH-9WFoqJ8XwTbn6g0EeFJ7l8E,1012
 returnn/__main__.py,sha256=qBFbuB1yN3adgVM5pXt2-Yq9vorjRNchNPL8kDKx44M,31752
 returnn/__old_mod_loader__.py,sha256=nvsNY-xELdS_IPNkv66Q9Rmvg4dbGW0-EBRDcCmctos,7654
 returnn/__setup__.py,sha256=22kQn2fh11iPM0hLb2Fy5sLmoU1JGvmDxXRYuRgQkwU,4659
-returnn/_setup_info_generated.py,sha256=LlW75YDQH_DvPCMrSeF0bY52JGq9l4tJNA5mGTT5MQA,77
+returnn/_setup_info_generated.py,sha256=ZkDu5IsuZj9TiGVIFC5h52LbiTy4KoJCJD-MAi4ZWmc,77
 returnn/config.py,sha256=3tmKhB6FnQZaNdtcYsiB61JnEY--iZ2qmJ4yq0b6tE0,29140
 returnn/forward_iface.py,sha256=A_OJiaXsX4MlXQRzST86ylyxSUZbC402PQL1REcqHjM,911
 returnn/learning_rate_control.py,sha256=ZvWryAn_tv9DhV8sh1LV3eE34Yltl3On3mYZAG4hR9s,34684
@@ -75,12 +75,12 @@ returnn/extern/graph_editor/subgraph.py,sha256=R3uIFqWgiL7L5S4YATm9o9a3wfEa_mSb4
 returnn/extern/graph_editor/transform.py,sha256=d9fEgu0JC342q0g9niVxRWMKzkQQA9mrrajBGcU1o_s,29349
 returnn/extern/graph_editor/util.py,sha256=QMrQeQZ7lJwsrNQub9tof0h3quEaoHiGJaZmogQ7jXE,18707
 returnn/frontend/__init__.py,sha256=2aS7nbxXniIrBp2DODl0xN0f3IJ_dX4Bi9ZlR7W5_DE,1472
-returnn/frontend/_backend.py,sha256=W3J3ZSOxonX6wk-wY2dX_aokXHpm1VQ1V0qSjllQxUM,50165
+returnn/frontend/_backend.py,sha256=lRAtOT0oAkgc_WGYBUviGbgIH3Yet6D17sjlEJH56Pg,50327
 returnn/frontend/_cache.py,sha256=JAhi7L-raQ3A-NC3JUYDtdRTwT3BGJJGGZxrZ8MfEWQ,8403
 returnn/frontend/_numpy_backend.py,sha256=2oCtG0YCWL_89v4cD_jDj8em1O_Fp-_YWl5EblGi_yo,7858
 returnn/frontend/_random_journal.py,sha256=_ktP_mjgx8vtQQGX_DofdhewJj0aPiczefTWeemPkmo,5457
 returnn/frontend/_utils.py,sha256=4A3MSRM0i86J77550uR_AjcBEPu6nymLUZ9Xd1V3Fkc,12073
-returnn/frontend/array_.py,sha256=UHTQmb_cFsjVStAELcCqMkCbQNQiBiwN4gQZu6CloIA,44126
+returnn/frontend/array_.py,sha256=x_OSKQ_WyUFqKWEJdf3dHc6bfifvkV_aiVsmaZVCEv0,47816
 returnn/frontend/attention.py,sha256=GKt-Xqnz8sIyXVrE0i4VCS7J2Wu7dmoH_BA0Cu8CrXQ,45769
 returnn/frontend/backend.py,sha256=iQ9w4xl8Ea7bgpb0VUaCKq50rV5Bl2E5J8Rhd-oqD_c,883
 returnn/frontend/build_from_dict.py,sha256=rfWa2rjjhIR_kIQED_nMrygrQBunS6unegzWTLVbC98,3017
@@ -93,7 +93,7 @@ returnn/frontend/device.py,sha256=K7Y1qoQcO4GIHgLkPLQWK-GVT8gKL8GwyQrmPo8LgBE,14
 returnn/frontend/dims.py,sha256=hKA7IQRB0DbohN1ngNw31W44BsyjdHCtYAccxOcumzQ,10872
 returnn/frontend/dropout.py,sha256=rsx3p5b0NblBfXXSQZTQFJ8jUUS3fj4Qzc39iffBMCA,5006
 returnn/frontend/dtype.py,sha256=Ooc5BrcNrTp6XShuFEV9g5V6-niuy4ImP_Lt_Qgq3jE,1886
-returnn/frontend/gradient.py,sha256=dOUvLqN-vxsvjKQfpfIvEYlx4TlpHkOk-p9hsB680iA,3376
+returnn/frontend/gradient.py,sha256=G-Qv4gKGHYEeB92Zwco9ao4qjd6umZPUzQC4J-fbYWo,4033
 returnn/frontend/graph.py,sha256=PIv901WZ1rfTV0QGkyzBv6UxfWk9NsLGxdoJ5x9-8Xg,1818
 returnn/frontend/hooks.py,sha256=jYPbsb4gy5HORRZvKTEJbLcoJri5hOt5ADbhnTCytQo,5507
 returnn/frontend/init.py,sha256=bVB7bpghaY8DI_HL0mkB_9z95onWnIX2zlW4hlMYnRw,7494
@@ -216,7 +216,7 @@ returnn/torch/data/queued_data_iter.py,sha256=PoOsGHdHVZjTmcyfq_ZOw--P6hyfTdmAWI
 returnn/torch/data/returnn_dataset_wrapper.py,sha256=1Bw82-Ge_8m_DSDXZNqQ3zGDic2HQlp6jysELL0NVK0,7369
 returnn/torch/data/tensor_utils.py,sha256=-Teqi--LLbt6q_5mDRdoHZHmPgSdC83W706ukif_YiU,1284
 returnn/torch/frontend/__init__.py,sha256=AA48HZnC17ASuKA0EWy8loZ-Bib_yUtqF4T1wYvjst4,62
-returnn/torch/frontend/_backend.py,sha256=mjR6Ilt2zlnIO4_CpVPCLQ0XVJa_QmW3HsZtR2KT8yk,101110
+returnn/torch/frontend/_backend.py,sha256=ZHeE5A9nPo6i2KShRRNkiqpIrz4DmA0g3QhWddzFikg,101274
 returnn/torch/frontend/_rand.py,sha256=1JgIkV2XmpgJD86zXZ-NCAe-QuoP2swr6NaS1oz3Qa8,1830
 returnn/torch/frontend/bridge.py,sha256=Z2_UW8AagezC7zsXDc5PKcd8G9WwisV7j9SWGHU0m4U,7840
 returnn/torch/frontend/raw_ops.py,sha256=lF0h-KtYYsdaaqQADylVZp9qzPskOOXA4MfmYDyx5IU,296
@@ -253,8 +253,8 @@ returnn/util/sig_proc.py,sha256=Tjz0VOAVyqu2qDCF5HZ1JjALjcFsHcNkcd96WgZeKfE,7265
 returnn/util/task_system.py,sha256=y4sMVXQ25Qd2z0rx03uOlXlkE-jbCYC1Sjfn-XlraVU,26003
 returnn/util/train_proc_manager.py,sha256=Pjht28k6uz6BNQ47uW6Gf880iyq5q4wx7P_K2tmoAM8,3266
 returnn/util/watch_memory.py,sha256=BR5P2kvBN6UI81cE0_1WAA6Hd1SByLbBaiDxvLhPOew,4213
-returnn-1.20250221.114352.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
-returnn-1.20250221.114352.dist-info/METADATA,sha256=VL6JCmrRQYGi4OdodEygiq3hJyM1C-73zvMTSusYpOs,5215
-returnn-1.20250221.114352.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
-returnn-1.20250221.114352.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
-returnn-1.20250221.114352.dist-info/RECORD,,
+returnn-1.20250223.154045.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
+returnn-1.20250223.154045.dist-info/METADATA,sha256=osW4TLxe1IbJ9J1E7K31tPJAgyAAEoVhILW6twz1beg,5215
+returnn-1.20250223.154045.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
+returnn-1.20250223.154045.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
+returnn-1.20250223.154045.dist-info/RECORD,,