returnn 1.20250220.200053-py3-none-any.whl → 1.20250223.154045-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of returnn might be problematic.

returnn/PKG-INFO CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250220.200053
+Version: 1.20250223.154045
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
returnn/_setup_info_generated.py CHANGED
@@ -1,2 +1,2 @@
-version = '1.20250220.200053'
-long_version = '1.20250220.200053+git.bb5c0aa'
+version = '1.20250223.154045'
+long_version = '1.20250223.154045+git.354cf31'
returnn/frontend/_backend.py CHANGED
@@ -350,6 +350,11 @@ class Backend(Generic[T]):
         """
         raise NotImplementedError
 
+    @staticmethod
+    def stop_gradient_scope() -> Any:
+        """stop gradient scope"""
+        raise NotImplementedError
+
     @staticmethod
     def scaled_gradient(tensor: Tensor, scale: Union[float, Tensor]) -> Tensor:
         """
@@ -905,9 +910,9 @@ class Backend(Generic[T]):
     ) -> Tensor:
         """
         Scatters into new zero-tensor.
-        If entries in indices are duplicated, the corresponding values in source will be added together
-        (scatter_add in PyTorch).
-        (TF segment_sum can be implemented via this.)
+        If entries in indices are duplicated, with mode="sum", the corresponding values in source will be added together
+        (``scatter_add`` in PyTorch), otherwise min/max.
+        (segment_sum can be implemented via this.)
 
         :param source: [batch_dims..., indices_dim(s)..., feature_dims...]
         :param indices: [batch_dims..., indices_dim(s)...] -> out_dim
returnn/frontend/array_.py CHANGED
@@ -40,6 +40,8 @@ __all__ = [
     "gather",
     "scatter",
     "scatter_argmax",
+    "scatter_logsumexp",
+    "scatter_logmeanexp",
     "slice",
     "shift_right",
     "shift_left",
@@ -758,19 +760,20 @@ def scatter(
 ) -> Tensor:
     """
     Scatters into new zero-tensor.
-    If entries in indices are duplicated, the corresponding values in source will be added together
-    (scatter_add in PyTorch)
-    with mode=="sum",
-    or otherwise it will take the max/min.
+    If entries in indices are duplicated, with ``mode=="sum"``,
+    the corresponding values in source will be added together
+    (``scatter_add`` in PyTorch),
+    or otherwise it will take the respective reduction.
 
     ``scatter`` is the inverse of :func:`gather`.
 
-    (TF segment_sum can be implemented via this.)
+    (segment_sum can be implemented via this.)
 
     :param source: [batch_dims..., indices_dim(s)..., feature_dims...]
     :param indices: [batch_dims..., indices_dim(s)...] -> out_dim
     :param indices_dim:
-    :param mode: "sum" or "max" or "min". also see :func:`scatter_argmax`.
+    :param mode: "sum", "max", "min", "logsumexp", "logmeanexp", "argmax".
+        (Note: If you ever need mean, argmin, etc, please open an issue/PR.)
     :param fill_value:
     :param out_dim: The indices target dim.
         If not given, will be automatically determined as the sparse_dim from indices.
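
The "sum" behaviour for duplicated indices is exactly what PyTorch's ``scatter_add`` does; a tiny standalone illustration (example values chosen arbitrarily):

    import torch

    src = torch.tensor([1.0, 2.0, 3.0, 4.0])
    idx = torch.tensor([0, 1, 1, 2])
    out = torch.zeros(3).scatter_add(0, idx, src)
    assert torch.equal(out, torch.tensor([1.0, 5.0, 4.0]))  # index 1 received 2.0 + 3.0
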
@@ -778,6 +781,16 @@ def scatter(
         and then we use :func:`rf.split_dims` afterwards.
     :return: [batch_dims..., out_dim(s)..., feature_dims...]
     """
+    if mode == "logsumexp":
+        return scatter_logsumexp(
+            source, indices=indices, indices_dim=indices_dim, fill_value=fill_value, out_dim=out_dim
+        )
+    if mode == "logmeanexp":
+        return scatter_logmeanexp(
+            source, indices=indices, indices_dim=indices_dim, fill_value=fill_value, out_dim=out_dim
+        )
+    if mode == "argmax":
+        return scatter_argmax(source, indices=indices, indices_dim=indices_dim, invalid_idx=fill_value, out_dim=out_dim)
     if not out_dim:
         assert isinstance(indices, Tensor) and indices.sparse_dim
         out_dim = indices.sparse_dim
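
The new modes thus dispatch to dedicated helpers before the existing sum/max/min path. A minimal usage sketch (hypothetical tensors and dims, not taken from the package; it only relies on the ``rf.scatter`` arguments documented above):

    import returnn.frontend as rf
    from returnn.tensor import Tensor, Dim

    def pool_frame_scores(scores: Tensor, segment_idx: Tensor, time_dim: Dim, segments_dim: Dim) -> Tensor:
        # scores: [batch, time] in log space; segment_idx: [batch, time] -> segments_dim (sparse).
        # Log-sum-exp-pools all frame scores that fall into the same segment.
        return rf.scatter(
            scores, indices=segment_idx, indices_dim=time_dim, mode="logsumexp", out_dim=segments_dim
        )
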
@@ -858,6 +871,76 @@ def scatter_argmax(
     return out
 
 
+def scatter_logsumexp(
+    source: Tensor,
+    *,
+    indices: Tensor,
+    indices_dim: Union[Dim, Sequence[Dim]],
+    fill_value: Optional[Union[int, float]] = None,
+    out_dim: Optional[Union[Dim, Sequence[Dim]]] = None,
+) -> Tensor:
+    """
+    Scatters into new zero-tensor.
+    If entries in indices are duplicated, the corresponding values in source will be log-sum-exp'ed together.
+    This is like :func:`scatter` with ``mode="logsumexp"``.
+
+    :param source: [batch_dims..., indices_dim(s)..., feature_dims...]
+    :param indices: [batch_dims..., indices_dim(s)...] -> out_dim
+    :param indices_dim:
+    :param fill_value:
+    :param out_dim: The indices target dim.
+        If not given, will be automatically determined as the sparse_dim from indices.
+        If multiple out dims, use indices into the merged out dims,
+        and then we use :func:`rf.split_dims` afterwards.
+    :return: [batch_dims..., out_dim(s)..., feature_dims...]
+    """
+    if not out_dim:
+        assert isinstance(indices, Tensor) and indices.sparse_dim
+        out_dim = indices.sparse_dim
+    with rf.stop_gradient_scope():
+        max_x = rf.scatter(source, indices=indices, indices_dim=indices_dim, mode="max", out_dim=out_dim)  # [D_out,...]
+        max_x_ = rf.gather(max_x, indices=indices, axis=out_dim)  # [D_src,...]
+    src_ = rf.exp(source - max_x_)
+    if fill_value is not None:
+        fill_value = rf.exp(fill_value - max_x_)
+    tensor = rf.scatter(
+        src_, indices=indices, indices_dim=indices_dim, mode="sum", fill_value=fill_value, out_dim=out_dim
+    )
+    tensor = rf.log(tensor)
+    tensor = rf.where(rf.is_neg_infinite(max_x), rf.zeros((), dtype=source.dtype, device=source.device), tensor)
+    tensor += max_x
+    return tensor
+
+
+def scatter_logmeanexp(
+    source: Tensor,
+    *,
+    indices: Tensor,
+    indices_dim: Union[Dim, Sequence[Dim]],
+    fill_value: Optional[Union[int, float]] = None,
+    out_dim: Optional[Union[Dim, Sequence[Dim]]] = None,
+) -> Tensor:
+    """
+    Scatters into new zero-tensor.
+    If entries in indices are duplicated, the corresponding values in source will be log-mean-exp'ed together.
+    This is like :func:`scatter` with ``mode="logmeanexp"``.
+
+    :param source: [batch_dims..., indices_dim(s)..., feature_dims...]
+    :param indices: [batch_dims..., indices_dim(s)...] -> out_dim
+    :param indices_dim:
+    :param fill_value:
+    :param out_dim: The indices target dim.
+        If not given, will be automatically determined as the sparse_dim from indices.
+        If multiple out dims, use indices into the merged out dims,
+        and then we use :func:`rf.split_dims` afterwards.
+    :return: [batch_dims..., out_dim(s)..., feature_dims...]
+    """
+    ones = rf.ones(dims=indices.dims, dtype=source.dtype, device=source.device)
+    counts = rf.scatter(ones, indices=indices, indices_dim=indices_dim, fill_value=1, out_dim=out_dim)
+    y = scatter_logsumexp(source, indices=indices, indices_dim=indices_dim, fill_value=fill_value, out_dim=out_dim)
+    return y - rf.log(counts)
+
+
 # noinspection PyShadowingBuiltins
 def slice(
     source: Tensor,
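
``scatter_logsumexp`` uses the standard max-shift trick: the per-group maximum is taken out of the gradient (via the new stop-gradient scope), subtracted before exponentiating, and added back after the log, with -inf groups handled explicitly. A rough plain-PyTorch reference for the 1-D case (names and the ``scatter_reduce``-based grouping are illustrative assumptions, not RETURNN API):

    import torch

    def grouped_logsumexp(source: torch.Tensor, index: torch.Tensor, num_groups: int) -> torch.Tensor:
        # source: [N] float, index: [N] int64 with values in [0, num_groups).
        # Per-group maximum, detached so the shift itself carries no gradient.
        max_x = torch.full((num_groups,), float("-inf")).scatter_reduce(
            0, index, source, reduce="amax", include_self=True
        )
        shifted = torch.exp(source - max_x.detach()[index])  # exp(x - max) per element
        summed = torch.zeros(num_groups).scatter_add(0, index, shifted)  # sum within each group
        out = torch.log(summed) + max_x  # undo the shift
        # Groups whose max is -inf (e.g. empty groups) stay at -inf instead of turning into NaN.
        return torch.where(torch.isneginf(max_x), max_x, out)
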
returnn/frontend/gradient.py CHANGED
@@ -11,6 +11,7 @@ __all__ = [
     "set_requires_gradient",
     "gradient",
     "stop_gradient",
+    "stop_gradient_scope",
     "scaled_gradient",
     "scaled_gradient_ext",
     "gradient_checkpoint_scope",
@@ -42,6 +43,28 @@ def stop_gradient(source: Tensor) -> Tensor:
     return source._raw_backend.stop_gradient(source)
 
 
+def stop_gradient_scope():
+    """
+    Create a stop gradient scope.
+    All tensors created within this scope will have their gradient stopped.
+
+    Example::
+
+        a = ...
+        b = ...
+        with stop_gradient_scope():
+            x = a + b
+        y = x * c
+
+    In this example, the tensor ``x`` will have its gradient stopped,
+    i.e. the gradient of ``x`` w.r.t. ``a`` and ``b`` will be zero.
+
+    :return: context manager which enables stopping the gradient. It supports __enter__ and __exit__,
+        and the intended usage is with the `with` statement.
+    """
+    return global_backend.stop_gradient_scope()
+
+
 def scaled_gradient(source: Tensor, scale: Union[float, Tensor]) -> Tensor:
     """
     :param source:
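
A minimal, self-contained sketch of the intended pattern, analogous to its use in ``scatter_logsumexp`` above (the helper is hypothetical and assumes the usual ``rf.reduce_max``/``rf.reduce_sum`` signatures; RETURNN already ships ``rf.log_softmax``, so this only illustrates the scope):

    import returnn.frontend as rf
    from returnn.tensor import Tensor, Dim

    def stable_log_softmax(logits: Tensor, axis: Dim) -> Tensor:
        # The max shift is a constant w.r.t. the gradient, so compute it inside the scope.
        with rf.stop_gradient_scope():
            max_logit = rf.reduce_max(logits, axis=axis)
        shifted = logits - max_logit
        return shifted - rf.log(rf.reduce_sum(rf.exp(shifted), axis=axis))
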
returnn/torch/distributed.py CHANGED
@@ -3,10 +3,11 @@ torch.distributed utils
 """
 
 from __future__ import annotations
-from typing import Optional, Any, Dict
+import ast
+import logging
 import os
 import socket
-import logging
+from typing import Optional, Any, Dict
 
 import torch
 from torch.nn.parallel import DistributedDataParallel
@@ -23,19 +24,31 @@ class DistributedContext:
     """
 
     def __init__(self, options: Dict[str, Any]):
-        import torch.distributed as dist
-
         self._opts = CollectionReadCheckCovered(options)
 
-        # when no backend is specified, both gloo and nccl backends will be created
-        # the gloo backend will be used for collectives with CPU tensors and
-        # the nccl backend will be used for collectives with CUDA tensors
-        dist.init_process_group(backend=self._opts.get("backend", None))
+        # Subprocesses have issues initializing torch.distributed process groups.
+        #
+        # We therefore pass rank/size information of the process group via an env
+        # variable that is automatically inherited in any created subprocess.
+        env_var_name = "_RETURNN_TORCH_DISTRIBUTED_INIT_INFO"
+        prev_init_info = os.environ.get(env_var_name)
+        if prev_init_info:
+            self.prev_init_info = ast.literal_eval(prev_init_info)
+            self._rank = self.prev_init_info["rank"]
+            self._size = self.prev_init_info["size"]
+        else:
+            import torch.distributed as dist
+
+            # when no backend is specified, both gloo and nccl backends will be created
+            # the gloo backend will be used for collectives with CPU tensors and
+            # the nccl backend will be used for collectives with CUDA tensors
+            dist.init_process_group(backend=self._opts.get("backend", None))
+            self._rank = dist.get_rank()
+            self._size = dist.get_world_size()
+            os.environ[env_var_name] = repr({"rank": self._rank, "size": self._size})
 
         self._local_rank = int(os.environ["LOCAL_RANK"])
         self._local_size = int(os.environ["LOCAL_WORLD_SIZE"])
-        self._rank = dist.get_rank()
-        self._size = dist.get_world_size()
 
         _logger.info(
             "Torch distributed initialized. Hostname %s, pid %i, rank %i / size %i, local rank %s / local size %s."
@@ -123,9 +136,9 @@ _is_set_up = False
 _ctx = None  # type: Optional[DistributedContext]
 
 
-def get_ctx(config=None) -> Optional[DistributedContext]:
+def get_ctx(config: Optional[Config] = None) -> Optional[DistributedContext]:
     """
-    :param Config|None config:
+    :param config:
     :returns: the global context if Torch distributed is enabled, or None otherwise.
         If we did not setup the context yet, it will automatically create it.
     """
returnn/torch/frontend/_backend.py CHANGED
@@ -3,7 +3,7 @@ Backend for exposing PyTorch-specific functionality.
 """
 
 from __future__ import annotations
-from typing import Optional, Union, Sequence, Tuple, List, Dict, Generator
+from typing import Optional, Union, Any, Sequence, Tuple, List, Dict, Generator
 import contextlib
 import torch
 import numpy
@@ -212,6 +212,11 @@ class TorchBackend(Backend[torch.Tensor]):
         out.raw_tensor = out.raw_tensor.detach()
         return out
 
+    @staticmethod
+    def stop_gradient_scope() -> Any:
+        """stop gradient scope"""
+        return torch.no_grad()
+
     @staticmethod
     def scaled_gradient(tensor: Tensor, scale: Union[float, Tensor]) -> Tensor:
         """scaled gradient"""
@@ -1061,9 +1066,9 @@ class TorchBackend(Backend[torch.Tensor]):
     ) -> Tensor:
         """
         Scatters into new zero-tensor.
-        If entries in indices are duplicated, the corresponding values in source will be added together
-        (scatter_add in PyTorch).
-        (TF segment_sum can be implemented via this.)
+        If entries in indices are duplicated, with mode="sum", the corresponding values in source will be added together
+        (``scatter_add`` in PyTorch), otherwise min/max.
+        (segment_sum can be implemented via this.)
 
         :param source: [batch_dims..., indices_dim(s)..., feature_dims...]
         :param indices: [batch_dims..., indices_dim(s)...] -> out_dim
returnn/util/basic.py CHANGED
@@ -3773,9 +3773,9 @@ def should_write_to_disk(config):
     if config.typed_value("torch_distributed") is not None:
         assert BackendEngine.is_torch_selected(), "torch_distributed assumes PyTorch"
 
-        import torch.distributed
+        import returnn.torch.distributed as torch_distributed
 
-        if torch.distributed.get_rank() != 0:
+        if torch_distributed.get_ctx(config).rank() != 0:
             return False
     elif config.is_true("use_horovod"):
         assert BackendEngine.is_tensorflow_selected(), "use_horovod currently assumes TensorFlow"
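
The rank check now goes through RETURNN's own distributed context, which also works in subprocesses thanks to the env-var handoff shown earlier. The same pattern in user code might look like this (the helper name is hypothetical; ``get_ctx`` returns None when ``torch_distributed`` is not enabled in the config):

    from returnn.config import Config
    import returnn.torch.distributed as torch_distributed

    def should_this_process_write(config: Config) -> bool:
        ctx = torch_distributed.get_ctx(config)
        if ctx is None:  # torch_distributed not configured: single-process training
            return True
        return ctx.rank() == 0  # only the global rank-0 process writes to disk
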
returnn-1.20250223.154045.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250220.200053
+Version: 1.20250223.154045
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
returnn-1.20250223.154045.dist-info/RECORD CHANGED
@@ -1,9 +1,9 @@
-returnn/PKG-INFO,sha256=U58QGiF-75H5Ac8V3JUwKdPkzP3TPwuPkhfzHhpa7Vc,5215
+returnn/PKG-INFO,sha256=osW4TLxe1IbJ9J1E7K31tPJAgyAAEoVhILW6twz1beg,5215
 returnn/__init__.py,sha256=biBtRsM0WZ406vShaeH-9WFoqJ8XwTbn6g0EeFJ7l8E,1012
 returnn/__main__.py,sha256=qBFbuB1yN3adgVM5pXt2-Yq9vorjRNchNPL8kDKx44M,31752
 returnn/__old_mod_loader__.py,sha256=nvsNY-xELdS_IPNkv66Q9Rmvg4dbGW0-EBRDcCmctos,7654
 returnn/__setup__.py,sha256=22kQn2fh11iPM0hLb2Fy5sLmoU1JGvmDxXRYuRgQkwU,4659
-returnn/_setup_info_generated.py,sha256=OojdMrmzo4naqIdlDTwnSiMHtnmVuqlosY9_dqmm20c,77
+returnn/_setup_info_generated.py,sha256=ZkDu5IsuZj9TiGVIFC5h52LbiTy4KoJCJD-MAi4ZWmc,77
 returnn/config.py,sha256=3tmKhB6FnQZaNdtcYsiB61JnEY--iZ2qmJ4yq0b6tE0,29140
 returnn/forward_iface.py,sha256=A_OJiaXsX4MlXQRzST86ylyxSUZbC402PQL1REcqHjM,911
 returnn/learning_rate_control.py,sha256=ZvWryAn_tv9DhV8sh1LV3eE34Yltl3On3mYZAG4hR9s,34684
@@ -75,12 +75,12 @@ returnn/extern/graph_editor/subgraph.py,sha256=R3uIFqWgiL7L5S4YATm9o9a3wfEa_mSb4
 returnn/extern/graph_editor/transform.py,sha256=d9fEgu0JC342q0g9niVxRWMKzkQQA9mrrajBGcU1o_s,29349
 returnn/extern/graph_editor/util.py,sha256=QMrQeQZ7lJwsrNQub9tof0h3quEaoHiGJaZmogQ7jXE,18707
 returnn/frontend/__init__.py,sha256=2aS7nbxXniIrBp2DODl0xN0f3IJ_dX4Bi9ZlR7W5_DE,1472
-returnn/frontend/_backend.py,sha256=W3J3ZSOxonX6wk-wY2dX_aokXHpm1VQ1V0qSjllQxUM,50165
+returnn/frontend/_backend.py,sha256=lRAtOT0oAkgc_WGYBUviGbgIH3Yet6D17sjlEJH56Pg,50327
 returnn/frontend/_cache.py,sha256=JAhi7L-raQ3A-NC3JUYDtdRTwT3BGJJGGZxrZ8MfEWQ,8403
 returnn/frontend/_numpy_backend.py,sha256=2oCtG0YCWL_89v4cD_jDj8em1O_Fp-_YWl5EblGi_yo,7858
 returnn/frontend/_random_journal.py,sha256=_ktP_mjgx8vtQQGX_DofdhewJj0aPiczefTWeemPkmo,5457
 returnn/frontend/_utils.py,sha256=4A3MSRM0i86J77550uR_AjcBEPu6nymLUZ9Xd1V3Fkc,12073
-returnn/frontend/array_.py,sha256=UHTQmb_cFsjVStAELcCqMkCbQNQiBiwN4gQZu6CloIA,44126
+returnn/frontend/array_.py,sha256=x_OSKQ_WyUFqKWEJdf3dHc6bfifvkV_aiVsmaZVCEv0,47816
 returnn/frontend/attention.py,sha256=GKt-Xqnz8sIyXVrE0i4VCS7J2Wu7dmoH_BA0Cu8CrXQ,45769
 returnn/frontend/backend.py,sha256=iQ9w4xl8Ea7bgpb0VUaCKq50rV5Bl2E5J8Rhd-oqD_c,883
 returnn/frontend/build_from_dict.py,sha256=rfWa2rjjhIR_kIQED_nMrygrQBunS6unegzWTLVbC98,3017
@@ -93,7 +93,7 @@ returnn/frontend/device.py,sha256=K7Y1qoQcO4GIHgLkPLQWK-GVT8gKL8GwyQrmPo8LgBE,14
 returnn/frontend/dims.py,sha256=hKA7IQRB0DbohN1ngNw31W44BsyjdHCtYAccxOcumzQ,10872
 returnn/frontend/dropout.py,sha256=rsx3p5b0NblBfXXSQZTQFJ8jUUS3fj4Qzc39iffBMCA,5006
 returnn/frontend/dtype.py,sha256=Ooc5BrcNrTp6XShuFEV9g5V6-niuy4ImP_Lt_Qgq3jE,1886
-returnn/frontend/gradient.py,sha256=dOUvLqN-vxsvjKQfpfIvEYlx4TlpHkOk-p9hsB680iA,3376
+returnn/frontend/gradient.py,sha256=G-Qv4gKGHYEeB92Zwco9ao4qjd6umZPUzQC4J-fbYWo,4033
 returnn/frontend/graph.py,sha256=PIv901WZ1rfTV0QGkyzBv6UxfWk9NsLGxdoJ5x9-8Xg,1818
 returnn/frontend/hooks.py,sha256=jYPbsb4gy5HORRZvKTEJbLcoJri5hOt5ADbhnTCytQo,5507
 returnn/frontend/init.py,sha256=bVB7bpghaY8DI_HL0mkB_9z95onWnIX2zlW4hlMYnRw,7494
@@ -206,7 +206,7 @@ returnn/tf/util/ken_lm.py,sha256=R60UAoywriuDIeQ2Hk3Vm_waf2Hxxc88ofzEw6X6Sd4,173
 returnn/tf/util/open_fst.py,sha256=sZRDw4TbxvhGqpGdUJWy1ebvlZm4_RPhygpRw9uLAOQ,11265
 returnn/torch/README.md,sha256=jzJ2FpOHW02vxN69yKaV97C9LI-hmvjBglKfdZXIDdc,85
 returnn/torch/__init__.py,sha256=MHEUyNHB20Vy89uKAqZoj6FxJKF1Gq3HW-i6ra1pNcI,24
-returnn/torch/distributed.py,sha256=i13cUVjI7GxpO0TAresrNyCM0ZBAaf-cXNr09Fmg_2k,6266
+returnn/torch/distributed.py,sha256=skFyutdVztxgTEk3HHJ8S83qRWbNpkNT8Tj16Ic0_hE,6981
 returnn/torch/engine.py,sha256=8BIpdcrpbJL9HrvCX-hISh-14zW9aSrHGvRWT9s0zOk,77103
 returnn/torch/updater.py,sha256=GqtBvZpElPVMm0lq84JPl4NVLFFETZAzAbR0rTomSao,28249
 returnn/torch/data/__init__.py,sha256=6cLNEi8KoGI12PF6akN7mI_mtjlx-0hcQAfMYoExwik,132
@@ -216,7 +216,7 @@ returnn/torch/data/queued_data_iter.py,sha256=PoOsGHdHVZjTmcyfq_ZOw--P6hyfTdmAWI
 returnn/torch/data/returnn_dataset_wrapper.py,sha256=1Bw82-Ge_8m_DSDXZNqQ3zGDic2HQlp6jysELL0NVK0,7369
 returnn/torch/data/tensor_utils.py,sha256=-Teqi--LLbt6q_5mDRdoHZHmPgSdC83W706ukif_YiU,1284
 returnn/torch/frontend/__init__.py,sha256=AA48HZnC17ASuKA0EWy8loZ-Bib_yUtqF4T1wYvjst4,62
-returnn/torch/frontend/_backend.py,sha256=mjR6Ilt2zlnIO4_CpVPCLQ0XVJa_QmW3HsZtR2KT8yk,101110
+returnn/torch/frontend/_backend.py,sha256=ZHeE5A9nPo6i2KShRRNkiqpIrz4DmA0g3QhWddzFikg,101274
 returnn/torch/frontend/_rand.py,sha256=1JgIkV2XmpgJD86zXZ-NCAe-QuoP2swr6NaS1oz3Qa8,1830
 returnn/torch/frontend/bridge.py,sha256=Z2_UW8AagezC7zsXDc5PKcd8G9WwisV7j9SWGHU0m4U,7840
 returnn/torch/frontend/raw_ops.py,sha256=lF0h-KtYYsdaaqQADylVZp9qzPskOOXA4MfmYDyx5IU,296
@@ -233,7 +233,7 @@ returnn/torch/util/gradient_checkpoint.py,sha256=iLy-FB65DC8O6LxzmMvFjnSdpIVpko8
 returnn/torch/util/module.py,sha256=MXHIrF9Isu575DDJIa81212ULKwdqu1oOLxDVZecVSk,1693
 returnn/torch/util/scaled_gradient.py,sha256=3585VuNypBty-pW6r3BKK047H3MqZQSdMjXeYAb4cmU,3192
 returnn/util/__init__.py,sha256=UIG1qw4idqhW71BV60ha7h9PktxvEVcBIu0lYRossK8,336
-returnn/util/basic.py,sha256=nhCfxWwGL7FchgFW5x9V2OgXD0HtpN885NASdwfeKYg,142339
+returnn/util/basic.py,sha256=__rtDp8crZfm0mEeAKsRxNCdWuBHh9OeOm8UO-X4CJU,142380
 returnn/util/better_exchook.py,sha256=MVMnuu6KoyqgvlMeQLQNTfdspcPR9MwigCXOpeTVqCI,62956
 returnn/util/bpe.py,sha256=LWFhICZsEOnMwNws0lybPNzKRX6rSr8yKCvP65vjl9Y,19656
 returnn/util/debug.py,sha256=wuRzdg9zB84WWCGyTjmRR_zYypu8gXxlc0nZ6si9OC8,28224
@@ -253,8 +253,8 @@ returnn/util/sig_proc.py,sha256=Tjz0VOAVyqu2qDCF5HZ1JjALjcFsHcNkcd96WgZeKfE,7265
 returnn/util/task_system.py,sha256=y4sMVXQ25Qd2z0rx03uOlXlkE-jbCYC1Sjfn-XlraVU,26003
 returnn/util/train_proc_manager.py,sha256=Pjht28k6uz6BNQ47uW6Gf880iyq5q4wx7P_K2tmoAM8,3266
 returnn/util/watch_memory.py,sha256=BR5P2kvBN6UI81cE0_1WAA6Hd1SByLbBaiDxvLhPOew,4213
-returnn-1.20250220.200053.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
-returnn-1.20250220.200053.dist-info/METADATA,sha256=U58QGiF-75H5Ac8V3JUwKdPkzP3TPwuPkhfzHhpa7Vc,5215
-returnn-1.20250220.200053.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
-returnn-1.20250220.200053.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
-returnn-1.20250220.200053.dist-info/RECORD,,
+returnn-1.20250223.154045.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
+returnn-1.20250223.154045.dist-info/METADATA,sha256=osW4TLxe1IbJ9J1E7K31tPJAgyAAEoVhILW6twz1beg,5215
+returnn-1.20250223.154045.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
+returnn-1.20250223.154045.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
+returnn-1.20250223.154045.dist-info/RECORD,,