returnn 1.20250225.201207__py3-none-any.whl → 1.20250226.115259__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of returnn might be problematic.

returnn/PKG-INFO CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250225.201207
+Version: 1.20250226.115259
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer

returnn/_setup_info_generated.py CHANGED
@@ -1,2 +1,2 @@
-version = '1.20250225.201207'
-long_version = '1.20250225.201207+git.c7cfe6c'
+version = '1.20250226.115259'
+long_version = '1.20250226.115259+git.0d32534'

returnn/frontend/_backend.py CHANGED
@@ -391,17 +391,17 @@ class Backend(Generic[T]):
         source: Tensor,
         *,
         dims: Sequence[Dim],
-        out_dim: Optional[Dim] = None,
-    ) -> Tuple[Tensor, Dim]:
+        out_dim: Dim,
+    ) -> Tensor:
         """
         Merges a list of axes into a single one. (Flatten the dims.)
         E.g. input is (batch, width, height, dim) and dims=(width,height), then we get (batch, width*height, dim).
         Or input is (batch, time, height, dim) and axes=(height,dim), then we get (batch, time, height*dim).

         :param source:
-        :param dims:
-        :param out_dim:
-        :return: tensor, out_dim
+        :param dims: list of dims to merge. len(dims) >= 2
+        :param out_dim: resulting merged dim
+        :return: tensor
         """
         raise NotImplementedError

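For orientation, the docstring example above ((batch, width, height, dim) merged over dims=(width, height) giving (batch, width*height, dim)) corresponds to a call like the following sketch at the user-facing rf.merge_dims level, which keeps the (tensor, out_dim) return value; the static dim sizes and the backend setup are assumptions made only for illustration.

    import returnn.frontend as rf
    from returnn.tensor import Dim

    # Purely illustrative static dims; in practice these usually come from the data pipeline.
    width_dim = Dim(16, name="width")
    height_dim = Dim(8, name="height")
    feat_dim = Dim(64, name="feat")

    x = rf.zeros([width_dim, height_dim, feat_dim])  # assumes a backend has already been selected
    # rf.merge_dims still returns both the merged tensor and the merged dim;
    # only the Backend method now returns just the tensor.
    merged, merged_dim = rf.merge_dims(x, dims=(width_dim, height_dim))
    # merged has dims (width*height, feat); merged_dim describes the flattened axis.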

returnn/frontend/array_.py CHANGED
@@ -182,8 +182,29 @@ def merge_dims(
         else:
             out_dim = Dim(1, name="ext")
         return rf.expand_dim(source, out_dim), out_dim
+    if len(dims) == 1:
+        if out_dim is None or out_dim == dims[0]:
+            return source, dims[0]
+        return rf.replace_dim(source, in_dim=dims[0], out_dim=out_dim)
+    if out_dim is None:
+        out_dim = dims[0]
+        reset_dyn_size = False
+        for d in dims[1:]:
+            reset_dyn_size |= d.need_masking() and out_dim.capacity != 1
+            out_dim = out_dim * d
+        if reset_dyn_size:
+            # The dynamic sizes as calculated via dim math would not correctly describe how the tensor looks like.
+            # This would then potentially discard some of the data in the tensor in subsequent operations,
+            # when masking is applied.
+            # Thus, discard the dynamic sizes, and just treat it as a flat dim with scalar dynamic size.
+            # https://github.com/rwth-i6/returnn/issues/1694
+            out_dim_size = dims[0].get_dim_value_tensor()
+            for d in dims[1:]:
+                out_dim_size *= d.get_dim_value_tensor()
+            assert isinstance(out_dim_size, Tensor) and out_dim_size.dims == ()  # scalar
+            out_dim.dyn_size_ext = out_dim_size
     # noinspection PyProtectedMember
-    return source._raw_backend.merge_dims(source, dims=dims, out_dim=out_dim)
+    return source._raw_backend.merge_dims(source, dims=dims, out_dim=out_dim), out_dim


 def split_dims(
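
To make the masking concern in the comment above concrete with plain tensors (all shapes and values below are made up): flattening a dynamic dim together with another dim can leave the valid entries non-contiguous along the merged axis, so a per-sequence size computed via dim math could mask out real data.

    import torch

    # Padded batch: 2 sequences, a group dim of size 2, time capacity 3, sequence lengths [3, 2].
    # 0 marks padding.
    x = torch.tensor([
        [[1, 2, 3],
         [4, 5, 6]],    # seq 0, length 3: all positions valid
        [[7, 8, 0],
         [9, 10, 0]],   # seq 1, length 2: last time step is padding
    ])
    flat = x.reshape(2, -1)  # merge (group, time) -> merged axis of capacity 6
    print(flat[1])  # tensor([ 7,  8,  0,  9, 10,  0])
    # The valid entries of seq 1 are not a contiguous prefix of the merged axis, so a
    # per-sequence dynamic size of 2 * 2 = 4 would keep a padding slot (the 0 after 8)
    # and drop valid data (the 10) once masking is applied; hence the scalar-size fallback.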

returnn/tensor/_dim_extra.py CHANGED
@@ -390,15 +390,15 @@ class _DimMixin:
         if dim_extra:
             dim_extra.cache_dyn_size_ext_dev.clear()
             dim_extra.cache_seq_mask.clear()
+            if dim.dyn_size_ext is not None or dim.dimension is None:
+                dim_extra.cache_dim_math.clear()
+            else:
+                dim_extra.cache_dim_math.clear_dynamic()
         if only_self:
             return
         if dim_extra:
             # Any dims via dim math could also contain raw tensors,
             # so iterate through them.
-            if dim.dyn_size_ext is not None or dim.dimension is None:
-                dim_extra.cache_dim_math.clear()
-            else:
-                dim_extra.cache_dim_math.clear_dynamic()
             queue += dim_extra.cache_dim_math.values()
             if dim_extra.same_as:
                 queue.append(dim_extra.same_as)

returnn/tf/frontend_layers/_backend.py CHANGED
@@ -241,8 +241,8 @@ class ReturnnLayersBackend(Backend[Layer]):
         source: Tensor,
         *,
         dims: Sequence[Dim],
-        out_dim: Optional[Dim] = None,
-    ) -> Tuple[Tensor, Dim]:
+        out_dim: Dim,
+    ) -> Tensor:
         """
         Merges a list of axes into a single one. (Flatten the dims.)
         E.g. input is (batch, width, height, dim) and dims=(width,height), then we get (batch, width*height, dim).
@@ -251,18 +251,14 @@ class ReturnnLayersBackend(Backend[Layer]):
         :param source:
         :param dims:
         :param out_dim:
-        :return: tensor, out_dim
+        :return: tensor
         """
         if not isinstance(source, Tensor):
             raise TypeError(f"merge_dims: unexpected type for source {source!r}, need tensor")
-        if out_dim is None:
-            out_dim = dims[0]
-            for d in dims[1:]:
-                out_dim = out_dim * d
         layer = rfl.make_layer(
             {"class": "merge_dims", "from": source, "axes": dims, "out_dim": out_dim}, name="merge_dims"
         )
-        return layer, out_dim
+        return layer

     @staticmethod
     def split_dims(
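
The TF-layers backend thus just emits a classic "merge_dims" layer, now with the precomputed out_dim passed through. In a plain net-dict config, the corresponding layer would look roughly like the following sketch; the layer names and axis specifiers are illustrative, not taken from this diff.

    # Illustrative net-dict fragment; "data", the layer names and the axis specifiers are placeholders.
    network = {
        "merged": {"class": "merge_dims", "from": "data", "axes": ["T", "F"]},
        "output": {"class": "copy", "from": "merged"},
    }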

returnn/torch/frontend/_backend.py CHANGED
@@ -262,8 +262,8 @@ class TorchBackend(Backend[torch.Tensor]):
         source: Tensor,
         *,
         dims: Sequence[Dim],
-        out_dim: Optional[Dim] = None,
-    ) -> Tuple[Tensor, Dim]:
+        out_dim: Dim,
+    ) -> Tensor:
         """
         Merges a list of axes into a single one. (Flatten the dims.)
         E.g. input is (batch, width, height, dim) and dims=(width,height), then we get (batch, width*height, dim).
@@ -272,18 +272,12 @@ class TorchBackend(Backend[torch.Tensor]):
         :param source:
         :param dims:
         :param out_dim:
-        :return: tensor, out_dim
+        :return: tensor
         """
-        assert dims
-        if len(dims) == 1:
-            return source, dims[0]
+        assert len(dims) >= 2
         first_axis = min(source.dims.index(d) for d in dims)
         pre_dims = source.dims[:first_axis]
         post_dims = [d for d in source.dims if d not in dims and d not in pre_dims]
-        if out_dim is None:
-            out_dim = dims[0]
-            for d in dims[1:]:
-                out_dim = out_dim * d
         source = source.copy_transpose(tuple(pre_dims) + tuple(dims) + tuple(post_dims), allow_int=False)
         out = Tensor(
             "merge_dims",
@@ -295,7 +289,7 @@ class TorchBackend(Backend[torch.Tensor]):
         out.raw_tensor = torch.reshape(source.raw_tensor, out_shape)
         if source.feature_dim and source.feature_dim in dims:
             out.feature_dim = out_dim
-        return out, out_dim
+        return out

     @staticmethod
     def split_dims(
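
Under the hood the Torch backend therefore reduces to a transpose that makes the dims to merge adjacent, followed by a single torch.reshape. A plain-PyTorch sketch of the same shape transformation, with made-up sizes:

    import torch

    x = torch.randn(2, 16, 8, 64)  # (batch, width, height, feat)
    # Here the dims to merge are already adjacent, so no transpose is needed;
    # otherwise they would first be permuted next to each other.
    merged = torch.reshape(x, (2, 16 * 8, 64))  # (batch, width*height, feat)
    assert merged.shape == (2, 128, 64)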

returnn/torch/util/debug_inf_nan.py CHANGED
@@ -39,6 +39,7 @@ from io import TextIOBase
 import traceback
 from types import FrameType
 import torch
+import tree

 # noinspection PyProtectedMember
 from torch.utils._python_dispatch import TorchDispatchMode
@@ -96,6 +97,7 @@ def debug_inf_nan(

 # For efficiency, and to be less spammy
 _TraceFuncNameBlacklist = {
+    "aten::empty.memory_format",
     "aten::zeros_like",
     "aten::ones_like",
     "aten::full",
@@ -113,6 +115,7 @@ _TraceFuncNameBlacklist = {
     "aten::split_with_sizes",
     "aten::slice.Tensor",
     "aten::select.int",
+    "aten::max_pool2d_with_indices",
 }

@@ -140,19 +143,20 @@ class _TraceOps(TorchDispatchMode):
         if self.report_every_op_call:
             print(f"--- op {func.name()}", file=self.file)
         out = func(*args, **kwargs)
-        if isinstance(out, torch.Tensor):
-            with no_python_dispatcher():
-                got_nan_inf_t = torch.stack([torch.isnan(out).any(), torch.isinf(out).any()]).cpu()
-                got_nan = got_nan_inf_t[0].item()
-                got_inf = got_nan_inf_t[1].item()
-                if got_nan or got_inf:
-                    s = "/".join([s_ for s_, b in [("nan", got_nan), ("inf", got_inf)] if b])
-                    print(f"--> {s} in {func}: {out}", file=self.file)
-                    traceback.print_list(
-                        _extract_stack_up_to(skip_top_num_frames=1, root_frame=self.root_frame), file=self.file
-                    )
-                    if self.stop_reporting_after_first_inf_nan:
-                        self.enabled = False
+        for out_ in tree.flatten(out):
+            if isinstance(out_, torch.Tensor):
+                with no_python_dispatcher():
+                    got_nan_inf_t = torch.stack([torch.isnan(out_).any(), torch.isinf(out_).any()]).cpu()
+                    got_nan = got_nan_inf_t[0].item()
+                    got_inf = got_nan_inf_t[1].item()
+                    if got_nan or got_inf:
+                        s = "/".join([s_ for s_, b in [("nan", got_nan), ("inf", got_inf)] if b])
+                        print(f"--> {s} in {func}: {out_}", file=self.file)
+                        traceback.print_list(
+                            _extract_stack_up_to(skip_top_num_frames=1, root_frame=self.root_frame), file=self.file
+                        )
+                        if self.stop_reporting_after_first_inf_nan:
+                            self.enabled = False
         return out

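Here tree is the dm-tree package that RETURNN already uses for nested structures; flattening the op output lets the checker also handle ops that return tuples or lists of tensors instead of a single tensor. A standalone sketch of the same idea, with a made-up example op and values:

    import torch
    import tree  # dm-tree

    # torch.split returns a tuple of tensors, not a single tensor.
    out = torch.split(torch.tensor([1.0, float("inf"), 3.0, 4.0]), 2)
    for t in tree.flatten(out):
        if isinstance(t, torch.Tensor):
            got_nan = bool(torch.isnan(t).any())
            got_inf = bool(torch.isinf(t).any())
            if got_nan or got_inf:
                print("found", "/".join(s for s, b in [("nan", got_nan), ("inf", got_inf)] if b), "in", t)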

returnn-1.20250225.201207.dist-info/METADATA → returnn-1.20250226.115259.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250225.201207
+Version: 1.20250226.115259
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer

returnn-1.20250225.201207.dist-info/RECORD → returnn-1.20250226.115259.dist-info/RECORD
@@ -1,9 +1,9 @@
-returnn/PKG-INFO,sha256=MlICTMbISeiq6sz_1NI8XnxGZLlQzE0eH9VfGGDjlKs,5215
+returnn/PKG-INFO,sha256=2Ws--V5aicc3WJ-I6OrqPbbrvVNTH3Cnno6L7yeIyKY,5215
 returnn/__init__.py,sha256=biBtRsM0WZ406vShaeH-9WFoqJ8XwTbn6g0EeFJ7l8E,1012
 returnn/__main__.py,sha256=qBFbuB1yN3adgVM5pXt2-Yq9vorjRNchNPL8kDKx44M,31752
 returnn/__old_mod_loader__.py,sha256=nvsNY-xELdS_IPNkv66Q9Rmvg4dbGW0-EBRDcCmctos,7654
 returnn/__setup__.py,sha256=22kQn2fh11iPM0hLb2Fy5sLmoU1JGvmDxXRYuRgQkwU,4659
-returnn/_setup_info_generated.py,sha256=DwJubgEQQUSxnLSgr9-UFixkYOeM2bKYKKxyIW_3L3w,77
+returnn/_setup_info_generated.py,sha256=3ur2a8rg2h6MJe2vAo7Tq4axfkV1GYJMcaQdnsmshb8,77
 returnn/config.py,sha256=3tmKhB6FnQZaNdtcYsiB61JnEY--iZ2qmJ4yq0b6tE0,29140
 returnn/forward_iface.py,sha256=A_OJiaXsX4MlXQRzST86ylyxSUZbC402PQL1REcqHjM,911
 returnn/learning_rate_control.py,sha256=ZvWryAn_tv9DhV8sh1LV3eE34Yltl3On3mYZAG4hR9s,34684
@@ -75,12 +75,12 @@ returnn/extern/graph_editor/subgraph.py,sha256=R3uIFqWgiL7L5S4YATm9o9a3wfEa_mSb4
 returnn/extern/graph_editor/transform.py,sha256=d9fEgu0JC342q0g9niVxRWMKzkQQA9mrrajBGcU1o_s,29349
 returnn/extern/graph_editor/util.py,sha256=QMrQeQZ7lJwsrNQub9tof0h3quEaoHiGJaZmogQ7jXE,18707
 returnn/frontend/__init__.py,sha256=2aS7nbxXniIrBp2DODl0xN0f3IJ_dX4Bi9ZlR7W5_DE,1472
-returnn/frontend/_backend.py,sha256=VWTe2ps8UK9BQpbnZRqNfbesQ6PGH5WHqkEa4ai8btw,50353
+returnn/frontend/_backend.py,sha256=TNkEdj9GKxJfSM1ZMQ_SdAQzn2TU7SQbG6JGdaWhUeI,50374
 returnn/frontend/_cache.py,sha256=JAhi7L-raQ3A-NC3JUYDtdRTwT3BGJJGGZxrZ8MfEWQ,8403
 returnn/frontend/_numpy_backend.py,sha256=2oCtG0YCWL_89v4cD_jDj8em1O_Fp-_YWl5EblGi_yo,7858
 returnn/frontend/_random_journal.py,sha256=_ktP_mjgx8vtQQGX_DofdhewJj0aPiczefTWeemPkmo,5457
 returnn/frontend/_utils.py,sha256=4A3MSRM0i86J77550uR_AjcBEPu6nymLUZ9Xd1V3Fkc,12073
-returnn/frontend/array_.py,sha256=CYk8lQinS2EDINBttl4UqSYP2BhqikeSjnbNy9Mzpx4,48013
+returnn/frontend/array_.py,sha256=M5vCeH0nlwJ-zrdjbZpsMLN6StOLn0iM7PnXvGLLE3g,49154
 returnn/frontend/attention.py,sha256=GKt-Xqnz8sIyXVrE0i4VCS7J2Wu7dmoH_BA0Cu8CrXQ,45769
 returnn/frontend/backend.py,sha256=iQ9w4xl8Ea7bgpb0VUaCKq50rV5Bl2E5J8Rhd-oqD_c,883
 returnn/frontend/build_from_dict.py,sha256=rfWa2rjjhIR_kIQED_nMrygrQBunS6unegzWTLVbC98,3017
@@ -154,7 +154,7 @@ returnn/sprint/extern_interface.py,sha256=l-v1X-Yg0UpTFe7Y3c4FwWOqpSNuv9Oy5EzqlK
 returnn/sprint/interface.py,sha256=_IGNQlOFcJcwsSeVkKcM-y8g2NDJv07jFhii47KfWtg,36490
 returnn/tensor/README.md,sha256=X6BqcRLrPLPnwF9yR69uqIFrMnNluj9pBkOPHwNgzuo,501
 returnn/tensor/__init__.py,sha256=on6j5PEOQpck50UcsR4nJzJSDmoVy34z1Oq4efv6Ax0,154
-returnn/tensor/_dim_extra.py,sha256=ywfU-vMewufkDYeM1UE3Gfee3NhAUkoSZxvC6L-lkH8,122551
+returnn/tensor/_dim_extra.py,sha256=kL_nnGNaRpKIQLlvCo6TJ35WynS_jIssNZusFmtOAE0,122551
 returnn/tensor/_tensor_extra.py,sha256=v8oacDyrNMlDTRF0XR0LcU04snr5I1D9_yidw1ZWKk4,164859
 returnn/tensor/_tensor_mixin_base.py,sha256=H5z86I0NejxrSgMH1c5oXQzBqS6L9HpvP4y7oegBaSc,643
 returnn/tensor/_tensor_op_overloads.py,sha256=kVVcnYtcZdW7Vjj78V1Im_yVX2M2r6dUTgeiAQZ37X0,5449
@@ -177,7 +177,7 @@ returnn/tf/sprint.py,sha256=Yqjh0-6sCWHpdDPQCzHKx7TwQCOjJyjfd0KHtnYdd-8,5471
 returnn/tf/updater.py,sha256=St4Z5iBjlkWaB6CiS-K1VNc_iLaan2e6-mVMTTPldzk,72034
 returnn/tf/frontend_layers/README.md,sha256=P4vVl_EK-4jT55m40mq-K4Nr9yFY0tJR5fmDzTHSDFE,1096
 returnn/tf/frontend_layers/__init__.py,sha256=MGUn7rv6fOefbtkX-5pq6fC1T6Y5h0oh1uOPSEcv1_I,506
-returnn/tf/frontend_layers/_backend.py,sha256=6bT_4fjfV0IRcFqcZ0kcWLx0eYZGRqAJDTEfWSRIFnA,47451
+returnn/tf/frontend_layers/_backend.py,sha256=U7rbRY9XgMkxxyWY2D8KG-KesSOEGLCxn-Gl6dgwmPc,47277
 returnn/tf/frontend_layers/_utils.py,sha256=ijByaDOqPDod5mZC9EoTkt8PHBEODXHsWbkwDOF9XW4,4205
 returnn/tf/frontend_layers/cond.py,sha256=yQ2h5W0sgMZndJdrWv2EE9k9yIcspQ1U0HwBSh3hOKE,14830
 returnn/tf/frontend_layers/config_entry_points.py,sha256=t01RWOiaZohzuqPXX-MLV0P5yCOfE0dz-9dZ77_pK4c,5751
@@ -216,7 +216,7 @@ returnn/torch/data/queued_data_iter.py,sha256=PoOsGHdHVZjTmcyfq_ZOw--P6hyfTdmAWI
 returnn/torch/data/returnn_dataset_wrapper.py,sha256=1Bw82-Ge_8m_DSDXZNqQ3zGDic2HQlp6jysELL0NVK0,7369
 returnn/torch/data/tensor_utils.py,sha256=-Teqi--LLbt6q_5mDRdoHZHmPgSdC83W706ukif_YiU,1284
 returnn/torch/frontend/__init__.py,sha256=AA48HZnC17ASuKA0EWy8loZ-Bib_yUtqF4T1wYvjst4,62
-returnn/torch/frontend/_backend.py,sha256=8rCnNRoiUf_Sqmb1u2Y7Mf89Hmzd0LkrroLoXVKn6ww,101468
+returnn/torch/frontend/_backend.py,sha256=TqyDWNP4XCvJNNGn8jyxaT8BOEjVE24QCUR3qsTIS3A,101242
 returnn/torch/frontend/_rand.py,sha256=1JgIkV2XmpgJD86zXZ-NCAe-QuoP2swr6NaS1oz3Qa8,1830
 returnn/torch/frontend/bridge.py,sha256=Z2_UW8AagezC7zsXDc5PKcd8G9WwisV7j9SWGHU0m4U,7840
 returnn/torch/frontend/raw_ops.py,sha256=lF0h-KtYYsdaaqQADylVZp9qzPskOOXA4MfmYDyx5IU,296
@@ -226,7 +226,7 @@ returnn/torch/optim/lion.py,sha256=jV_qfwyyO5HAgqW94caap-ALkVjU688RpRgkZyLNZ5Y,5
 returnn/torch/util/README.md,sha256=AW-6ueWhgcwDcm57md6sm227QXNkvLnlRLwaH7NlS-w,193
 returnn/torch/util/__init__.py,sha256=AOXYUjzPm0XrzFJCPAXo9Jj_FvqD1XH3FfKtho80Vl8,26
 returnn/torch/util/array_.py,sha256=ell3VZvn01SLtF9Pw2fvPzFNO-XDQ7tSB9VCrVSKmSA,2556
-returnn/torch/util/debug_inf_nan.py,sha256=v0IzLy4kRKBWChSV70O4x829QtEuXMwB9mBqAyE4O2o,6223
+returnn/torch/util/debug_inf_nan.py,sha256=pXAHwgyn1aimLjD-XUblY2syBRCK0J20ioWgpvWfHvg,6400
 returnn/torch/util/diagnose_gpu.py,sha256=PYMmSk7iQ-jC3RXKKNXlYx1Q744C0LXqz0SB6ympwQg,5844
 returnn/torch/util/exception_helper.py,sha256=4e7YEf9D42aAUEkM3uSjnOxpNEYgtyPSpNV0-1L6PSU,4319
 returnn/torch/util/gradient_checkpoint.py,sha256=iLy-FB65DC8O6LxzmMvFjnSdpIVpko87ppIvRKAbtpQ,27995
@@ -253,8 +253,8 @@ returnn/util/sig_proc.py,sha256=Tjz0VOAVyqu2qDCF5HZ1JjALjcFsHcNkcd96WgZeKfE,7265
 returnn/util/task_system.py,sha256=y4sMVXQ25Qd2z0rx03uOlXlkE-jbCYC1Sjfn-XlraVU,26003
 returnn/util/train_proc_manager.py,sha256=Pjht28k6uz6BNQ47uW6Gf880iyq5q4wx7P_K2tmoAM8,3266
 returnn/util/watch_memory.py,sha256=BR5P2kvBN6UI81cE0_1WAA6Hd1SByLbBaiDxvLhPOew,4213
-returnn-1.20250225.201207.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
-returnn-1.20250225.201207.dist-info/METADATA,sha256=MlICTMbISeiq6sz_1NI8XnxGZLlQzE0eH9VfGGDjlKs,5215
-returnn-1.20250225.201207.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
-returnn-1.20250225.201207.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
-returnn-1.20250225.201207.dist-info/RECORD,,
+returnn-1.20250226.115259.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
+returnn-1.20250226.115259.dist-info/METADATA,sha256=2Ws--V5aicc3WJ-I6OrqPbbrvVNTH3Cnno6L7yeIyKY,5215
+returnn-1.20250226.115259.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
+returnn-1.20250226.115259.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
+returnn-1.20250226.115259.dist-info/RECORD,,