returnn-1.20250204.160236-py3-none-any.whl → returnn-1.20250206.151011-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of returnn might be problematic.

returnn/PKG-INFO CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: returnn
- Version: 1.20250204.160236
+ Version: 1.20250206.151011
  Summary: The RWTH extensible training framework for universal recurrent neural networks
  Home-page: https://github.com/rwth-i6/returnn/
  Author: Albert Zeyer
returnn/_setup_info_generated.py CHANGED
@@ -1,2 +1,2 @@
- version = '1.20250204.160236'
- long_version = '1.20250204.160236+git.e147886'
+ version = '1.20250206.151011'
+ long_version = '1.20250206.151011+git.6fa4b38'
returnn/frontend/encoder/transformer.py CHANGED
@@ -35,6 +35,7 @@ class TransformerEncoder(rf.Module):
      layer: Optional[Union[TransformerEncoderLayer, rf.Module, type, Dict[str, Any], Any]] = None,
      layer_opts: Optional[Dict[str, Any]] = None,
      embed_dim: Optional[Dim] = None,
+     input_embedding: Union[None, rf.Module, type, Dict[str, Any]] = rf.Embedding,
      input_embedding_scale: float = None,
      input_dropout: float = None,
      sequential=rf.Sequential,
@@ -53,6 +54,7 @@ class TransformerEncoder(rf.Module):
      :param layer: an instance of :class:`TransformerEncoderLayer` or similar
      :param layer_opts: options for the encoder layer
      :param embed_dim: if given, will first have an embedding [vocab,embed] and then a linear [embed,model].
+     :param input_embedding:
      :param input_embedding_scale:
      :param input_dropout:
      :param sequential:
@@ -77,9 +79,15 @@ class TransformerEncoder(rf.Module):
      self.model_dim = model_dim
      self.embed_dim = embed_dim

-     # We could make this optional or configurable if we ever need to.
-     # Or maybe you would just have another separate implementation of this module then...
-     self.input_embedding = rf.Embedding(vocab_dim, embed_dim or model_dim)
+     if input_embedding is None or isinstance(input_embedding, rf.Module):
+         pass
+     elif isinstance(input_embedding, type):
+         input_embedding: rf.Embedding = input_embedding(vocab_dim, embed_dim or model_dim)
+     elif isinstance(input_embedding, dict):
+         input_embedding = rf.build_from_dict(input_embedding, vocab_dim, embed_dim or model_dim)
+     else:
+         raise TypeError(f"unexpected input_embedding {input_embedding!r} type {type(input_embedding)}")
+     self.input_embedding = input_embedding

      self.input_embedding_proj = None
      if embed_dim:
@@ -88,17 +96,13 @@ class TransformerEncoder(rf.Module):
      if pos_enc is None:
          pass
      elif isinstance(pos_enc, dict):
-         pos_enc = rf.build_from_dict(
-             pos_enc, feat_dim=embed_dim or model_dim, dtype=self.input_embedding.weight.dtype
-         )
+         pos_enc = rf.build_from_dict(pos_enc, feat_dim=embed_dim or model_dim, dtype=rf.get_default_float_dtype())
      elif isinstance(pos_enc, rf.Module):
          pass
      elif isinstance(pos_enc, FunctionType):
-         pos_enc = functools.partial(
-             pos_enc, feat_dim=embed_dim or model_dim, dtype=self.input_embedding.weight.dtype
-         )
+         pos_enc = functools.partial(pos_enc, feat_dim=embed_dim or model_dim, dtype=rf.get_default_float_dtype())
      else:
-         raise TypeError(f"unexpected pos_enc type {pos_enc!r}")
+         raise TypeError(f"unexpected pos_enc {pos_enc!r} type {type(pos_enc)}")
      self.pos_enc = pos_enc
      if input_embedding_scale is None:
          input_embedding_scale = model_dim.dimension**0.5
@@ -157,7 +161,11 @@ class TransformerEncoder(rf.Module):
      :param collected_outputs:
      :return: final encoder output, after final layer norm
      """
-     decoded = self.input_embedding(source) * self.input_embedding_scale
+     if self.input_embedding is not None:
+         decoded = self.input_embedding(source) * self.input_embedding_scale
+     else:
+         assert self.model_dim in source.dims
+         decoded = source
      if self.pos_enc is not None:
          decoded = decoded + self.pos_enc(spatial_dim=spatial_dim)
      decoded = rf.dropout(decoded, self.input_dropout)
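
Note: the new input_embedding argument accepts None, an rf.Module instance, a module class, or a dict for rf.build_from_dict; passing None makes __call__ skip the embedding lookup and consume features that already carry model_dim. A minimal usage sketch follows; the dim sizes and the num_layers value are illustrative assumptions, not taken from this diff.

# Illustrative sketch of the new input_embedding option; sizes and num_layers are assumptions.
import returnn.frontend as rf
from returnn.tensor import Dim
from returnn.frontend.encoder.transformer import TransformerEncoder

vocab_dim = Dim(10000, name="vocab")  # assumed vocabulary size
model_dim = Dim(512, name="model")  # assumed model dimension

# Default behavior: an rf.Embedding [vocab, model] is built internally (input_embedding=rf.Embedding).
encoder = TransformerEncoder(vocab_dim, model_dim, num_layers=6)

# New in this version: input_embedding=None skips the embedding lookup in __call__,
# so the source tensor must already have model_dim among its dims.
encoder_no_embed = TransformerEncoder(vocab_dim, model_dim, num_layers=6, input_embedding=None)
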
returnn/tensor/tensor_dict.py CHANGED
@@ -139,7 +139,7 @@ class TensorDict:
      """
      visited_dims = set()
      for key, value in self.data.items():
-         assert key in raw_tensor_dict
+         assert key in raw_tensor_dict, f"key {key} not in raw_tensor_dict {list(raw_tensor_dict.keys())}"
          value.raw_tensor = raw_tensor_dict[key]
          for i, dim in enumerate(value.dims):
              dim: Dim
returnn/torch/engine.py CHANGED
@@ -3,7 +3,7 @@ Main engine for PyTorch
  """

  from __future__ import annotations
- from typing import Optional, Any, Union, Callable, Dict, Set
+ from typing import Optional, Any, Union, Callable, Dict, Set, Tuple
  from contextlib import nullcontext, ExitStack, contextmanager

  import gc
@@ -371,6 +371,7 @@ class Engine(EngineBase):
  total_data_size_packed = NumbersDict()
  total_data_size_padded = NumbersDict()

+ report_prefix = f"ep {self.epoch} train"
  try:
      while True:
          with torch.no_grad():
@@ -398,21 +399,13 @@ class Engine(EngineBase):
      {k: int(util.prod(extern_data_raw[k].shape[:2])) for k in keys_w_seq_len},
  )

- num_seqs_ = (
-     int(extern_data_raw["num_seqs"]) if extern_data_raw.get("num_seqs", None) is not None else -1
+ num_seqs, last_seq_idx = _get_num_seqs_last_seq_idx(
+     report_prefix=report_prefix,
+     extern_data_raw=extern_data_raw,
+     step_idx=step_idx,
+     prev_num_seqs=num_seqs,
+     prev_last_seq_idx=last_seq_idx,
  )
- # Note: The batches might have been shuffled,
- # thus we cannot really assert that the seq_idx is always increasing.
- last_seq_idx = max(int(extern_data_raw["seq_idx"].max()), last_seq_idx)
- if step_idx == 0:
-     if num_seqs_ >= 0:
-         print(f"Epoch {self.epoch} num_seqs: {num_seqs_}", file=log.v5)
-     num_seqs = num_seqs_
- elif num_seqs_ >= 0:
-     assert num_seqs_ == num_seqs
- del num_seqs_
- if num_seqs is not None:
-     assert last_seq_idx < num_seqs
  epoch_continuous = (self.epoch - 1 + (last_seq_idx + 1) / num_seqs) if num_seqs is not None else None

  # clear the gradients when every gradient accumulation loop starts
@@ -485,7 +478,7 @@ class Engine(EngineBase):
  accumulated_inv_norm_factors_dict += inv_norm_factors_dict
  eval_info = self._maybe_extend_losses_info(losses_dict / inv_norm_factors_dict)
  _print_process(
-     f"ep {self.epoch} train",
+     report_prefix,
      step=step_idx,
      eval_info=dict(eval_info),
      step_duration=step_duration,
@@ -505,12 +498,35 @@ class Engine(EngineBase):
      file=log.v1,
  )

+ print("Checking for inf/nan in model parameters...", file=log.v1)
+ count_nan_inf_params = 0
+ for name, param in self._pt_model.named_parameters():
+     got_nan_inf_t = torch.stack([torch.isnan(param).any(), torch.isinf(param).any()]).cpu()
+     got_nan = got_nan_inf_t[0].item()
+     got_inf = got_nan_inf_t[1].item()
+     if got_nan or got_inf:
+         s = "/".join([s_ for s_, b in [("nan", got_nan), ("inf", got_inf)] if b])
+         print(f" {name} {param}: {s}", file=log.v1)
+         count_nan_inf_params += 1
+ if count_nan_inf_params == 0:
+     print("(No inf/nan in model parameters.)", file=log.v1)
+
  def _debug_func() -> torch.Tensor:
      self._run_step(extern_data, train_flag=True, train_func=True)
-     return rf.get_run_ctx().total_loss()
+     loss = rf.get_run_ctx().total_loss()
+     assert isinstance(loss, Tensor)
+     return loss.raw_tensor

  print("Running debug_inf_nan...", file=log.v1)
  debug_inf_nan(_debug_func, with_grad=True)
+ if count_nan_inf_params > 0 and self.global_train_step == 1:
+     print(
+         "This was the second step, so likely the first step grad was broken."
+         " Try again with reset model...",
+         file=log.v1,
+     )
+     self._load_model()
+     debug_inf_nan(_debug_func, with_grad=True)
  raise Exception(f"Inf/nan score in step {step_idx}.")

  step_idx += 1
@@ -1253,6 +1269,8 @@ class Engine(EngineBase):
  new_dim.dyn_size_ext = _get_tensor_wo_batch_numpy(dim.dyn_size_ext)
  return new_dim

+ num_seqs = None
+ last_seq_idx = 0
  report_prefix = f"ep {self.epoch} {dataset.name} forward"
  with torch.no_grad():
      callback.init(model=self._orig_model)
@@ -1260,6 +1278,15 @@ class Engine(EngineBase):
  step_idx = 0
  for extern_data_raw in data_loader:
      step_begin_time = time.monotonic()
+
+     num_seqs, last_seq_idx = _get_num_seqs_last_seq_idx(
+         report_prefix=report_prefix,
+         extern_data_raw=extern_data_raw,
+         step_idx=step_idx,
+         prev_num_seqs=num_seqs,
+         prev_last_seq_idx=last_seq_idx,
+     )
+
      if self._forward_step_expected_outputs:
          # Also resets any dyn dims, which might have been set in the prev step.
          self._forward_step_expected_outputs.reset_content()
@@ -1296,11 +1323,19 @@ class Engine(EngineBase):
      model_outputs_per_batch.data[k] = _get_tensor_wo_batch_numpy(v)
  callback.process_seq(seq_tag=seq_tag, outputs=model_outputs_per_batch)

- elapsed_computation_time += time.monotonic() - step_begin_time
+ step_end_time = time.monotonic()
+ step_duration = step_end_time - step_begin_time
+ elapsed_computation_time += step_duration
+
  _print_process(
      report_prefix,
      step=step_idx,
      eval_info=None,
+     step_duration=step_duration,
+     start_elapsed=step_end_time - epoch_start_time,
+     seq_idx=last_seq_idx,
+     num_seqs=num_seqs,
+     batch_size_info=_get_batch_size_info(extern_data) if self._log_batch_size else None,
      log_memory_usage_device=self._device if self._log_memory_usage else None,
  )
  step_idx += 1
@@ -1578,3 +1613,27 @@ def _get_total_grad_norm(model: torch.nn.Module, p: float) -> float:
          p=p,
      ).item()
  )
+
+
+ def _get_num_seqs_last_seq_idx(
+     *,
+     report_prefix: str,
+     extern_data_raw: Dict[str, Any],
+     step_idx: int,
+     prev_num_seqs: Optional[int],
+     prev_last_seq_idx: int,
+ ) -> Tuple[Optional[int], int]:
+     num_seqs = prev_num_seqs
+     num_seqs_ = int(extern_data_raw["num_seqs"]) if extern_data_raw.get("num_seqs", None) is not None else -1
+     # Note: The batches might have been shuffled,
+     # thus we cannot really assert that the seq_idx is always increasing.
+     last_seq_idx = max(int(extern_data_raw["seq_idx"].max()), prev_last_seq_idx)
+     if step_idx == 0:
+         if num_seqs_ >= 0:
+             print(f"{report_prefix} num_seqs: {num_seqs_}", file=log.v5)
+         num_seqs = num_seqs_
+     elif num_seqs_ >= 0:
+         assert num_seqs_ == num_seqs
+     if num_seqs is not None:
+         assert last_seq_idx < num_seqs
+     return num_seqs, last_seq_idx
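
Note: the new parameter check above stacks each parameter's isnan/isinf flags into one tensor before the single .cpu() call, keeping it to one device-to-host transfer per parameter. A self-contained sketch of the same pattern outside the engine; the helper name and the toy model are illustrative, not RETURNN API.

# Standalone illustration of the inf/nan parameter scan pattern used above; not part of RETURNN.
import torch

def report_nan_inf_params(model: torch.nn.Module) -> int:
    """Print parameters containing NaN/Inf and return how many were found."""
    count = 0
    for name, param in model.named_parameters():
        flags = torch.stack([torch.isnan(param).any(), torch.isinf(param).any()]).cpu()
        got_nan, got_inf = bool(flags[0]), bool(flags[1])
        if got_nan or got_inf:
            tags = "/".join(t for t, b in (("nan", got_nan), ("inf", got_inf)) if b)
            print(f"  {name}: {tags}")
            count += 1
    return count

if __name__ == "__main__":
    model = torch.nn.Linear(4, 4)
    with torch.no_grad():
        model.weight[0, 0] = float("nan")  # inject a NaN so the report is non-empty
    assert report_nan_inf_params(model) == 1
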
returnn/torch/util/debug_inf_nan.py CHANGED
@@ -52,6 +52,7 @@ def debug_inf_nan(
      *,
      with_grad: bool = False,
      report_every_op_call: bool = True,
+     stop_reporting_after_first_inf_nan: bool = True,
      file: Optional[Union[TextIO, TextIOBase]] = None,
  ):
      """
@@ -61,6 +62,7 @@ def debug_inf_nan(
      and we will call `loss = func(); loss.backward()`.
      :param with_grad: whether to compute and debug gradients for inf/nan.
      :param report_every_op_call: whether to report every op call.
+     :param stop_reporting_after_first_inf_nan: whether to stop reporting after the first inf/nan.
      :param file: where to write the output to. Default is stdout.
      """

@@ -69,13 +71,18 @@ def debug_inf_nan(

      # noinspection PyUnresolvedReferences,PyProtectedMember
      cur_frame: FrameType = sys._getframe()
-     trace_ops = _TraceOps(root_frame=cur_frame, file=file, report_every_op_call=report_every_op_call)
+     trace_ops = _TraceOps(
+         root_frame=cur_frame,
+         file=file,
+         report_every_op_call=report_every_op_call,
+         stop_reporting_after_first_inf_nan=stop_reporting_after_first_inf_nan,
+     )

      if with_grad:
-
          with torch.autograd.detect_anomaly():
              with trace_ops:  # currently only for forward (but we might want to trace the backward too)
                  loss = func()
+             file.flush()  # the backward detect_anomaly might screw up the output otherwise
              try:
                  loss.backward()
              except RuntimeError as exc:
@@ -89,23 +96,46 @@ def debug_inf_nan(

  # For efficiency, and to be less spammy
  _TraceFuncNameBlacklist = {
-     "aten::detach",
      "aten::zeros_like",
      "aten::ones_like",
+     "aten::full",
+     "aten::scalar_tensor",  # when we deliberately create a scalar inf tensor
+     "aten::_local_scalar_dense",
+     "aten::where.self",  # when we intentionally mask with inf
+     "aten::detach",
+     "aten::_to_copy",
+     "aten::clone",
+     "aten::stack",
+     "aten::view",
+     "aten::_unsafe_view",
+     "aten::permute",
+     "aten::t",
+     "aten::split_with_sizes",
+     "aten::slice.Tensor",
+     "aten::select.int",
  }


  class _TraceOps(TorchDispatchMode):
-     def __init__(self, *, root_frame: FrameType, file: Union[TextIO, TextIOBase], report_every_op_call: bool = True):
+     def __init__(
+         self,
+         *,
+         root_frame: FrameType,
+         file: Union[TextIO, TextIOBase],
+         report_every_op_call: bool = True,
+         stop_reporting_after_first_inf_nan: bool = True,
+     ):
          super().__init__()
          self.root_frame = root_frame
          self.file = file
+         self.enabled = True
          self.report_every_op_call = report_every_op_call
+         self.stop_reporting_after_first_inf_nan = stop_reporting_after_first_inf_nan

      def __torch_dispatch__(self, func, types, args=(), kwargs=None):
          if kwargs is None:
              kwargs = {}
-         if func.name() in _TraceFuncNameBlacklist:
+         if not self.enabled or func.name() in _TraceFuncNameBlacklist:
              return func(*args, **kwargs)
          if self.report_every_op_call:
              print(f"--- op {func.name()}", file=self.file)
@@ -121,6 +151,8 @@ class _TraceOps(TorchDispatchMode):
      traceback.print_list(
          _extract_stack_up_to(skip_top_num_frames=1, root_frame=self.root_frame), file=self.file
      )
+     if self.stop_reporting_after_first_inf_nan:
+         self.enabled = False
      return out
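
Note: debug_inf_nan now stops tracing after the first inf/nan hit by default; pass stop_reporting_after_first_inf_nan=False to keep reporting every offending op. A hedged usage sketch; the toy loss function below is made up to force an inf in the forward pass.

# Illustrative use of debug_inf_nan with the new flag; the toy loss is only for demonstration.
import torch
from returnn.torch.util.debug_inf_nan import debug_inf_nan

def toy_loss() -> torch.Tensor:
    x = torch.tensor([0.0, 1.0], requires_grad=True)
    return torch.log(x).sum()  # log(0) -> -inf, flagged in the forward trace

# Default: report ops and stop reporting after the first inf/nan is seen.
debug_inf_nan(toy_loss, with_grad=True)

# Keep reporting all inf/nan producing ops instead of only the first.
debug_inf_nan(toy_loss, with_grad=True, stop_reporting_after_first_inf_nan=False)
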
returnn-1.20250206.151011.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: returnn
- Version: 1.20250204.160236
+ Version: 1.20250206.151011
  Summary: The RWTH extensible training framework for universal recurrent neural networks
  Home-page: https://github.com/rwth-i6/returnn/
  Author: Albert Zeyer
returnn-1.20250206.151011.dist-info/RECORD CHANGED
@@ -1,9 +1,9 @@
- returnn/PKG-INFO,sha256=tVaxTG1KNp2EVd4-m0vHijnHu6CbjU8wpugsPQKty_M,5215
+ returnn/PKG-INFO,sha256=BbQPkoVha1AYEcED8txzZyyyDiJt3J29FBKlYy1rTYc,5215
  returnn/__init__.py,sha256=biBtRsM0WZ406vShaeH-9WFoqJ8XwTbn6g0EeFJ7l8E,1012
  returnn/__main__.py,sha256=qBFbuB1yN3adgVM5pXt2-Yq9vorjRNchNPL8kDKx44M,31752
  returnn/__old_mod_loader__.py,sha256=nvsNY-xELdS_IPNkv66Q9Rmvg4dbGW0-EBRDcCmctos,7654
  returnn/__setup__.py,sha256=22kQn2fh11iPM0hLb2Fy5sLmoU1JGvmDxXRYuRgQkwU,4659
- returnn/_setup_info_generated.py,sha256=6h1pDgIqJ8MnrBvdiNdCUPZ56ZcnQn3Wg5MSw9gwXGs,77
+ returnn/_setup_info_generated.py,sha256=9T1yfQUP7ASjffpzcwvOLEGNWMdiwS4EmjqY_bI2EdY,77
  returnn/config.py,sha256=3tmKhB6FnQZaNdtcYsiB61JnEY--iZ2qmJ4yq0b6tE0,29140
  returnn/forward_iface.py,sha256=A_OJiaXsX4MlXQRzST86ylyxSUZbC402PQL1REcqHjM,911
  returnn/learning_rate_control.py,sha256=ZvWryAn_tv9DhV8sh1LV3eE34Yltl3On3mYZAG4hR9s,34684
@@ -141,7 +141,7 @@ returnn/frontend/encoder/base.py,sha256=A759EwCYAmSi-kzXz1vaTjR2l59TvNGQlzaNdp3U
  returnn/frontend/encoder/conformer.py,sha256=YPtH0Clx2QrKOoxbtUSkYR7QiDp-EYmoOcGc_gc2ZEk,19778
  returnn/frontend/encoder/conformer_v2.py,sha256=vAYdT8m2Zzg3IIZZafeccClFHU1_c9T-EgBOsHadQPA,7701
  returnn/frontend/encoder/e_branchformer.py,sha256=zEla-iXJciK7bCenlTwsPB8dXo_VPMlFm2xc3op_lPY,12278
- returnn/frontend/encoder/transformer.py,sha256=k-tJjp5ymJ7QzKjyQdKVBfHVCw1-mJTfIzhIpGosxDs,11066
+ returnn/frontend/encoder/transformer.py,sha256=Jj0mF1D2MohOk-9sGYdsLtVW_86fwoq4pKWCdPMvPR8,11580
  returnn/import_/__init__.py,sha256=L2dKxWCcn0fz_7H7OS-zw5i5Yrljjjh_d61dEcFP_JY,243
  returnn/import_/common.py,sha256=0cmvyd7NtMLH55IskEoSDtkcMwChxLhauV2UZ4mK68I,8148
  returnn/import_/git.py,sha256=IXBVOybQAHf5OlMfVY6oZ-7eiDYPG0OR7MyDJKcVHSM,13961
@@ -162,7 +162,7 @@ returnn/tensor/control_flow_ctx.py,sha256=L9e32AfYDUDgsEDHL07thSFyYFqwhyVSqzE_bM
  returnn/tensor/dim.py,sha256=652DlcSe6o6l5OyY5xt9Yigij_Xry-ToG9AemMX3roY,4208
  returnn/tensor/marked_dim.py,sha256=Ae2hQIb5QixRU2gDhQEm0tmYt8TmomWoGERB414jR8o,1884
  returnn/tensor/tensor.py,sha256=bisF7j3rU5Rvx8C8S57C9hGo2jgWwTaQ6wc_Db7Mwpw,9087
- returnn/tensor/tensor_dict.py,sha256=0QLUnIqc0za3bk2ytU4Cdmri2Z732O6BOc6hW1dYE8Q,7078
+ returnn/tensor/tensor_dict.py,sha256=WTqMefemeHQG381MVUjvHMmYVd2TV9IQ0qU4i_XJi3c,7146
  returnn/tensor/utils.py,sha256=B6_XyNTXPIyLxWk061Qo-Md8_DnINGdVwpXJF6pahBk,9772
  returnn/tf/__init__.py,sha256=X4g2LFCFTl0uiybMRkfBY8AYkgMa6HX0vVxxTk0nMiE,88
  returnn/tf/compat.py,sha256=NkAkdlR37m2d9qh3i33sIfEGilOaFBeCofAQpQwnZpY,1632
@@ -207,7 +207,7 @@ returnn/tf/util/open_fst.py,sha256=sZRDw4TbxvhGqpGdUJWy1ebvlZm4_RPhygpRw9uLAOQ,1
  returnn/torch/README.md,sha256=jzJ2FpOHW02vxN69yKaV97C9LI-hmvjBglKfdZXIDdc,85
  returnn/torch/__init__.py,sha256=MHEUyNHB20Vy89uKAqZoj6FxJKF1Gq3HW-i6ra1pNcI,24
  returnn/torch/distributed.py,sha256=i13cUVjI7GxpO0TAresrNyCM0ZBAaf-cXNr09Fmg_2k,6266
- returnn/torch/engine.py,sha256=eWWHk_wOEV8ysLx8VkQHnA2613uOkEx1-Ibp-YGDncw,73615
+ returnn/torch/engine.py,sha256=neM-AL7XQLpZ3V1K4ziqVmij19ey1k2MpLCaFXATOpg,76301
  returnn/torch/updater.py,sha256=GqtBvZpElPVMm0lq84JPl4NVLFFETZAzAbR0rTomSao,28249
  returnn/torch/data/__init__.py,sha256=6cLNEi8KoGI12PF6akN7mI_mtjlx-0hcQAfMYoExwik,132
  returnn/torch/data/extern_data.py,sha256=_uT_9_gd5HIh1IoRsrebVG-nufSnb7fgC5jyU05GxJg,7580
@@ -226,7 +226,7 @@ returnn/torch/optim/lion.py,sha256=jV_qfwyyO5HAgqW94caap-ALkVjU688RpRgkZyLNZ5Y,5
  returnn/torch/util/README.md,sha256=AW-6ueWhgcwDcm57md6sm227QXNkvLnlRLwaH7NlS-w,193
  returnn/torch/util/__init__.py,sha256=AOXYUjzPm0XrzFJCPAXo9Jj_FvqD1XH3FfKtho80Vl8,26
  returnn/torch/util/array_.py,sha256=ell3VZvn01SLtF9Pw2fvPzFNO-XDQ7tSB9VCrVSKmSA,2556
- returnn/torch/util/debug_inf_nan.py,sha256=UnCU-Yt0UC2vzpbXVs3cDUrR4pa3F6X_CWHEBaKqDcM,5113
+ returnn/torch/util/debug_inf_nan.py,sha256=v0IzLy4kRKBWChSV70O4x829QtEuXMwB9mBqAyE4O2o,6223
  returnn/torch/util/diagnose_gpu.py,sha256=PYMmSk7iQ-jC3RXKKNXlYx1Q744C0LXqz0SB6ympwQg,5844
  returnn/torch/util/exception_helper.py,sha256=4e7YEf9D42aAUEkM3uSjnOxpNEYgtyPSpNV0-1L6PSU,4319
  returnn/torch/util/gradient_checkpoint.py,sha256=iLy-FB65DC8O6LxzmMvFjnSdpIVpko87ppIvRKAbtpQ,27995
@@ -253,8 +253,8 @@ returnn/util/sig_proc.py,sha256=Tjz0VOAVyqu2qDCF5HZ1JjALjcFsHcNkcd96WgZeKfE,7265
  returnn/util/task_system.py,sha256=y4sMVXQ25Qd2z0rx03uOlXlkE-jbCYC1Sjfn-XlraVU,26003
  returnn/util/train_proc_manager.py,sha256=Pjht28k6uz6BNQ47uW6Gf880iyq5q4wx7P_K2tmoAM8,3266
  returnn/util/watch_memory.py,sha256=BR5P2kvBN6UI81cE0_1WAA6Hd1SByLbBaiDxvLhPOew,4213
- returnn-1.20250204.160236.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
- returnn-1.20250204.160236.dist-info/METADATA,sha256=tVaxTG1KNp2EVd4-m0vHijnHu6CbjU8wpugsPQKty_M,5215
- returnn-1.20250204.160236.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
- returnn-1.20250204.160236.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
- returnn-1.20250204.160236.dist-info/RECORD,,
+ returnn-1.20250206.151011.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
+ returnn-1.20250206.151011.dist-info/METADATA,sha256=BbQPkoVha1AYEcED8txzZyyyDiJt3J29FBKlYy1rTYc,5215
+ returnn-1.20250206.151011.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
+ returnn-1.20250206.151011.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
+ returnn-1.20250206.151011.dist-info/RECORD,,