returnn 1.20250204.160236__py3-none-any.whl → 1.20250206.151011__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of returnn has been flagged as potentially problematic.
- returnn/PKG-INFO +1 -1
- returnn/_setup_info_generated.py +2 -2
- returnn/frontend/encoder/transformer.py +19 -11
- returnn/tensor/tensor_dict.py +1 -1
- returnn/torch/engine.py +77 -18
- returnn/torch/util/debug_inf_nan.py +37 -5
- {returnn-1.20250204.160236.dist-info → returnn-1.20250206.151011.dist-info}/METADATA +1 -1
- {returnn-1.20250204.160236.dist-info → returnn-1.20250206.151011.dist-info}/RECORD +11 -11
- {returnn-1.20250204.160236.dist-info → returnn-1.20250206.151011.dist-info}/LICENSE +0 -0
- {returnn-1.20250204.160236.dist-info → returnn-1.20250206.151011.dist-info}/WHEEL +0 -0
- {returnn-1.20250204.160236.dist-info → returnn-1.20250206.151011.dist-info}/top_level.txt +0 -0
returnn/PKG-INFO
CHANGED
returnn/_setup_info_generated.py
CHANGED
@@ -1,2 +1,2 @@
-version = '1.
-long_version = '1.
+version = '1.20250206.151011'
+long_version = '1.20250206.151011+git.6fa4b38'
returnn/frontend/encoder/transformer.py
CHANGED
@@ -35,6 +35,7 @@ class TransformerEncoder(rf.Module):
         layer: Optional[Union[TransformerEncoderLayer, rf.Module, type, Dict[str, Any], Any]] = None,
         layer_opts: Optional[Dict[str, Any]] = None,
         embed_dim: Optional[Dim] = None,
+        input_embedding: Union[None, rf.Module, type, Dict[str, Any]] = rf.Embedding,
         input_embedding_scale: float = None,
         input_dropout: float = None,
         sequential=rf.Sequential,
@@ -53,6 +54,7 @@ class TransformerEncoder(rf.Module):
         :param layer: an instance of :class:`TransformerEncoderLayer` or similar
         :param layer_opts: options for the encoder layer
         :param embed_dim: if given, will first have an embedding [vocab,embed] and then a linear [embed,model].
+        :param input_embedding:
         :param input_embedding_scale:
         :param input_dropout:
         :param sequential:
@@ -77,9 +79,15 @@ class TransformerEncoder(rf.Module):
         self.model_dim = model_dim
         self.embed_dim = embed_dim

-
-
-
+        if input_embedding is None or isinstance(input_embedding, rf.Module):
+            pass
+        elif isinstance(input_embedding, type):
+            input_embedding: rf.Embedding = input_embedding(vocab_dim, embed_dim or model_dim)
+        elif isinstance(input_embedding, dict):
+            input_embedding = rf.build_from_dict(input_embedding, vocab_dim, embed_dim or model_dim)
+        else:
+            raise TypeError(f"unexpected input_embedding {input_embedding!r} type {type(input_embedding)}")
+        self.input_embedding = input_embedding

         self.input_embedding_proj = None
         if embed_dim:
@@ -88,17 +96,13 @@ class TransformerEncoder(rf.Module):
         if pos_enc is None:
             pass
         elif isinstance(pos_enc, dict):
-            pos_enc = rf.build_from_dict(
-                pos_enc, feat_dim=embed_dim or model_dim, dtype=self.input_embedding.weight.dtype
-            )
+            pos_enc = rf.build_from_dict(pos_enc, feat_dim=embed_dim or model_dim, dtype=rf.get_default_float_dtype())
         elif isinstance(pos_enc, rf.Module):
             pass
         elif isinstance(pos_enc, FunctionType):
-            pos_enc = functools.partial(
-                pos_enc, feat_dim=embed_dim or model_dim, dtype=self.input_embedding.weight.dtype
-            )
+            pos_enc = functools.partial(pos_enc, feat_dim=embed_dim or model_dim, dtype=rf.get_default_float_dtype())
         else:
-            raise TypeError(f"unexpected pos_enc
+            raise TypeError(f"unexpected pos_enc {pos_enc!r} type {type(pos_enc)}")
         self.pos_enc = pos_enc
         if input_embedding_scale is None:
             input_embedding_scale = model_dim.dimension**0.5
@@ -157,7 +161,11 @@ class TransformerEncoder(rf.Module):
         :param collected_outputs:
         :return: final encoder output, after final layer norm
         """
-
+        if self.input_embedding is not None:
+            decoded = self.input_embedding(source) * self.input_embedding_scale
+        else:
+            assert self.model_dim in source.dims
+            decoded = source
         if self.pos_enc is not None:
             decoded = decoded + self.pos_enc(spatial_dim=spatial_dim)
         decoded = rf.dropout(decoded, self.input_dropout)
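For illustration, a minimal usage sketch of the new input_embedding argument. Only the input_embedding handling is confirmed by the hunks above; the import paths, the Dim constructor, and the num_layers argument are assumptions about the surrounding RETURNN frontend API.

# Hedged sketch; constructor details other than input_embedding are assumptions.
from returnn.tensor import Dim
from returnn.frontend.encoder.transformer import TransformerEncoder

vocab_dim = Dim(1000, name="vocab")
model_dim = Dim(512, name="model")

# Default: a learned rf.Embedding maps token IDs from vocab_dim to model_dim.
enc_tokens = TransformerEncoder(vocab_dim, model_dim, num_layers=6)

# New in this version: input_embedding=None skips the embedding entirely, so the encoder can
# consume inputs that already carry model_dim; __call__ then asserts self.model_dim in source.dims
# and feeds the source straight into positional encoding, dropout, and the layer stack.
enc_features = TransformerEncoder(vocab_dim, model_dim, num_layers=6, input_embedding=None)

A dict is also accepted and is resolved via rf.build_from_dict(input_embedding, vocab_dim, embed_dim or model_dim), as the constructor hunk shows.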
returnn/tensor/tensor_dict.py
CHANGED
@@ -139,7 +139,7 @@ class TensorDict:
         """
         visited_dims = set()
         for key, value in self.data.items():
-            assert key in raw_tensor_dict
+            assert key in raw_tensor_dict, f"key {key} not in raw_tensor_dict {list(raw_tensor_dict.keys())}"
             value.raw_tensor = raw_tensor_dict[key]
             for i, dim in enumerate(value.dims):
                 dim: Dim
returnn/torch/engine.py
CHANGED
@@ -3,7 +3,7 @@ Main engine for PyTorch
 """

 from __future__ import annotations
-from typing import Optional, Any, Union, Callable, Dict, Set
+from typing import Optional, Any, Union, Callable, Dict, Set, Tuple
 from contextlib import nullcontext, ExitStack, contextmanager

 import gc
@@ -371,6 +371,7 @@ class Engine(EngineBase):
         total_data_size_packed = NumbersDict()
         total_data_size_padded = NumbersDict()

+        report_prefix = f"ep {self.epoch} train"
         try:
             while True:
                 with torch.no_grad():
@@ -398,21 +399,13 @@ class Engine(EngineBase):
                     {k: int(util.prod(extern_data_raw[k].shape[:2])) for k in keys_w_seq_len},
                 )

-
-
+                num_seqs, last_seq_idx = _get_num_seqs_last_seq_idx(
+                    report_prefix=report_prefix,
+                    extern_data_raw=extern_data_raw,
+                    step_idx=step_idx,
+                    prev_num_seqs=num_seqs,
+                    prev_last_seq_idx=last_seq_idx,
                 )
-                # Note: The batches might have been shuffled,
-                # thus we cannot really assert that the seq_idx is always increasing.
-                last_seq_idx = max(int(extern_data_raw["seq_idx"].max()), last_seq_idx)
-                if step_idx == 0:
-                    if num_seqs_ >= 0:
-                        print(f"Epoch {self.epoch} num_seqs: {num_seqs_}", file=log.v5)
-                        num_seqs = num_seqs_
-                elif num_seqs_ >= 0:
-                    assert num_seqs_ == num_seqs
-                del num_seqs_
-                if num_seqs is not None:
-                    assert last_seq_idx < num_seqs
                 epoch_continuous = (self.epoch - 1 + (last_seq_idx + 1) / num_seqs) if num_seqs is not None else None

                 # clear the gradients when every gradient accumulation loop starts
@@ -485,7 +478,7 @@ class Engine(EngineBase):
                     accumulated_inv_norm_factors_dict += inv_norm_factors_dict
                     eval_info = self._maybe_extend_losses_info(losses_dict / inv_norm_factors_dict)
                     _print_process(
-
+                        report_prefix,
                         step=step_idx,
                         eval_info=dict(eval_info),
                         step_duration=step_duration,
@@ -505,12 +498,35 @@ class Engine(EngineBase):
                         file=log.v1,
                     )

+                    print("Checking for inf/nan in model parameters...", file=log.v1)
+                    count_nan_inf_params = 0
+                    for name, param in self._pt_model.named_parameters():
+                        got_nan_inf_t = torch.stack([torch.isnan(param).any(), torch.isinf(param).any()]).cpu()
+                        got_nan = got_nan_inf_t[0].item()
+                        got_inf = got_nan_inf_t[1].item()
+                        if got_nan or got_inf:
+                            s = "/".join([s_ for s_, b in [("nan", got_nan), ("inf", got_inf)] if b])
+                            print(f" {name} {param}: {s}", file=log.v1)
+                            count_nan_inf_params += 1
+                    if count_nan_inf_params == 0:
+                        print("(No inf/nan in model parameters.)", file=log.v1)
+
                     def _debug_func() -> torch.Tensor:
                         self._run_step(extern_data, train_flag=True, train_func=True)
-
+                        loss = rf.get_run_ctx().total_loss()
+                        assert isinstance(loss, Tensor)
+                        return loss.raw_tensor

                     print("Running debug_inf_nan...", file=log.v1)
                     debug_inf_nan(_debug_func, with_grad=True)
+                    if count_nan_inf_params > 0 and self.global_train_step == 1:
+                        print(
+                            "This was the second step, so likely the first step grad was broken."
+                            " Try again with reset model...",
+                            file=log.v1,
+                        )
+                        self._load_model()
+                        debug_inf_nan(_debug_func, with_grad=True)
                     raise Exception(f"Inf/nan score in step {step_idx}.")

                 step_idx += 1
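The block above scans all model parameters for non-finite values before running debug_inf_nan. A standalone sketch of that same scan, usable outside the engine (the toy model and the injected nan are illustrative):

import torch


def report_nonfinite_params(model: torch.nn.Module) -> int:
    """Report parameters containing inf/nan; return how many parameter tensors are affected."""
    count = 0
    for name, param in model.named_parameters():
        # Stack both checks so there is only one device-to-host sync per parameter, as in the hunk above.
        got_nan_inf = torch.stack([torch.isnan(param).any(), torch.isinf(param).any()]).cpu()
        got_nan, got_inf = bool(got_nan_inf[0]), bool(got_nan_inf[1])
        if got_nan or got_inf:
            kinds = "/".join(k for k, flag in (("nan", got_nan), ("inf", got_inf)) if flag)
            print(f"  {name}: {kinds}")
            count += 1
    return count


model = torch.nn.Linear(4, 4)
with torch.no_grad():
    model.weight[0, 0] = float("nan")
print(report_nonfinite_params(model), "parameter tensor(s) with inf/nan")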
@@ -1253,6 +1269,8 @@ class Engine(EngineBase):
             new_dim.dyn_size_ext = _get_tensor_wo_batch_numpy(dim.dyn_size_ext)
             return new_dim

+        num_seqs = None
+        last_seq_idx = 0
         report_prefix = f"ep {self.epoch} {dataset.name} forward"
         with torch.no_grad():
             callback.init(model=self._orig_model)
@@ -1260,6 +1278,15 @@ class Engine(EngineBase):
             step_idx = 0
             for extern_data_raw in data_loader:
                 step_begin_time = time.monotonic()
+
+                num_seqs, last_seq_idx = _get_num_seqs_last_seq_idx(
+                    report_prefix=report_prefix,
+                    extern_data_raw=extern_data_raw,
+                    step_idx=step_idx,
+                    prev_num_seqs=num_seqs,
+                    prev_last_seq_idx=last_seq_idx,
+                )
+
                 if self._forward_step_expected_outputs:
                     # Also resets any dyn dims, which might have been set in the prev step.
                     self._forward_step_expected_outputs.reset_content()
@@ -1296,11 +1323,19 @@ class Engine(EngineBase):
                         model_outputs_per_batch.data[k] = _get_tensor_wo_batch_numpy(v)
                     callback.process_seq(seq_tag=seq_tag, outputs=model_outputs_per_batch)

-
+                step_end_time = time.monotonic()
+                step_duration = step_end_time - step_begin_time
+                elapsed_computation_time += step_duration
+
                 _print_process(
                     report_prefix,
                     step=step_idx,
                     eval_info=None,
+                    step_duration=step_duration,
+                    start_elapsed=step_end_time - epoch_start_time,
+                    seq_idx=last_seq_idx,
+                    num_seqs=num_seqs,
+                    batch_size_info=_get_batch_size_info(extern_data) if self._log_batch_size else None,
                     log_memory_usage_device=self._device if self._log_memory_usage else None,
                 )
                 step_idx += 1
@@ -1578,3 +1613,27 @@ def _get_total_grad_norm(model: torch.nn.Module, p: float) -> float:
             p=p,
         ).item()
     )
+
+
+def _get_num_seqs_last_seq_idx(
+    *,
+    report_prefix: str,
+    extern_data_raw: Dict[str, Any],
+    step_idx: int,
+    prev_num_seqs: Optional[int],
+    prev_last_seq_idx: int,
+) -> Tuple[Optional[int], int]:
+    num_seqs = prev_num_seqs
+    num_seqs_ = int(extern_data_raw["num_seqs"]) if extern_data_raw.get("num_seqs", None) is not None else -1
+    # Note: The batches might have been shuffled,
+    # thus we cannot really assert that the seq_idx is always increasing.
+    last_seq_idx = max(int(extern_data_raw["seq_idx"].max()), prev_last_seq_idx)
+    if step_idx == 0:
+        if num_seqs_ >= 0:
+            print(f"{report_prefix} num_seqs: {num_seqs_}", file=log.v5)
+            num_seqs = num_seqs_
+    elif num_seqs_ >= 0:
+        assert num_seqs_ == num_seqs
+    if num_seqs is not None:
+        assert last_seq_idx < num_seqs
+    return num_seqs, last_seq_idx
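A self-contained sketch of the tracking pattern implemented by the helper above, with plain lists standing in for the raw tensors and the logging dropped (names and sample data are illustrative):

from typing import Any, Dict, Optional, Tuple


def get_num_seqs_last_seq_idx(
    *, extern_data_raw: Dict[str, Any], step_idx: int, prev_num_seqs: Optional[int], prev_last_seq_idx: int
) -> Tuple[Optional[int], int]:
    # Standalone re-implementation of the helper added above (sketch only).
    num_seqs = prev_num_seqs
    num_seqs_ = int(extern_data_raw["num_seqs"]) if extern_data_raw.get("num_seqs") is not None else -1
    # Batches might be shuffled, so only track the maximum seq_idx seen so far.
    last_seq_idx = max(int(max(extern_data_raw["seq_idx"])), prev_last_seq_idx)
    if step_idx == 0:
        if num_seqs_ >= 0:
            num_seqs = num_seqs_
    elif num_seqs_ >= 0:
        assert num_seqs_ == num_seqs
    if num_seqs is not None:
        assert last_seq_idx < num_seqs
    return num_seqs, last_seq_idx


# Threading the state across steps, as the engine does in both the train and forward loops:
batches = [{"num_seqs": 4, "seq_idx": [0, 1]}, {"num_seqs": 4, "seq_idx": [3, 2]}]
num_seqs, last_seq_idx = None, 0
for step_idx, batch in enumerate(batches):
    num_seqs, last_seq_idx = get_num_seqs_last_seq_idx(
        extern_data_raw=batch, step_idx=step_idx, prev_num_seqs=num_seqs, prev_last_seq_idx=last_seq_idx
    )
print(num_seqs, last_seq_idx)  # -> 4 3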
returnn/torch/util/debug_inf_nan.py
CHANGED
@@ -52,6 +52,7 @@ def debug_inf_nan(
     *,
     with_grad: bool = False,
     report_every_op_call: bool = True,
+    stop_reporting_after_first_inf_nan: bool = True,
     file: Optional[Union[TextIO, TextIOBase]] = None,
 ):
     """
@@ -61,6 +62,7 @@ def debug_inf_nan(
         and we will call `loss = func(); loss.backward()`.
     :param with_grad: whether to compute and debug gradients for inf/nan.
     :param report_every_op_call: whether to report every op call.
+    :param stop_reporting_after_first_inf_nan: whether to stop reporting after the first inf/nan.
     :param file: where to write the output to. Default is stdout.
     """
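A hedged usage sketch of the extended debug_inf_nan signature; the keyword arguments follow the hunks above, while the toy loss function is illustrative:

import torch
from returnn.torch.util.debug_inf_nan import debug_inf_nan


def _loss() -> torch.Tensor:
    x = torch.tensor([1.0, 0.0, 2.0], requires_grad=True)
    return (1.0 / x).sum()  # division by zero puts an inf into the forward pass


# Report every op producing inf/nan instead of stopping after the first one.
debug_inf_nan(_loss, with_grad=True, stop_reporting_after_first_inf_nan=False)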
@@ -69,13 +71,18 @@ def debug_inf_nan(

     # noinspection PyUnresolvedReferences,PyProtectedMember
     cur_frame: FrameType = sys._getframe()
-    trace_ops = _TraceOps(
+    trace_ops = _TraceOps(
+        root_frame=cur_frame,
+        file=file,
+        report_every_op_call=report_every_op_call,
+        stop_reporting_after_first_inf_nan=stop_reporting_after_first_inf_nan,
+    )

     if with_grad:
-
         with torch.autograd.detect_anomaly():
             with trace_ops:  # currently only for forward (but we might want to trace the backward too)
                 loss = func()
+            file.flush()  # the backward detect_anomaly might screw up the output otherwise
             try:
                 loss.backward()
             except RuntimeError as exc:
@@ -89,23 +96,46 @@ def debug_inf_nan(

 # For efficiency, and to be less spammy
 _TraceFuncNameBlacklist = {
-    "aten::detach",
     "aten::zeros_like",
     "aten::ones_like",
+    "aten::full",
+    "aten::scalar_tensor",  # when we deliberately create a scalar inf tensor
+    "aten::_local_scalar_dense",
+    "aten::where.self",  # when we intentionally mask with inf
+    "aten::detach",
+    "aten::_to_copy",
+    "aten::clone",
+    "aten::stack",
+    "aten::view",
+    "aten::_unsafe_view",
+    "aten::permute",
+    "aten::t",
+    "aten::split_with_sizes",
+    "aten::slice.Tensor",
+    "aten::select.int",
 }


 class _TraceOps(TorchDispatchMode):
-    def __init__(
+    def __init__(
+        self,
+        *,
+        root_frame: FrameType,
+        file: Union[TextIO, TextIOBase],
+        report_every_op_call: bool = True,
+        stop_reporting_after_first_inf_nan: bool = True,
+    ):
         super().__init__()
         self.root_frame = root_frame
         self.file = file
+        self.enabled = True
         self.report_every_op_call = report_every_op_call
+        self.stop_reporting_after_first_inf_nan = stop_reporting_after_first_inf_nan

     def __torch_dispatch__(self, func, types, args=(), kwargs=None):
         if kwargs is None:
             kwargs = {}
-        if func.name() in _TraceFuncNameBlacklist:
+        if not self.enabled or func.name() in _TraceFuncNameBlacklist:
             return func(*args, **kwargs)
         if self.report_every_op_call:
             print(f"--- op {func.name()}", file=self.file)
@@ -121,6 +151,8 @@ class _TraceOps(TorchDispatchMode):
             traceback.print_list(
                 _extract_stack_up_to(skip_top_num_frames=1, root_frame=self.root_frame), file=self.file
             )
+            if self.stop_reporting_after_first_inf_nan:
+                self.enabled = False
         return out

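The enabled flag above is the whole mechanism behind stop_reporting_after_first_inf_nan: once an offending op has been reported, __torch_dispatch__ degrades to a pass-through. A minimal standalone sketch of that pattern, independent of RETURNN and using only the public TorchDispatchMode API (class and variable names are illustrative):

import torch
from torch.utils._python_dispatch import TorchDispatchMode


class FirstInfNanReporter(TorchDispatchMode):
    """Report the first op whose output contains inf/nan, then stay silent."""

    def __init__(self):
        super().__init__()
        self.enabled = True

    def __torch_dispatch__(self, func, types, args=(), kwargs=None):
        out = func(*args, **(kwargs or {}))
        if self.enabled:
            outs = out if isinstance(out, (tuple, list)) else [out]
            tensors = [t for t in outs if isinstance(t, torch.Tensor) and t.is_floating_point()]
            if any(not torch.isfinite(t).all() for t in tensors):
                print(f"first inf/nan produced by op {func.name()}")
                self.enabled = False  # like stop_reporting_after_first_inf_nan
        return out


with FirstInfNanReporter():
    x = torch.tensor([0.0, 1.0])
    y = torch.log(x)  # log(0) -> -inf, reported once
    z = y * 2.0  # further non-finite results are not reported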
{returnn-1.20250204.160236.dist-info → returnn-1.20250206.151011.dist-info}/RECORD
CHANGED
@@ -1,9 +1,9 @@
-returnn/PKG-INFO,sha256=
+returnn/PKG-INFO,sha256=BbQPkoVha1AYEcED8txzZyyyDiJt3J29FBKlYy1rTYc,5215
 returnn/__init__.py,sha256=biBtRsM0WZ406vShaeH-9WFoqJ8XwTbn6g0EeFJ7l8E,1012
 returnn/__main__.py,sha256=qBFbuB1yN3adgVM5pXt2-Yq9vorjRNchNPL8kDKx44M,31752
 returnn/__old_mod_loader__.py,sha256=nvsNY-xELdS_IPNkv66Q9Rmvg4dbGW0-EBRDcCmctos,7654
 returnn/__setup__.py,sha256=22kQn2fh11iPM0hLb2Fy5sLmoU1JGvmDxXRYuRgQkwU,4659
-returnn/_setup_info_generated.py,sha256=
+returnn/_setup_info_generated.py,sha256=9T1yfQUP7ASjffpzcwvOLEGNWMdiwS4EmjqY_bI2EdY,77
 returnn/config.py,sha256=3tmKhB6FnQZaNdtcYsiB61JnEY--iZ2qmJ4yq0b6tE0,29140
 returnn/forward_iface.py,sha256=A_OJiaXsX4MlXQRzST86ylyxSUZbC402PQL1REcqHjM,911
 returnn/learning_rate_control.py,sha256=ZvWryAn_tv9DhV8sh1LV3eE34Yltl3On3mYZAG4hR9s,34684
@@ -141,7 +141,7 @@ returnn/frontend/encoder/base.py,sha256=A759EwCYAmSi-kzXz1vaTjR2l59TvNGQlzaNdp3U
 returnn/frontend/encoder/conformer.py,sha256=YPtH0Clx2QrKOoxbtUSkYR7QiDp-EYmoOcGc_gc2ZEk,19778
 returnn/frontend/encoder/conformer_v2.py,sha256=vAYdT8m2Zzg3IIZZafeccClFHU1_c9T-EgBOsHadQPA,7701
 returnn/frontend/encoder/e_branchformer.py,sha256=zEla-iXJciK7bCenlTwsPB8dXo_VPMlFm2xc3op_lPY,12278
-returnn/frontend/encoder/transformer.py,sha256=
+returnn/frontend/encoder/transformer.py,sha256=Jj0mF1D2MohOk-9sGYdsLtVW_86fwoq4pKWCdPMvPR8,11580
 returnn/import_/__init__.py,sha256=L2dKxWCcn0fz_7H7OS-zw5i5Yrljjjh_d61dEcFP_JY,243
 returnn/import_/common.py,sha256=0cmvyd7NtMLH55IskEoSDtkcMwChxLhauV2UZ4mK68I,8148
 returnn/import_/git.py,sha256=IXBVOybQAHf5OlMfVY6oZ-7eiDYPG0OR7MyDJKcVHSM,13961
@@ -162,7 +162,7 @@ returnn/tensor/control_flow_ctx.py,sha256=L9e32AfYDUDgsEDHL07thSFyYFqwhyVSqzE_bM
 returnn/tensor/dim.py,sha256=652DlcSe6o6l5OyY5xt9Yigij_Xry-ToG9AemMX3roY,4208
 returnn/tensor/marked_dim.py,sha256=Ae2hQIb5QixRU2gDhQEm0tmYt8TmomWoGERB414jR8o,1884
 returnn/tensor/tensor.py,sha256=bisF7j3rU5Rvx8C8S57C9hGo2jgWwTaQ6wc_Db7Mwpw,9087
-returnn/tensor/tensor_dict.py,sha256=
+returnn/tensor/tensor_dict.py,sha256=WTqMefemeHQG381MVUjvHMmYVd2TV9IQ0qU4i_XJi3c,7146
 returnn/tensor/utils.py,sha256=B6_XyNTXPIyLxWk061Qo-Md8_DnINGdVwpXJF6pahBk,9772
 returnn/tf/__init__.py,sha256=X4g2LFCFTl0uiybMRkfBY8AYkgMa6HX0vVxxTk0nMiE,88
 returnn/tf/compat.py,sha256=NkAkdlR37m2d9qh3i33sIfEGilOaFBeCofAQpQwnZpY,1632
@@ -207,7 +207,7 @@ returnn/tf/util/open_fst.py,sha256=sZRDw4TbxvhGqpGdUJWy1ebvlZm4_RPhygpRw9uLAOQ,1
 returnn/torch/README.md,sha256=jzJ2FpOHW02vxN69yKaV97C9LI-hmvjBglKfdZXIDdc,85
 returnn/torch/__init__.py,sha256=MHEUyNHB20Vy89uKAqZoj6FxJKF1Gq3HW-i6ra1pNcI,24
 returnn/torch/distributed.py,sha256=i13cUVjI7GxpO0TAresrNyCM0ZBAaf-cXNr09Fmg_2k,6266
-returnn/torch/engine.py,sha256=
+returnn/torch/engine.py,sha256=neM-AL7XQLpZ3V1K4ziqVmij19ey1k2MpLCaFXATOpg,76301
 returnn/torch/updater.py,sha256=GqtBvZpElPVMm0lq84JPl4NVLFFETZAzAbR0rTomSao,28249
 returnn/torch/data/__init__.py,sha256=6cLNEi8KoGI12PF6akN7mI_mtjlx-0hcQAfMYoExwik,132
 returnn/torch/data/extern_data.py,sha256=_uT_9_gd5HIh1IoRsrebVG-nufSnb7fgC5jyU05GxJg,7580
@@ -226,7 +226,7 @@ returnn/torch/optim/lion.py,sha256=jV_qfwyyO5HAgqW94caap-ALkVjU688RpRgkZyLNZ5Y,5
 returnn/torch/util/README.md,sha256=AW-6ueWhgcwDcm57md6sm227QXNkvLnlRLwaH7NlS-w,193
 returnn/torch/util/__init__.py,sha256=AOXYUjzPm0XrzFJCPAXo9Jj_FvqD1XH3FfKtho80Vl8,26
 returnn/torch/util/array_.py,sha256=ell3VZvn01SLtF9Pw2fvPzFNO-XDQ7tSB9VCrVSKmSA,2556
-returnn/torch/util/debug_inf_nan.py,sha256=
+returnn/torch/util/debug_inf_nan.py,sha256=v0IzLy4kRKBWChSV70O4x829QtEuXMwB9mBqAyE4O2o,6223
 returnn/torch/util/diagnose_gpu.py,sha256=PYMmSk7iQ-jC3RXKKNXlYx1Q744C0LXqz0SB6ympwQg,5844
 returnn/torch/util/exception_helper.py,sha256=4e7YEf9D42aAUEkM3uSjnOxpNEYgtyPSpNV0-1L6PSU,4319
 returnn/torch/util/gradient_checkpoint.py,sha256=iLy-FB65DC8O6LxzmMvFjnSdpIVpko87ppIvRKAbtpQ,27995
@@ -253,8 +253,8 @@ returnn/util/sig_proc.py,sha256=Tjz0VOAVyqu2qDCF5HZ1JjALjcFsHcNkcd96WgZeKfE,7265
 returnn/util/task_system.py,sha256=y4sMVXQ25Qd2z0rx03uOlXlkE-jbCYC1Sjfn-XlraVU,26003
 returnn/util/train_proc_manager.py,sha256=Pjht28k6uz6BNQ47uW6Gf880iyq5q4wx7P_K2tmoAM8,3266
 returnn/util/watch_memory.py,sha256=BR5P2kvBN6UI81cE0_1WAA6Hd1SByLbBaiDxvLhPOew,4213
-returnn-1.
-returnn-1.
-returnn-1.
-returnn-1.
-returnn-1.
+returnn-1.20250206.151011.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
+returnn-1.20250206.151011.dist-info/METADATA,sha256=BbQPkoVha1AYEcED8txzZyyyDiJt3J29FBKlYy1rTYc,5215
+returnn-1.20250206.151011.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
+returnn-1.20250206.151011.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
+returnn-1.20250206.151011.dist-info/RECORD,,
{returnn-1.20250204.160236.dist-info → returnn-1.20250206.151011.dist-info}/LICENSE
File without changes
{returnn-1.20250204.160236.dist-info → returnn-1.20250206.151011.dist-info}/WHEEL
File without changes
{returnn-1.20250204.160236.dist-info → returnn-1.20250206.151011.dist-info}/top_level.txt
File without changes