returnn 1.20251013.3246__py3-none-any.whl → 1.20251013.131953__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of returnn might be problematic. Click here for more details.
- returnn/PKG-INFO +1 -1
- returnn/_setup_info_generated.py +2 -2
- returnn/torch/engine.py +31 -0
- {returnn-1.20251013.3246.dist-info → returnn-1.20251013.131953.dist-info}/METADATA +1 -1
- {returnn-1.20251013.3246.dist-info → returnn-1.20251013.131953.dist-info}/RECORD +8 -8
- {returnn-1.20251013.3246.dist-info → returnn-1.20251013.131953.dist-info}/LICENSE +0 -0
- {returnn-1.20251013.3246.dist-info → returnn-1.20251013.131953.dist-info}/WHEEL +0 -0
- {returnn-1.20251013.3246.dist-info → returnn-1.20251013.131953.dist-info}/top_level.txt +0 -0
returnn/PKG-INFO
CHANGED
returnn/_setup_info_generated.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
version = '1.20251013.3246'
|
|
2
|
-
long_version = '1.20251013.
|
|
1
|
+
version = '1.20251013.131953'
|
|
2
|
+
long_version = '1.20251013.131953+git.e1762d8'
|
returnn/torch/engine.py
CHANGED
|
@@ -134,6 +134,14 @@ class Engine(EngineBase):
|
|
|
134
134
|
self._forward_auto_split_batch_on_oom = config.bool("forward_auto_split_batch_on_oom", False)
|
|
135
135
|
self._stop_on_nonfinite_train_score = config.bool("stop_on_nonfinite_train_score", True)
|
|
136
136
|
|
|
137
|
+
if config.bool("use_tensorboard", False):
|
|
138
|
+
from torch.utils.tensorboard import SummaryWriter
|
|
139
|
+
|
|
140
|
+
self._tensorboard_writer = SummaryWriter()
|
|
141
|
+
self._tensorboard_opts = config.typed_value("tensorboard_opts", {})
|
|
142
|
+
else:
|
|
143
|
+
self._tensorboard_writer = None
|
|
144
|
+
|
|
137
145
|
default_float_dtype = config.value("default_float_dtype", None)
|
|
138
146
|
if default_float_dtype is not None:
|
|
139
147
|
assert isinstance(default_float_dtype, str)
|
|
@@ -257,6 +265,9 @@ class Engine(EngineBase):
|
|
|
257
265
|
self.init_train_epoch()
|
|
258
266
|
self.train_epoch()
|
|
259
267
|
|
|
268
|
+
if self._tensorboard_writer:
|
|
269
|
+
self._tensorboard_writer.close()
|
|
270
|
+
|
|
260
271
|
print(f"Finished training at epoch {self.epoch}, global train step {self.global_train_step}", file=log.v3)
|
|
261
272
|
|
|
262
273
|
def init_train_epoch(self):
|
|
@@ -513,6 +524,18 @@ class Engine(EngineBase):
|
|
|
513
524
|
batch_size_info=_get_batch_size_info(extern_data) if self._log_batch_size else None,
|
|
514
525
|
log_memory_usage_device=self._device if self._log_memory_usage else None,
|
|
515
526
|
)
|
|
527
|
+
if (
|
|
528
|
+
self._tensorboard_writer
|
|
529
|
+
and self.global_train_step % self._tensorboard_opts.get("log_every_n_train_steps", 100) == 0
|
|
530
|
+
):
|
|
531
|
+
# write losses/errors to tensorboard
|
|
532
|
+
for key, val in eval_info.items():
|
|
533
|
+
self._tensorboard_writer.add_scalar(f"train/{key}", val, global_step=self.global_train_step)
|
|
534
|
+
self._tensorboard_writer.add_scalar(
|
|
535
|
+
f"train/learning_rate",
|
|
536
|
+
self._updater.get_effective_learning_rate(),
|
|
537
|
+
global_step=self.global_train_step,
|
|
538
|
+
)
|
|
516
539
|
|
|
517
540
|
if self._stop_on_nonfinite_train_score:
|
|
518
541
|
if any(np.isinf(v) or np.isnan(v) for v in accumulated_losses_dict.values()):
|
|
@@ -702,12 +725,20 @@ class Engine(EngineBase):
|
|
|
702
725
|
start_elapsed=step_end_time - eval_start_time,
|
|
703
726
|
log_memory_usage_device=self._device if self._log_memory_usage else None,
|
|
704
727
|
)
|
|
728
|
+
|
|
705
729
|
step_idx += 1
|
|
706
730
|
|
|
707
731
|
assert step_idx > 0, f"No data in dataset {dataset_name!r}."
|
|
708
732
|
accumulated_losses_dict = accumulated_losses_dict / accumulated_inv_norm_factors_dict
|
|
709
733
|
accumulated_losses_dict = self._maybe_extend_losses_info(accumulated_losses_dict)
|
|
710
734
|
|
|
735
|
+
if self._tensorboard_writer:
|
|
736
|
+
# write losses/errors to tensorboard
|
|
737
|
+
for key, val in accumulated_losses_dict.items():
|
|
738
|
+
self._tensorboard_writer.add_scalar(
|
|
739
|
+
f"{dataset_name}/{key}", val, global_step=self.global_train_step
|
|
740
|
+
)
|
|
741
|
+
|
|
711
742
|
self.learning_rate_control.set_epoch_error(
|
|
712
743
|
self.epoch, {f"{dataset_name}_loss_{k}": v for k, v in accumulated_losses_dict.items()}
|
|
713
744
|
)
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
returnn/PKG-INFO,sha256=
|
|
1
|
+
returnn/PKG-INFO,sha256=gol7200kSqRcGRCYmHX_Vfg0o3XeXoixOD4si9dZQpM,5215
|
|
2
2
|
returnn/__init__.py,sha256=biBtRsM0WZ406vShaeH-9WFoqJ8XwTbn6g0EeFJ7l8E,1012
|
|
3
3
|
returnn/__main__.py,sha256=lHyZcu_0yc9f7Vf_Kfdy9PmeU0T76XVXnpalHi5WKro,31740
|
|
4
4
|
returnn/__old_mod_loader__.py,sha256=nvsNY-xELdS_IPNkv66Q9Rmvg4dbGW0-EBRDcCmctos,7654
|
|
5
5
|
returnn/__setup__.py,sha256=22kQn2fh11iPM0hLb2Fy5sLmoU1JGvmDxXRYuRgQkwU,4659
|
|
6
|
-
returnn/_setup_info_generated.py,sha256=
|
|
6
|
+
returnn/_setup_info_generated.py,sha256=mhKUcDNacm00cBntcCP2-qZd0QlSy6jmbw7ILyfu7dQ,77
|
|
7
7
|
returnn/config.py,sha256=3tmKhB6FnQZaNdtcYsiB61JnEY--iZ2qmJ4yq0b6tE0,29140
|
|
8
8
|
returnn/forward_iface.py,sha256=A_OJiaXsX4MlXQRzST86ylyxSUZbC402PQL1REcqHjM,911
|
|
9
9
|
returnn/learning_rate_control.py,sha256=ZvWryAn_tv9DhV8sh1LV3eE34Yltl3On3mYZAG4hR9s,34684
|
|
@@ -208,7 +208,7 @@ returnn/tf/util/open_fst.py,sha256=sZRDw4TbxvhGqpGdUJWy1ebvlZm4_RPhygpRw9uLAOQ,1
|
|
|
208
208
|
returnn/torch/README.md,sha256=jzJ2FpOHW02vxN69yKaV97C9LI-hmvjBglKfdZXIDdc,85
|
|
209
209
|
returnn/torch/__init__.py,sha256=MHEUyNHB20Vy89uKAqZoj6FxJKF1Gq3HW-i6ra1pNcI,24
|
|
210
210
|
returnn/torch/distributed.py,sha256=_lyJR71HIoCHpMi5GztGM7YwrX54Am8zSkjnDkE1Lbk,7524
|
|
211
|
-
returnn/torch/engine.py,sha256=
|
|
211
|
+
returnn/torch/engine.py,sha256=Lj_Go3Q5oHOVrHdNcz_6YLjODsiGUsJVMbU0EzSDByk,79677
|
|
212
212
|
returnn/torch/updater.py,sha256=nNd1mBPQyvIB096BEFi0KKmRI-U3jnRETzb743p2B9c,32064
|
|
213
213
|
returnn/torch/data/__init__.py,sha256=6cLNEi8KoGI12PF6akN7mI_mtjlx-0hcQAfMYoExwik,132
|
|
214
214
|
returnn/torch/data/extern_data.py,sha256=5al706ZaYtHWLp5VH2vS-rW69YXP3NHyOFRKY0WY714,7810
|
|
@@ -254,8 +254,8 @@ returnn/util/sig_proc.py,sha256=Tjz0VOAVyqu2qDCF5HZ1JjALjcFsHcNkcd96WgZeKfE,7265
|
|
|
254
254
|
returnn/util/task_system.py,sha256=y4sMVXQ25Qd2z0rx03uOlXlkE-jbCYC1Sjfn-XlraVU,26003
|
|
255
255
|
returnn/util/train_proc_manager.py,sha256=Pjht28k6uz6BNQ47uW6Gf880iyq5q4wx7P_K2tmoAM8,3266
|
|
256
256
|
returnn/util/watch_memory.py,sha256=BR5P2kvBN6UI81cE0_1WAA6Hd1SByLbBaiDxvLhPOew,4213
|
|
257
|
-
returnn-1.20251013.3246.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
|
|
258
|
-
returnn-1.20251013.
|
|
259
|
-
returnn-1.20251013.3246.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
|
260
|
-
returnn-1.20251013.3246.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
|
|
261
|
-
returnn-1.20251013.3246.dist-info/RECORD,,
|
|
257
|
+
returnn-1.20251013.131953.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
|
|
258
|
+
returnn-1.20251013.131953.dist-info/METADATA,sha256=gol7200kSqRcGRCYmHX_Vfg0o3XeXoixOD4si9dZQpM,5215
|
|
259
|
+
returnn-1.20251013.131953.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
|
260
|
+
returnn-1.20251013.131953.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
|
|
261
|
+
returnn-1.20251013.131953.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|