returnn 1.20251013.113026-py3-none-any.whl → 1.20251013.131953-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of returnn might be problematic.

returnn/PKG-INFO CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: returnn
- Version: 1.20251013.113026
+ Version: 1.20251013.131953
  Summary: The RWTH extensible training framework for universal recurrent neural networks
  Home-page: https://github.com/rwth-i6/returnn/
  Author: Albert Zeyer
returnn/_setup_info_generated.py CHANGED
@@ -1,2 +1,2 @@
- version = '1.20251013.113026'
- long_version = '1.20251013.113026+git.dbc7e8e'
+ version = '1.20251013.131953'
+ long_version = '1.20251013.131953+git.e1762d8'
returnn/torch/engine.py CHANGED
@@ -134,6 +134,14 @@ class Engine(EngineBase):
  self._forward_auto_split_batch_on_oom = config.bool("forward_auto_split_batch_on_oom", False)
  self._stop_on_nonfinite_train_score = config.bool("stop_on_nonfinite_train_score", True)

+ if config.bool("use_tensorboard", False):
+     from torch.utils.tensorboard import SummaryWriter
+
+     self._tensorboard_writer = SummaryWriter()
+     self._tensorboard_opts = config.typed_value("tensorboard_opts", {})
+ else:
+     self._tensorboard_writer = None
+
  default_float_dtype = config.value("default_float_dtype", None)
  if default_float_dtype is not None:
      assert isinstance(default_float_dtype, str)
@@ -257,6 +265,9 @@ class Engine(EngineBase):
      self.init_train_epoch()
      self.train_epoch()

+     if self._tensorboard_writer:
+         self._tensorboard_writer.close()
+
      print(f"Finished training at epoch {self.epoch}, global train step {self.global_train_step}", file=log.v3)

  def init_train_epoch(self):
@@ -513,6 +524,18 @@ class Engine(EngineBase):
      batch_size_info=_get_batch_size_info(extern_data) if self._log_batch_size else None,
      log_memory_usage_device=self._device if self._log_memory_usage else None,
  )
+ if (
+     self._tensorboard_writer
+     and self.global_train_step % self._tensorboard_opts.get("log_every_n_train_steps", 100) == 0
+ ):
+     # write losses/errors to tensorboard
+     for key, val in eval_info.items():
+         self._tensorboard_writer.add_scalar(f"train/{key}", val, global_step=self.global_train_step)
+     self._tensorboard_writer.add_scalar(
+         f"train/learning_rate",
+         self._updater.get_effective_learning_rate(),
+         global_step=self.global_train_step,
+     )

  if self._stop_on_nonfinite_train_score:
      if any(np.isinf(v) or np.isnan(v) for v in accumulated_losses_dict.values()):
@@ -702,12 +725,20 @@ class Engine(EngineBase):
          start_elapsed=step_end_time - eval_start_time,
          log_memory_usage_device=self._device if self._log_memory_usage else None,
      )
+
      step_idx += 1

  assert step_idx > 0, f"No data in dataset {dataset_name!r}."
  accumulated_losses_dict = accumulated_losses_dict / accumulated_inv_norm_factors_dict
  accumulated_losses_dict = self._maybe_extend_losses_info(accumulated_losses_dict)

+ if self._tensorboard_writer:
+     # write losses/errors to tensorboard
+     for key, val in accumulated_losses_dict.items():
+         self._tensorboard_writer.add_scalar(
+             f"{dataset_name}/{key}", val, global_step=self.global_train_step
+         )
+
  self.learning_rate_control.set_epoch_error(
      self.epoch, {f"{dataset_name}_loss_{k}": v for k, v in accumulated_losses_dict.items()}
  )
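
The functional change in this release is the opt-in TensorBoard logging added to the PyTorch engine above. As a usage illustration, here is a minimal sketch of a RETURNN config fragment that would enable it; only `use_tensorboard` and `tensorboard_opts` are keys actually read by the code in this diff, and the remaining entries are hypothetical placeholders:

# Hypothetical RETURNN config fragment (RETURNN configs are plain Python files).
backend = "torch"  # placeholder: the logging above lives in the torch engine
task = "train"     # placeholder

# Read in Engine.__init__ (first hunk above): creates a SummaryWriter.
use_tensorboard = True

# Read via config.typed_value("tensorboard_opts", {}); this diff only consults
# "log_every_n_train_steps" (default 100) to throttle train-step scalars.
tensorboard_opts = {"log_every_n_train_steps": 50}

Since the writer is constructed as `SummaryWriter()` with no arguments, event files land in PyTorch's default `./runs/<timestamp>_<hostname>/` directory, so the logged `train/*` and `<dataset_name>/*` scalars can be inspected with `tensorboard --logdir runs`.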
returnn-1.20251013.113026.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: returnn
- Version: 1.20251013.113026
+ Version: 1.20251013.131953
  Summary: The RWTH extensible training framework for universal recurrent neural networks
  Home-page: https://github.com/rwth-i6/returnn/
  Author: Albert Zeyer
returnn-1.20251013.113026.dist-info/RECORD CHANGED
@@ -1,9 +1,9 @@
- returnn/PKG-INFO,sha256=21rH_SscXsPoNZbOjOnE6SMzkvwNNZWKTFzl8ZkxEmI,5215
+ returnn/PKG-INFO,sha256=gol7200kSqRcGRCYmHX_Vfg0o3XeXoixOD4si9dZQpM,5215
  returnn/__init__.py,sha256=biBtRsM0WZ406vShaeH-9WFoqJ8XwTbn6g0EeFJ7l8E,1012
  returnn/__main__.py,sha256=lHyZcu_0yc9f7Vf_Kfdy9PmeU0T76XVXnpalHi5WKro,31740
  returnn/__old_mod_loader__.py,sha256=nvsNY-xELdS_IPNkv66Q9Rmvg4dbGW0-EBRDcCmctos,7654
  returnn/__setup__.py,sha256=22kQn2fh11iPM0hLb2Fy5sLmoU1JGvmDxXRYuRgQkwU,4659
- returnn/_setup_info_generated.py,sha256=UBxhUB_ZDJrxS3j3Z2ZbXF6eKFLL9jhpqCApzvEmhr8,77
+ returnn/_setup_info_generated.py,sha256=mhKUcDNacm00cBntcCP2-qZd0QlSy6jmbw7ILyfu7dQ,77
  returnn/config.py,sha256=3tmKhB6FnQZaNdtcYsiB61JnEY--iZ2qmJ4yq0b6tE0,29140
  returnn/forward_iface.py,sha256=A_OJiaXsX4MlXQRzST86ylyxSUZbC402PQL1REcqHjM,911
  returnn/learning_rate_control.py,sha256=ZvWryAn_tv9DhV8sh1LV3eE34Yltl3On3mYZAG4hR9s,34684
@@ -208,7 +208,7 @@ returnn/tf/util/open_fst.py,sha256=sZRDw4TbxvhGqpGdUJWy1ebvlZm4_RPhygpRw9uLAOQ,1
  returnn/torch/README.md,sha256=jzJ2FpOHW02vxN69yKaV97C9LI-hmvjBglKfdZXIDdc,85
  returnn/torch/__init__.py,sha256=MHEUyNHB20Vy89uKAqZoj6FxJKF1Gq3HW-i6ra1pNcI,24
  returnn/torch/distributed.py,sha256=_lyJR71HIoCHpMi5GztGM7YwrX54Am8zSkjnDkE1Lbk,7524
- returnn/torch/engine.py,sha256=JSsQZZiVs9TxRyFEJuR3iH-YZb9sRw7TzoIAIqmplZY,78275
+ returnn/torch/engine.py,sha256=Lj_Go3Q5oHOVrHdNcz_6YLjODsiGUsJVMbU0EzSDByk,79677
  returnn/torch/updater.py,sha256=nNd1mBPQyvIB096BEFi0KKmRI-U3jnRETzb743p2B9c,32064
  returnn/torch/data/__init__.py,sha256=6cLNEi8KoGI12PF6akN7mI_mtjlx-0hcQAfMYoExwik,132
  returnn/torch/data/extern_data.py,sha256=5al706ZaYtHWLp5VH2vS-rW69YXP3NHyOFRKY0WY714,7810
@@ -254,8 +254,8 @@ returnn/util/sig_proc.py,sha256=Tjz0VOAVyqu2qDCF5HZ1JjALjcFsHcNkcd96WgZeKfE,7265
  returnn/util/task_system.py,sha256=y4sMVXQ25Qd2z0rx03uOlXlkE-jbCYC1Sjfn-XlraVU,26003
  returnn/util/train_proc_manager.py,sha256=Pjht28k6uz6BNQ47uW6Gf880iyq5q4wx7P_K2tmoAM8,3266
  returnn/util/watch_memory.py,sha256=BR5P2kvBN6UI81cE0_1WAA6Hd1SByLbBaiDxvLhPOew,4213
- returnn-1.20251013.113026.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
- returnn-1.20251013.113026.dist-info/METADATA,sha256=21rH_SscXsPoNZbOjOnE6SMzkvwNNZWKTFzl8ZkxEmI,5215
- returnn-1.20251013.113026.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
- returnn-1.20251013.113026.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
- returnn-1.20251013.113026.dist-info/RECORD,,
+ returnn-1.20251013.131953.dist-info/LICENSE,sha256=ywBD_U2aD4vpuoIgNAsjIGBYydl0tVKll3De0Z8s77c,11041
+ returnn-1.20251013.131953.dist-info/METADATA,sha256=gol7200kSqRcGRCYmHX_Vfg0o3XeXoixOD4si9dZQpM,5215
+ returnn-1.20251013.131953.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
+ returnn-1.20251013.131953.dist-info/top_level.txt,sha256=Lsn4WZc5Pbfk0-xDQOgnFCxOoqxL4CyeM3N1TFbJncw,8
+ returnn-1.20251013.131953.dist-info/RECORD,,