returnn 1.20241017.4429__tar.gz → 1.20241018.213651__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (465) hide show
  1. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/PKG-INFO +1 -1
  2. returnn-1.20241018.213651/_setup_info_generated.py +2 -0
  3. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/torch/data/pipeline.py +4 -1
  4. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/torch/data/returnn_dataset_wrapper.py +11 -0
  5. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/torch/engine.py +60 -12
  6. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/torch/updater.py +26 -4
  7. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn.egg-info/PKG-INFO +1 -1
  8. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_torch_engine.py +66 -2
  9. returnn-1.20241017.4429/_setup_info_generated.py +0 -2
  10. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/.editorconfig +0 -0
  11. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/.gitignore +0 -0
  12. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/.gitmodules +0 -0
  13. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/.kateconfig +0 -0
  14. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/CHANGELOG.md +0 -0
  15. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/CODEOWNERS +0 -0
  16. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/CONTRIBUTING.md +0 -0
  17. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/LICENSE +0 -0
  18. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/MANIFEST.in +0 -0
  19. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/README.rst +0 -0
  20. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/__init__.py +0 -0
  21. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/12AX.cluster_map +0 -0
  22. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/_setup_returnn_env.py +0 -0
  23. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-fwd.config +0 -0
  24. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-horovod-mpi.py +0 -0
  25. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-horovod-mpi.py.sh +0 -0
  26. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-horovod-mpi.sh +0 -0
  27. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-hyper-param-tuning.config +0 -0
  28. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-iter-dataset.py +0 -0
  29. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-list-devices.py +0 -0
  30. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-lua-torch-layer.config +0 -0
  31. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-pretrain.config +0 -0
  32. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-record-and-push-to-webserver.py +0 -0
  33. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-returnn-as-framework.py +0 -0
  34. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-rf-pt-benchmark.py +0 -0
  35. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-rf.config +0 -0
  36. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-rhn-enwik8.config +0 -0
  37. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-sprint-interface.py +0 -0
  38. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-tf-att-copy.config +0 -0
  39. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-tf-attention.config +0 -0
  40. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
  41. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
  42. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-tf-enc-dec.config +0 -0
  43. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-tf-hard-att-copy.config +0 -0
  44. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-tf-lstm-benchmark.py +0 -0
  45. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
  46. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
  47. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-tf-native-lstm.12ax.config +0 -0
  48. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-tf-native-lstm2.12ax.config +0 -0
  49. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
  50. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-tf-neural-transducer.12ax.config +0 -0
  51. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-tf-rec-explicit-lstm.config +0 -0
  52. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-tf-rec-explicit-rnn.config +0 -0
  53. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-tf-rec-self-att.config +0 -0
  54. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-tf-search-compiled-graph.py +0 -0
  55. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
  56. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-timit-lstm-ctc.config +0 -0
  57. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-torch.config +0 -0
  58. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
  59. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/demo.sh +0 -0
  60. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
  61. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
  62. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
  63. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/mdlstm/IAM/README.txt +0 -0
  64. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/mdlstm/IAM/chars.txt +0 -0
  65. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/mdlstm/IAM/config_demo +0 -0
  66. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/mdlstm/IAM/config_fwd +0 -0
  67. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/mdlstm/IAM/config_real +0 -0
  68. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
  69. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/mdlstm/IAM/decode.py +0 -0
  70. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
  71. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/mdlstm/IAM/go.sh +0 -0
  72. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/mdlstm/IAM/lines.txt +0 -0
  73. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/mdlstm/IAM/split/eval.txt +0 -0
  74. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/mdlstm/IAM/split/train.txt +0 -0
  75. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/mdlstm/IAM/split/valid.txt +0 -0
  76. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/mdlstm/README.md +0 -0
  77. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/mdlstm/artificial/create_test_h5.py +0 -0
  78. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/mdlstm/artificial/forwardconfig +0 -0
  79. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/mdlstm/artificial/go.sh +0 -0
  80. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/mdlstm/artificial/trainconfig +0 -0
  81. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
  82. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
  83. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/mdlstm/artificial_rgb/go.sh +0 -0
  84. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
  85. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/pyproject.toml +0 -0
  86. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/requirements.txt +0 -0
  87. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/__init__.py +0 -0
  88. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/__main__.py +0 -0
  89. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/__old_mod_loader__.py +0 -0
  90. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/__setup__.py +0 -0
  91. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/config.py +0 -0
  92. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/datasets/__init__.py +0 -0
  93. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/datasets/audio.py +0 -0
  94. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/datasets/basic.py +0 -0
  95. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/datasets/bundle_file.py +0 -0
  96. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/datasets/cached.py +0 -0
  97. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/datasets/cached2.py +0 -0
  98. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/datasets/distrib_files.py +0 -0
  99. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/datasets/generating.py +0 -0
  100. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/datasets/hdf.py +0 -0
  101. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/datasets/lm.py +0 -0
  102. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/datasets/map.py +0 -0
  103. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/datasets/meta.py +0 -0
  104. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/datasets/multi_proc.py +0 -0
  105. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/datasets/normalization_data.py +0 -0
  106. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/datasets/numpy_dump.py +0 -0
  107. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/datasets/postprocessing.py +0 -0
  108. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/datasets/raw_wav.py +0 -0
  109. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/datasets/sprint.py +0 -0
  110. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/datasets/stereo.py +0 -0
  111. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/datasets/util/__init__.py +0 -0
  112. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/datasets/util/feature_extraction.py +0 -0
  113. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/datasets/util/strings.py +0 -0
  114. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/datasets/util/vocabulary.py +0 -0
  115. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/engine/__init__.py +0 -0
  116. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/engine/base.py +0 -0
  117. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/engine/batch.py +0 -0
  118. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/__init__.py +0 -0
  119. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/__main__.py +0 -0
  120. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/warp-rna/.git +0 -0
  121. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
  122. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
  123. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
  124. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
  125. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
  126. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
  127. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
  128. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
  129. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
  130. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
  131. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
  132. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
  133. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
  134. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
  135. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
  136. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
  137. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
  138. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
  139. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
  140. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
  141. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
  142. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
  143. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
  144. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
  145. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/__init__.py +0 -0
  146. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/graph_editor/README.md +0 -0
  147. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/graph_editor/__init__.py +0 -0
  148. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/graph_editor/edit.py +0 -0
  149. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/graph_editor/reroute.py +0 -0
  150. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/graph_editor/select.py +0 -0
  151. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/graph_editor/subgraph.py +0 -0
  152. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/graph_editor/transform.py +0 -0
  153. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/extern/graph_editor/util.py +0 -0
  154. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/forward_iface.py +0 -0
  155. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/__init__.py +0 -0
  156. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/_backend.py +0 -0
  157. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/_native/__init__.py +0 -0
  158. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/_native/backend.cpp +0 -0
  159. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/_native/backend.hpp +0 -0
  160. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/_native/module.cpp +0 -0
  161. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/_native/module.hpp +0 -0
  162. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/_native/py_utils.hpp +0 -0
  163. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/_native/tensor_ops.cpp +0 -0
  164. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/_native/tensor_ops.hpp +0 -0
  165. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/_numpy_backend.py +0 -0
  166. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/_random_journal.py +0 -0
  167. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/_utils.py +0 -0
  168. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/array_.py +0 -0
  169. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/attention.py +0 -0
  170. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/audio/__init__.py +0 -0
  171. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/audio/mel.py +0 -0
  172. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/audio/specaugment.py +0 -0
  173. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/backend.py +0 -0
  174. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/build_from_dict.py +0 -0
  175. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/cond.py +0 -0
  176. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/const.py +0 -0
  177. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/container.py +0 -0
  178. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/control_flow_ctx.py +0 -0
  179. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/conv.py +0 -0
  180. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/conversions/__init__.py +0 -0
  181. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/conversions/espnet_e_branchformer.py +0 -0
  182. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/conversions/hf_llama.py +0 -0
  183. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/conversions/torch_nn.py +0 -0
  184. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/decoder/__init__.py +0 -0
  185. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/decoder/transformer.py +0 -0
  186. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/device.py +0 -0
  187. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/dims.py +0 -0
  188. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/dropout.py +0 -0
  189. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/dtype.py +0 -0
  190. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/encoder/__init__.py +0 -0
  191. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/encoder/base.py +0 -0
  192. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/encoder/conformer.py +0 -0
  193. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/encoder/e_branchformer.py +0 -0
  194. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/encoder/transformer.py +0 -0
  195. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/gradient.py +0 -0
  196. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/graph.py +0 -0
  197. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/hooks.py +0 -0
  198. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/init.py +0 -0
  199. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/label_smoothing.py +0 -0
  200. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/linear.py +0 -0
  201. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/loop.py +0 -0
  202. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/loss.py +0 -0
  203. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/math_.py +0 -0
  204. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/matmul.py +0 -0
  205. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/module.py +0 -0
  206. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/normalization.py +0 -0
  207. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/parameter.py +0 -0
  208. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/parametrizations.py +0 -0
  209. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/parametrize.py +0 -0
  210. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/piecewise_linear.py +0 -0
  211. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/rand.py +0 -0
  212. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/rec.py +0 -0
  213. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/reduce.py +0 -0
  214. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/run_ctx.py +0 -0
  215. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/signal.py +0 -0
  216. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/state.py +0 -0
  217. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/stepwise_scheduler.py +0 -0
  218. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/tensor_array.py +0 -0
  219. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/frontend/types.py +0 -0
  220. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/import_/__init__.py +0 -0
  221. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/import_/common.py +0 -0
  222. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/import_/git.py +0 -0
  223. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/import_/import_.py +0 -0
  224. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/learning_rate_control.py +0 -0
  225. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/log.py +0 -0
  226. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/native_op.cpp +0 -0
  227. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/native_op.py +0 -0
  228. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/pretrain.py +0 -0
  229. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/sprint/__init__.py +0 -0
  230. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/sprint/cache.py +0 -0
  231. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/sprint/control.py +0 -0
  232. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/sprint/error_signals.py +0 -0
  233. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/sprint/extern_interface.py +0 -0
  234. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/sprint/interface.py +0 -0
  235. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tensor/README.md +0 -0
  236. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tensor/__init__.py +0 -0
  237. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tensor/_dim_extra.py +0 -0
  238. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tensor/_tensor_extra.py +0 -0
  239. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tensor/_tensor_mixin_base.py +0 -0
  240. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tensor/_tensor_op_overloads.py +0 -0
  241. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tensor/control_flow_ctx.py +0 -0
  242. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tensor/dim.py +0 -0
  243. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tensor/marked_dim.py +0 -0
  244. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tensor/tensor.py +0 -0
  245. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tensor/tensor_dict.py +0 -0
  246. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tensor/utils.py +0 -0
  247. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/__init__.py +0 -0
  248. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/compat.py +0 -0
  249. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/data_pipeline.py +0 -0
  250. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/distributed.py +0 -0
  251. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/engine.py +0 -0
  252. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/frontend_layers/README.md +0 -0
  253. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/frontend_layers/__init__.py +0 -0
  254. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/frontend_layers/_backend.py +0 -0
  255. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/frontend_layers/_utils.py +0 -0
  256. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/frontend_layers/cond.py +0 -0
  257. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
  258. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
  259. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/frontend_layers/dims.py +0 -0
  260. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/frontend_layers/layer.py +0 -0
  261. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/frontend_layers/loop.py +0 -0
  262. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/frontend_layers/make_layer.py +0 -0
  263. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/frontend_layers/masked_computation.py +0 -0
  264. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
  265. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
  266. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/frontend_low_level/__init__.py +0 -0
  267. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/frontend_low_level/_backend.py +0 -0
  268. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/horovod.py +0 -0
  269. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/hyper_param_tuning.py +0 -0
  270. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/layers/__init__.py +0 -0
  271. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/layers/base.py +0 -0
  272. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/layers/basic.py +0 -0
  273. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/layers/rec.py +0 -0
  274. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/layers/segmental_model.py +0 -0
  275. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/layers/signal_processing.py +0 -0
  276. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/layers/variable.py +0 -0
  277. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/native_op.py +0 -0
  278. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/network.py +0 -0
  279. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/sprint.py +0 -0
  280. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/updater.py +0 -0
  281. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/util/__init__.py +0 -0
  282. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/util/basic.py +0 -0
  283. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/util/data.py +0 -0
  284. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/util/gradient_checkpoint.py +0 -0
  285. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/util/ken_lm.py +0 -0
  286. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/tf/util/open_fst.py +0 -0
  287. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/torch/README.md +0 -0
  288. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/torch/__init__.py +0 -0
  289. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/torch/data/__init__.py +0 -0
  290. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/torch/data/extern_data.py +0 -0
  291. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/torch/data/queued_data_iter.py +0 -0
  292. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/torch/data/tensor_utils.py +0 -0
  293. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/torch/distributed.py +0 -0
  294. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/torch/frontend/__init__.py +0 -0
  295. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/torch/frontend/_backend.py +0 -0
  296. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/torch/frontend/_rand.py +0 -0
  297. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/torch/frontend/bridge.py +0 -0
  298. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/torch/frontend/raw_ops.py +0 -0
  299. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/torch/util/README.md +0 -0
  300. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/torch/util/__init__.py +0 -0
  301. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/torch/util/array_.py +0 -0
  302. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/torch/util/diagnose_gpu.py +0 -0
  303. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/torch/util/exception_helper.py +0 -0
  304. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/torch/util/gradient_checkpoint.py +0 -0
  305. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/torch/util/module.py +0 -0
  306. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/torch/util/scaled_gradient.py +0 -0
  307. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/util/__init__.py +0 -0
  308. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/util/basic.py +0 -0
  309. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/util/better_exchook.py +0 -0
  310. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/util/bpe.py +0 -0
  311. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/util/debug.py +0 -0
  312. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/util/debug_helpers.py +0 -0
  313. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/util/file_cache.py +0 -0
  314. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/util/fsa.py +0 -0
  315. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/util/literal_py_to_pickle.py +0 -0
  316. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/util/math.py +0 -0
  317. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
  318. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/util/native_code_compiler.py +0 -0
  319. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/util/pprint.py +0 -0
  320. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/util/py-to-pickle.cpp +0 -0
  321. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/util/py_compat.py +0 -0
  322. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/util/py_ext_mod_compiler.py +0 -0
  323. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/util/result_with_reason.py +0 -0
  324. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/util/sig_proc.py +0 -0
  325. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/util/task_system.py +0 -0
  326. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/util/train_proc_manager.py +0 -0
  327. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn/util/watch_memory.py +0 -0
  328. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn.egg-info/SOURCES.txt +0 -0
  329. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn.egg-info/dependency_links.txt +0 -0
  330. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/returnn.egg-info/top_level.txt +0 -0
  331. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/rnn.py +0 -0
  332. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/setup.cfg +0 -0
  333. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/setup.py +0 -0
  334. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/DummySprintExec.py +0 -0
  335. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/PyCharm-inspection-profile.xml +0 -0
  336. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/PyCharm.idea/.gitignore +0 -0
  337. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/PyCharm.idea/.name +0 -0
  338. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
  339. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
  340. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
  341. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
  342. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
  343. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/PyCharm.idea/misc.xml +0 -0
  344. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/PyCharm.idea/modules.xml +0 -0
  345. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/PyCharm.idea/returnn.iml +0 -0
  346. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
  347. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/_set_num_threads1.py +0 -0
  348. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/_setup_returnn_env.py +0 -0
  349. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/_setup_test_env.py +0 -0
  350. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/bpe-unicode-demo.codes +0 -0
  351. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/bpe-unicode-demo.vocab +0 -0
  352. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/lexicon_opt.fst +0 -0
  353. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/lexicon_opt.isyms +0 -0
  354. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/lexicon_opt.jpg +0 -0
  355. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/lexicon_opt.osyms +0 -0
  356. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/lint_common.py +0 -0
  357. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/pycharm-inspect.py +0 -0
  358. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/pylint.py +0 -0
  359. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/returnn-as-framework.py +0 -0
  360. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/rf_utils.py +0 -0
  361. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/spelling.dic +0 -0
  362. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_Config.py +0 -0
  363. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_Dataset.py +0 -0
  364. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_Fsa.py +0 -0
  365. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_GeneratingDataset.py +0 -0
  366. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_HDFDataset.py +0 -0
  367. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_LearningRateControl.py +0 -0
  368. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_Log.py +0 -0
  369. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_MultiProcDataset.py +0 -0
  370. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_Pretrain.py +0 -0
  371. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_ResNet.py +0 -0
  372. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_SprintDataset.py +0 -0
  373. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_SprintInterface.py +0 -0
  374. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_TFEngine.py +0 -0
  375. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_TFNativeOp.py +0 -0
  376. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_TFNetworkLayer.py +0 -0
  377. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_TFNetworkRecLayer.py +0 -0
  378. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_TFNetworkSigProcLayer.py +0 -0
  379. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_TFUpdater.py +0 -0
  380. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_TFUtil.py +0 -0
  381. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_TF_determinism.py +0 -0
  382. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_TaskSystem.py +0 -0
  383. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_TaskSystem_SharedMem.py +0 -0
  384. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_TranslationDataset.py +0 -0
  385. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_Util.py +0 -0
  386. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_demos.py +0 -0
  387. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_fork_exec.py +0 -0
  388. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_hdf_dump.py +0 -0
  389. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_rf_array.py +0 -0
  390. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_rf_attention.py +0 -0
  391. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_rf_base.py +0 -0
  392. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_rf_cond.py +0 -0
  393. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_rf_const.py +0 -0
  394. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_rf_container.py +0 -0
  395. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_rf_conv.py +0 -0
  396. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_rf_decoder_transformer.py +0 -0
  397. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_rf_encoder_conformer.py +0 -0
  398. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_rf_gradient.py +0 -0
  399. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_rf_label_smoothing.py +0 -0
  400. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_rf_loop.py +0 -0
  401. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_rf_math.py +0 -0
  402. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_rf_normalization.py +0 -0
  403. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_rf_piecewise_linear.py +0 -0
  404. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_rf_rec.py +0 -0
  405. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_rf_reduce.py +0 -0
  406. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_rf_signal.py +0 -0
  407. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_tensor.py +0 -0
  408. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_tools.py +0 -0
  409. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_torch_dataset.py +0 -0
  410. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_torch_frontend.py +0 -0
  411. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_torch_internal_frontend.py +0 -0
  412. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/test_torch_util.py +0 -0
  413. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tests/torch_utils.py +0 -0
  414. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/_setup_returnn_env.py +0 -0
  415. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/analyze-dataset-batches.py +0 -0
  416. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/bliss-collect-seq-lens.py +0 -0
  417. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/bliss-dump-text.py +0 -0
  418. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/bliss-get-segment-names.py +0 -0
  419. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/bliss-to-ogg-zip.py +0 -0
  420. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/bpe-create-lexicon.py +0 -0
  421. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/calculate-word-error-rate.py +0 -0
  422. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/cleanup-old-models.py +0 -0
  423. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/collect-orth-symbols.py +0 -0
  424. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/collect-words.py +0 -0
  425. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/compile_native_op.py +0 -0
  426. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/compile_tf_graph.py +0 -0
  427. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/debug-dump-search-scores.py +0 -0
  428. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/debug-plot-search-scores.py +0 -0
  429. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/dump-dataset-raw-strings.py +0 -0
  430. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/dump-dataset.py +0 -0
  431. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/dump-forward-stats.py +0 -0
  432. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/dump-forward.py +0 -0
  433. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/dump-network-json.py +0 -0
  434. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/dump-pickle.py +0 -0
  435. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/extract_state_tying_from_dataset.py +0 -0
  436. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/get-attention-weights.py +0 -0
  437. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/get-best-model-epoch.py +0 -0
  438. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/hdf_dump.py +0 -0
  439. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/hdf_dump_translation_dataset.py +0 -0
  440. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/import-blocks-mt-model.py +0 -0
  441. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/import-t2t-mt-model.py +0 -0
  442. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/lattice_rescorer/.gitignore +0 -0
  443. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/lattice_rescorer/Makefile +0 -0
  444. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/lattice_rescorer/README.md +0 -0
  445. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/lattice_rescorer/example/README.md +0 -0
  446. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/lattice_rescorer/example/libs_list +0 -0
  447. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
  448. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
  449. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
  450. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/lattice_rescorer/example/state_vars_list +0 -0
  451. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/lattice_rescorer/example/tensor_names_list +0 -0
  452. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/lattice_rescorer/file.h +0 -0
  453. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
  454. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
  455. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/lattice_rescorer/main.cc +0 -0
  456. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/lattice_rescorer/rescorer.h +0 -0
  457. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/lattice_rescorer/vocabulary.cc +0 -0
  458. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/lattice_rescorer/vocabulary.h +0 -0
  459. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/tf_avg_checkpoints.py +0 -0
  460. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/tf_inspect_checkpoint.py +0 -0
  461. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/tf_inspect_summary_log.py +0 -0
  462. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/torch_avg_checkpoints.py +0 -0
  463. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/torch_export_to_onnx.py +0 -0
  464. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/torch_inspect_checkpoint.py +0 -0
  465. {returnn-1.20241017.4429 → returnn-1.20241018.213651}/tools/torch_inspect_checkpoint_and_opt.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: returnn
3
- Version: 1.20241017.4429
3
+ Version: 1.20241018.213651
4
4
  Summary: The RWTH extensible training framework for universal recurrent neural networks
5
5
  Home-page: https://github.com/rwth-i6/returnn/
6
6
  Author: Albert Zeyer
@@ -0,0 +1,2 @@
1
+ version = '1.20241018.213651'
2
+ long_version = '1.20241018.213651+git.2ac447f'
@@ -59,6 +59,9 @@ def collate_batch(batch: List[Dict[str, numpy.ndarray]]) -> Dict[str, Union[torc
59
59
 
60
60
  res = {}
61
61
  for key in data_keys:
62
+ if key == "num_seqs":
63
+ res[key] = batch[0][key] # it should always be the same
64
+ continue
62
65
  ls = [create_tensor(sample[key]) for sample in batch]
63
66
  if not ls:
64
67
  raise ValueError("batch is empty?")
@@ -116,7 +119,7 @@ class ChunkingIterDataPipe(torch.utils.data.IterDataPipe):
116
119
 
117
120
  if not chunking_data_keys:
118
121
  chunking_data_keys = list(data_dict.keys()) # use all if not configured separately
119
- chunking_data_key_black_list = ["seq_tag"]
122
+ chunking_data_key_black_list = ["seq_tag", "seq_idx", "num_seqs"]
120
123
  for key in chunking_data_key_black_list:
121
124
  if key in chunking_data_keys:
122
125
  chunking_data_keys.remove(key)
@@ -75,6 +75,13 @@ class ReturnnDatasetIterDataPipe(torch.utils.data.IterDataPipe):
75
75
  """
76
76
  :return: generator providing data samples in the form of a dict data_key -> data
77
77
  """
78
+ # noinspection PyBroadException
79
+ try:
80
+ num_seqs = self._dataset.num_seqs
81
+ except Exception: # might not work for all datasets
82
+ num_seqs = -1
83
+ num_seqs = numpy.array(num_seqs)
84
+
78
85
  try:
79
86
  data_keys = self._dataset.get_data_keys()
80
87
 
@@ -83,6 +90,10 @@ class ReturnnDatasetIterDataPipe(torch.utils.data.IterDataPipe):
83
90
  self._dataset.load_seqs(seq_index, seq_index + 1)
84
91
  data = {data_key: self._dataset.get_data(seq_index, data_key) for data_key in data_keys}
85
92
  data["seq_tag"] = str_to_numpy_array(self._dataset.get_tag(seq_index))
93
+ data["seq_idx"] = numpy.array(seq_index)
94
+ # It's slightly redundant to have num_seqs in each entry,
95
+ # but it's difficult to pass this back to the main proc otherwise.
96
+ data["num_seqs"] = num_seqs
86
97
  yield data
87
98
  seq_index += 1
88
99
 
@@ -252,7 +252,9 @@ class Engine(EngineBase):
252
252
 
253
253
  # Update learning rate
254
254
  self._updater.set_learning_rate(self.learning_rate)
255
- self._updater.set_current_train_step(global_train_step=self.global_train_step, epoch=self.epoch)
255
+ self._updater.set_current_train_step(
256
+ global_train_step=self.global_train_step, epoch=self.epoch, epoch_continuous=self.epoch - 1
257
+ )
256
258
 
257
259
  self.learning_rate_control.epoch_data[self.epoch].meta.update(
258
260
  {
@@ -311,7 +313,7 @@ class Engine(EngineBase):
311
313
  accumulated_losses_dict = NumbersDict()
312
314
  accumulated_inv_norm_factors_dict = NumbersDict()
313
315
  step_idx = 0
314
- epoch_start_time = time.time()
316
+ epoch_start_time = time.monotonic()
315
317
 
316
318
  data_iter = iter(self._train_dataloader)
317
319
  elapsed_computation_time = 0
@@ -339,12 +341,14 @@ class Engine(EngineBase):
339
341
  zero_grad_next_step = True
340
342
  cur_count_grad_accum = 0
341
343
  extern_data = None
344
+ num_seqs = None
345
+ last_seq_idx = 0
342
346
  try:
343
347
  while True:
344
348
  with torch.no_grad():
345
349
  extern_data_raw = next(data_iter, None)
346
350
 
347
- step_begin_time = time.time()
351
+ step_begin_time = time.monotonic()
348
352
 
349
353
  _has_data = torch.tensor([extern_data_raw is not None], dtype=torch.int8)
350
354
  if self._torch_distributed_ctx:
@@ -353,6 +357,22 @@ class Engine(EngineBase):
353
357
  torch.distributed.all_reduce(_has_data, op=torch.distributed.ReduceOp.MIN)
354
358
  if not _has_data[0]:
355
359
  break
360
+ num_seqs_ = (
361
+ int(extern_data_raw["num_seqs"]) if extern_data_raw.get("num_seqs", None) is not None else -1
362
+ )
363
+ last_seq_idx_ = extern_data_raw["seq_idx"].max()
364
+ assert last_seq_idx_ >= last_seq_idx
365
+ last_seq_idx = int(last_seq_idx_)
366
+ del last_seq_idx_
367
+ if step_idx == 0:
368
+ if num_seqs_ >= 0:
369
+ print(f"Epoch {self.epoch} num_seqs: {num_seqs_}", file=log.v5)
370
+ num_seqs = num_seqs_
371
+ elif num_seqs_ >= 0:
372
+ assert num_seqs_ == num_seqs
373
+ del num_seqs_
374
+ if num_seqs is not None:
375
+ assert last_seq_idx < num_seqs
356
376
 
357
377
  # clear the gradients when every gradient accumulation loop starts
358
378
  if zero_grad_next_step:
@@ -404,7 +424,8 @@ class Engine(EngineBase):
404
424
  if self._torch_distributed_ctx:
405
425
  self._torch_distributed_ctx.step_after_param_update(module=self._pt_model, epoch_step_idx=step_idx)
406
426
 
407
- step_duration = time.time() - step_begin_time
427
+ step_end_time = time.monotonic()
428
+ step_duration = step_end_time - step_begin_time
408
429
  elapsed_computation_time += step_duration
409
430
 
410
431
  accumulated_losses_dict += losses_dict
@@ -415,6 +436,9 @@ class Engine(EngineBase):
415
436
  step=step_idx,
416
437
  eval_info=dict(eval_info),
417
438
  step_duration=step_duration,
439
+ start_elapsed=step_end_time - epoch_start_time,
440
+ seq_idx=last_seq_idx,
441
+ num_seqs=num_seqs,
418
442
  batch_size_info=_get_batch_size_info(extern_data) if self._log_batch_size else None,
419
443
  log_memory_usage_device=self._device if self._log_memory_usage else None,
420
444
  )
@@ -431,12 +455,16 @@ class Engine(EngineBase):
431
455
 
432
456
  step_idx += 1
433
457
  self.global_train_step += 1
434
- self._updater.set_current_train_step(global_train_step=self.global_train_step, epoch=self.epoch)
458
+ self._updater.set_current_train_step(
459
+ global_train_step=self.global_train_step,
460
+ epoch=self.epoch,
461
+ epoch_continuous=(self.epoch - 1 + (last_seq_idx + 1) / num_seqs) if num_seqs is not None else None,
462
+ )
435
463
  except Exception as exc:
436
464
  help_on_torch_exception(exc, step_idx=step_idx, model=self._orig_model, extern_data=extern_data)
437
465
  raise
438
466
 
439
- elapsed = time.time() - epoch_start_time
467
+ elapsed = time.monotonic() - epoch_start_time
440
468
  elapsed_computation_percentage = elapsed_computation_time / elapsed
441
469
  print(
442
470
  "Trained %i steps, %s elapsed (%.1f%% computing time)"
@@ -1008,7 +1036,7 @@ class Engine(EngineBase):
1008
1036
  assert isinstance(dataset, Dataset)
1009
1037
  assert isinstance(callback, ForwardCallbackIface)
1010
1038
 
1011
- epoch_start_time = time.time()
1039
+ epoch_start_time = time.monotonic()
1012
1040
  elapsed_computation_time = 0.0
1013
1041
 
1014
1042
  self._pt_model.eval()
@@ -1087,7 +1115,7 @@ class Engine(EngineBase):
1087
1115
 
1088
1116
  step_idx = 0
1089
1117
  for extern_data_raw in data_loader:
1090
- step_begin_time = time.time()
1118
+ step_begin_time = time.monotonic()
1091
1119
  if self._forward_step_expected_outputs:
1092
1120
  # Also resets any dyn dims, which might have been set in the prev step.
1093
1121
  self._forward_step_expected_outputs.reset_content()
@@ -1121,7 +1149,7 @@ class Engine(EngineBase):
1121
1149
  model_outputs_per_batch.data[k] = _get_tensor_wo_batch_numpy(v)
1122
1150
  callback.process_seq(seq_tag=seq_tag, outputs=model_outputs_per_batch)
1123
1151
 
1124
- elapsed_computation_time += time.time() - step_begin_time
1152
+ elapsed_computation_time += time.monotonic() - step_begin_time
1125
1153
  _print_process(
1126
1154
  report_prefix,
1127
1155
  step=step_idx,
@@ -1132,7 +1160,7 @@ class Engine(EngineBase):
1132
1160
 
1133
1161
  callback.finish()
1134
1162
 
1135
- elapsed = time.time() - epoch_start_time
1163
+ elapsed = time.monotonic() - epoch_start_time
1136
1164
  elapsed_computation_percentage = elapsed_computation_time / elapsed
1137
1165
  print(
1138
1166
  "Forward %i steps, %s elapsed (%.1f%% computing time)"
@@ -1202,20 +1230,26 @@ def _to_raw(n: Union[int, float, Tensor]):
1202
1230
 
1203
1231
  def _print_process(
1204
1232
  report_prefix: str,
1233
+ *,
1205
1234
  step: int,
1206
1235
  eval_info: Optional[Dict[str, Any]] = None,
1207
1236
  batch_size_info: Optional[Dict[str, Any]] = None,
1208
1237
  step_duration: Optional[float] = None,
1238
+ start_elapsed: Optional[float] = None,
1239
+ seq_idx: Optional[int] = None,
1240
+ num_seqs: Optional[int] = None,
1209
1241
  log_memory_usage_device: Optional[str] = None,
1210
1242
  ):
1211
1243
  """
1212
1244
  Similar but simplified from TF engine _print_process.
1213
1245
 
1214
1246
  :param report_prefix:
1215
- :param step:
1247
+ :param step: for this epoch
1216
1248
  :param eval_info:
1217
1249
  :param batch_size_info:
1218
- :param step_duration:
1250
+ :param step_duration: time elapsed for this step (secs)
1251
+ :param start_elapsed: time elapsed since epoch start (secs)
1252
+ :param num_seqs: total number of sequences for this epoch
1219
1253
  :param log_memory_usage_device: if given, will log memory usage (peak allocated memory)
1220
1254
  :return: nothing, will be printed to log
1221
1255
  """
@@ -1233,6 +1267,20 @@ def _print_process(
1233
1267
  ]
1234
1268
  if step_duration is not None:
1235
1269
  info += ["%.3f sec/step" % step_duration]
1270
+ if start_elapsed is not None:
1271
+ info += ["elapsed %s" % hms(start_elapsed)]
1272
+ if num_seqs is not None:
1273
+ assert seq_idx is not None and start_elapsed is not None # unexpected combination...
1274
+ complete = (seq_idx + 1) / num_seqs
1275
+ assert 1 >= complete > 0, f"{step} step, {num_seqs} num_seqs"
1276
+ total_time_estimated = start_elapsed / complete
1277
+ remaining_estimated = total_time_estimated - start_elapsed
1278
+ info += [
1279
+ "exp. remaining %s" % hms(remaining_estimated),
1280
+ "complete %.02f%%" % (complete * 100),
1281
+ ]
1282
+ if start_elapsed is not None and num_seqs is None:
1283
+ info += ["(unk epoch len)"]
1236
1284
  print(", ".join(filter(None, info)), file=log.v5)
1237
1285
 
1238
1286
 
@@ -13,7 +13,7 @@ import typing
13
13
 
14
14
  import returnn
15
15
  from returnn.log import log
16
- from returnn.util.basic import RefIdEq
16
+ from returnn.util.basic import RefIdEq, get_fwd_compat_kwargs
17
17
  import returnn.frontend as rf
18
18
  from returnn.torch.frontend.bridge import wrapped_pt_module_to_rf_module
19
19
 
@@ -96,8 +96,11 @@ class Updater:
96
96
  self._effective_learning_rate = self.learning_rate
97
97
  self.network = network
98
98
  self._device = device
99
+ # Just set the very first step as initial values here.
100
+ # They will be overwritten via set_current_train_step() below.
99
101
  self._current_train_step = 0
100
- self._current_epoch = 0
102
+ self._current_epoch = 1
103
+ self._current_epoch_continuous = 0.0
101
104
 
102
105
  self.learning_rate_function = self.config.typed_value("dynamic_learning_rate", None)
103
106
  if self.learning_rate_function is not None:
@@ -163,19 +166,38 @@ class Updater:
163
166
  self._effective_learning_rate = self.learning_rate
164
167
  if self.learning_rate_function is not None:
165
168
  lr = self.learning_rate_function(
166
- global_train_step=self._current_train_step, epoch=self._current_epoch, learning_rate=self.learning_rate
169
+ global_train_step=self._current_train_step,
170
+ epoch=self._current_epoch,
171
+ epoch_continuous=self._current_epoch_continuous,
172
+ learning_rate=self.learning_rate,
173
+ **get_fwd_compat_kwargs(),
167
174
  )
168
175
  self._effective_learning_rate = float(lr)
169
176
  if self.optimizer:
170
177
  for param_group in self.optimizer.param_groups:
171
178
  param_group["lr"] = self._effective_learning_rate
172
179
 
173
- def set_current_train_step(self, *, global_train_step: int, epoch: int):
180
+ def set_current_train_step(self, *, global_train_step: int, epoch: int, epoch_continuous: Optional[float] = None):
174
181
  """
175
182
  Obtains an updated learning rate for the current training step inside a (sub)epoch.
183
+
184
+ :param global_train_step: Current global training step over the whole training process.
185
+ In the first epoch, this starts at 0.
186
+ :param epoch: Current epoch. (First epoch is 1 by RETURNN convention.)
187
+ :param epoch_continuous: How much of the epoch is finished.
188
+ In the first step of the first epoch, this starts at 0.0,
189
+ and when the first epoch is finished, this reaches 1.0,
190
+ and the values in between are the fraction of the epoch that is finished.
191
+ The second epoch (epoch=2) starts at 1.0,
192
+ and when the second epoch is finished, this reaches 2.0, and so on.
193
+ We usually calculate this based on ``epoch-1+(last_seq_idx+1)/num_seqs``,
194
+ if the dataset can provide ``num_seqs``.
195
+ Other schemes based on the step_idx might be used as well to calculate this,
196
+ if the number of steps per epoch is known in advance.
176
197
  """
177
198
  self._current_train_step = global_train_step
178
199
  self._current_epoch = epoch
200
+ self._current_epoch_continuous = epoch_continuous
179
201
  self._update_effective_learning_rate()
180
202
 
181
203
  def step(self, *, grad_scaler: Optional[torch.cuda.amp.GradScaler] = None):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: returnn
3
- Version: 1.20241017.4429
3
+ Version: 1.20241018.213651
4
4
  Summary: The RWTH extensible training framework for universal recurrent neural networks
5
5
  Home-page: https://github.com/rwth-i6/returnn/
6
6
  Author: Albert Zeyer
@@ -4,6 +4,7 @@ Tests for PyTorch engine.
4
4
 
5
5
  from __future__ import annotations
6
6
  import _setup_test_env # noqa
7
+ from typing import Optional
7
8
  import sys
8
9
  import unittest
9
10
  import tempfile
@@ -294,7 +295,6 @@ def test_forward_beam_seq_lens():
294
295
 
295
296
 
296
297
  def test_min_seq_len():
297
-
298
298
  from returnn.datasets.generating import DummyDataset
299
299
 
300
300
  config = Config({"min_seq_length": 2, "batch_size": 3})
@@ -318,7 +318,6 @@ def test_min_seq_len():
318
318
 
319
319
 
320
320
  def test_max_seq_len():
321
-
322
321
  from returnn.datasets.generating import DummyDataset
323
322
 
324
323
  config = Config({"max_seq_length": 4, "batch_size": 3})
@@ -520,6 +519,71 @@ def test_torch_engine_train_exception():
520
519
  raise Exception("did not get expected exception")
521
520
 
522
521
 
522
+ def test_dynamic_learning_rate():
523
+ num_epochs = 3
524
+ last_global_train_step: Optional[float] = None
525
+ last_epoch_continuous: Optional[float] = None
526
+ epoch_continuous_diffs = []
527
+
528
+ def _dynamic_learning_rate(
529
+ *, global_train_step: int, epoch: int, epoch_continuous: float, learning_rate: float, **_kwargs
530
+ ) -> float:
531
+ nonlocal last_global_train_step, last_epoch_continuous
532
+ assert isinstance(global_train_step, int)
533
+ assert isinstance(epoch, int)
534
+ assert isinstance(epoch_continuous, (int, float))
535
+ assert isinstance(learning_rate, (int, float))
536
+ print(f"global_train_step: {global_train_step}, epoch: {epoch}, epoch_continuous: {epoch_continuous}")
537
+ if last_global_train_step is None:
538
+ assert global_train_step == 0 and epoch == 1
539
+ else:
540
+ # The call to this function could be repeated.
541
+ assert global_train_step in (last_global_train_step, last_global_train_step + 1)
542
+ if last_epoch_continuous is None:
543
+ assert epoch_continuous == 0
544
+ elif global_train_step == last_global_train_step: # repeated call
545
+ assert epoch_continuous == last_epoch_continuous
546
+ else:
547
+ assert epoch_continuous > last_epoch_continuous
548
+ assert epoch >= epoch_continuous >= epoch - 1
549
+ epoch_continuous_diffs.append(epoch_continuous - last_epoch_continuous)
550
+ last_global_train_step = global_train_step
551
+ last_epoch_continuous = epoch_continuous
552
+ return learning_rate * epoch_continuous / num_epochs
553
+
554
+ config = Config(
555
+ dict(
556
+ task="train",
557
+ device="cpu",
558
+ extern_data={"data": {"dim": 9}, "classes": {"dim": 2, "sparse": True}},
559
+ get_model=TrainTestModel,
560
+ train_step=TrainTestModel.train_step,
561
+ batch_size=500,
562
+ optimizer={"class": "adam"},
563
+ dynamic_learning_rate=_dynamic_learning_rate,
564
+ num_epochs=num_epochs,
565
+ )
566
+ )
567
+ num_seqs_per_epoch = 100
568
+ dataset = init_dataset({"class": "Task12AXDataset", "num_seqs": num_seqs_per_epoch, "name": "train"})
569
+ dataset.init_seq_order(epoch=1)
570
+
571
+ with global_config_ctx(config):
572
+ engine = Engine(config=config)
573
+ engine.init_train_from_config(train_data=dataset)
574
+ engine.train()
575
+
576
+ assert last_epoch_continuous == num_epochs
577
+ assert epoch_continuous_diffs
578
+ print("epoch continuous diffs:", epoch_continuous_diffs)
579
+ # Just some sanity check. The exact number here depends on num_seqs_per_epoch, batch_size, etc.
580
+ assert numpy.min(epoch_continuous_diffs) >= 0.01
581
+ assert numpy.max(epoch_continuous_diffs) <= 0.1
582
+ # It's one more (non-repeated) call than num steps (first + very last),
583
+ # and the number of diffs is one less, so the length should match the final global train step.
584
+ assert len(epoch_continuous_diffs) == engine.global_train_step
585
+
586
+
523
587
  if __name__ == "__main__":
524
588
  better_exchook.install()
525
589
  if len(sys.argv) <= 1:
@@ -1,2 +0,0 @@
1
- version = '1.20241017.004429'
2
- long_version = '1.20241017.004429+git.c3878ec'