returnn 1.20241030.185827__tar.gz → 1.20241106.124322__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of returnn has been flagged as potentially problematic; consult the package registry's advisory page for more details.

Files changed (469)
  1. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/PKG-INFO +1 -1
  2. returnn-1.20241106.124322/_setup_info_generated.py +2 -0
  3. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/array_.py +11 -0
  4. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/learning_rate_control.py +2 -2
  5. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/data/pipeline.py +64 -13
  6. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/data/returnn_dataset_wrapper.py +12 -1
  7. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/engine.py +65 -28
  8. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/basic.py +36 -40
  9. returnn-1.20241106.124322/returnn/util/math.py +87 -0
  10. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn.egg-info/PKG-INFO +1 -1
  11. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_Util.py +30 -0
  12. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_demos.py +3 -0
  13. returnn-1.20241030.185827/_setup_info_generated.py +0 -2
  14. returnn-1.20241030.185827/returnn/util/math.py +0 -34
  15. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/.editorconfig +0 -0
  16. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/.gitignore +0 -0
  17. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/.gitmodules +0 -0
  18. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/.kateconfig +0 -0
  19. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/CHANGELOG.md +0 -0
  20. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/CODEOWNERS +0 -0
  21. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/CONTRIBUTING.md +0 -0
  22. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/LICENSE +0 -0
  23. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/MANIFEST.in +0 -0
  24. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/README.rst +0 -0
  25. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/__init__.py +0 -0
  26. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/12AX.cluster_map +0 -0
  27. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/_setup_returnn_env.py +0 -0
  28. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-fwd.config +0 -0
  29. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-horovod-mpi.py +0 -0
  30. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-horovod-mpi.py.sh +0 -0
  31. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-horovod-mpi.sh +0 -0
  32. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-hyper-param-tuning.config +0 -0
  33. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-iter-dataset.py +0 -0
  34. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-list-devices.py +0 -0
  35. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-lua-torch-layer.config +0 -0
  36. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-pretrain.config +0 -0
  37. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-record-and-push-to-webserver.py +0 -0
  38. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-returnn-as-framework.py +0 -0
  39. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-rf-pt-benchmark.py +0 -0
  40. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-rf.config +0 -0
  41. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-rhn-enwik8.config +0 -0
  42. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-sprint-interface.py +0 -0
  43. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-att-copy.config +0 -0
  44. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-attention.config +0 -0
  45. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
  46. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
  47. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-enc-dec.config +0 -0
  48. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-hard-att-copy.config +0 -0
  49. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-lstm-benchmark.py +0 -0
  50. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
  51. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
  52. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-native-lstm.12ax.config +0 -0
  53. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-native-lstm2.12ax.config +0 -0
  54. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
  55. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-neural-transducer.12ax.config +0 -0
  56. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-rec-explicit-lstm.config +0 -0
  57. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-rec-explicit-rnn.config +0 -0
  58. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-rec-self-att.config +0 -0
  59. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-search-compiled-graph.py +0 -0
  60. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
  61. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-timit-lstm-ctc.config +0 -0
  62. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-torch.config +0 -0
  63. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
  64. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo.sh +0 -0
  65. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
  66. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
  67. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
  68. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/README.txt +0 -0
  69. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/chars.txt +0 -0
  70. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/config_demo +0 -0
  71. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/config_fwd +0 -0
  72. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/config_real +0 -0
  73. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
  74. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/decode.py +0 -0
  75. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
  76. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/go.sh +0 -0
  77. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/lines.txt +0 -0
  78. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/split/eval.txt +0 -0
  79. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/split/train.txt +0 -0
  80. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/split/valid.txt +0 -0
  81. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/README.md +0 -0
  82. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/artificial/create_test_h5.py +0 -0
  83. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/artificial/forwardconfig +0 -0
  84. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/artificial/go.sh +0 -0
  85. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/artificial/trainconfig +0 -0
  86. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
  87. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
  88. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/artificial_rgb/go.sh +0 -0
  89. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
  90. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/pyproject.toml +0 -0
  91. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/requirements.txt +0 -0
  92. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/__init__.py +0 -0
  93. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/__main__.py +0 -0
  94. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/__old_mod_loader__.py +0 -0
  95. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/__setup__.py +0 -0
  96. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/config.py +0 -0
  97. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/__init__.py +0 -0
  98. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/audio.py +0 -0
  99. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/basic.py +0 -0
  100. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/bundle_file.py +0 -0
  101. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/cached.py +0 -0
  102. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/cached2.py +0 -0
  103. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/distrib_files.py +0 -0
  104. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/generating.py +0 -0
  105. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/hdf.py +0 -0
  106. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/lm.py +0 -0
  107. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/map.py +0 -0
  108. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/meta.py +0 -0
  109. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/multi_proc.py +0 -0
  110. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/normalization_data.py +0 -0
  111. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/numpy_dump.py +0 -0
  112. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/postprocessing.py +0 -0
  113. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/raw_wav.py +0 -0
  114. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/sprint.py +0 -0
  115. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/stereo.py +0 -0
  116. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/util/__init__.py +0 -0
  117. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/util/feature_extraction.py +0 -0
  118. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/util/strings.py +0 -0
  119. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/util/vocabulary.py +0 -0
  120. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/engine/__init__.py +0 -0
  121. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/engine/base.py +0 -0
  122. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/engine/batch.py +0 -0
  123. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/__init__.py +0 -0
  124. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/__main__.py +0 -0
  125. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/.git +0 -0
  126. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
  127. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
  128. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
  129. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
  130. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
  131. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
  132. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
  133. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
  134. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
  135. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
  136. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
  137. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
  138. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
  139. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
  140. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
  141. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
  142. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
  143. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
  144. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
  145. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
  146. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
  147. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
  148. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
  149. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
  150. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/__init__.py +0 -0
  151. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/graph_editor/README.md +0 -0
  152. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/graph_editor/__init__.py +0 -0
  153. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/graph_editor/edit.py +0 -0
  154. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/graph_editor/reroute.py +0 -0
  155. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/graph_editor/select.py +0 -0
  156. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/graph_editor/subgraph.py +0 -0
  157. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/graph_editor/transform.py +0 -0
  158. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/graph_editor/util.py +0 -0
  159. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/forward_iface.py +0 -0
  160. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/__init__.py +0 -0
  161. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/_backend.py +0 -0
  162. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/_native/__init__.py +0 -0
  163. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/_native/backend.cpp +0 -0
  164. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/_native/backend.hpp +0 -0
  165. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/_native/module.cpp +0 -0
  166. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/_native/module.hpp +0 -0
  167. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/_native/py_utils.hpp +0 -0
  168. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/_native/tensor_ops.cpp +0 -0
  169. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/_native/tensor_ops.hpp +0 -0
  170. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/_numpy_backend.py +0 -0
  171. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/_random_journal.py +0 -0
  172. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/_utils.py +0 -0
  173. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/attention.py +0 -0
  174. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/audio/__init__.py +0 -0
  175. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/audio/mel.py +0 -0
  176. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/audio/specaugment.py +0 -0
  177. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/backend.py +0 -0
  178. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/build_from_dict.py +0 -0
  179. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/cond.py +0 -0
  180. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/const.py +0 -0
  181. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/container.py +0 -0
  182. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/control_flow_ctx.py +0 -0
  183. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/conv.py +0 -0
  184. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/conversions/__init__.py +0 -0
  185. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/conversions/espnet_e_branchformer.py +0 -0
  186. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/conversions/hf_llama.py +0 -0
  187. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/conversions/torch_nn.py +0 -0
  188. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/decoder/__init__.py +0 -0
  189. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/decoder/transformer.py +0 -0
  190. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/device.py +0 -0
  191. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/dims.py +0 -0
  192. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/dropout.py +0 -0
  193. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/dtype.py +0 -0
  194. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/encoder/__init__.py +0 -0
  195. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/encoder/base.py +0 -0
  196. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/encoder/conformer.py +0 -0
  197. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/encoder/e_branchformer.py +0 -0
  198. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/encoder/transformer.py +0 -0
  199. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/gradient.py +0 -0
  200. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/graph.py +0 -0
  201. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/hooks.py +0 -0
  202. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/init.py +0 -0
  203. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/label_smoothing.py +0 -0
  204. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/linear.py +0 -0
  205. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/loop.py +0 -0
  206. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/loss.py +0 -0
  207. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/math_.py +0 -0
  208. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/matmul.py +0 -0
  209. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/module.py +0 -0
  210. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/normalization.py +0 -0
  211. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/parameter.py +0 -0
  212. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/parametrizations.py +0 -0
  213. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/parametrize.py +0 -0
  214. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/piecewise_linear.py +0 -0
  215. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/rand.py +0 -0
  216. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/rec.py +0 -0
  217. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/reduce.py +0 -0
  218. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/run_ctx.py +0 -0
  219. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/signal.py +0 -0
  220. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/state.py +0 -0
  221. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/stepwise_scheduler.py +0 -0
  222. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/tensor_array.py +0 -0
  223. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/types.py +0 -0
  224. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/import_/__init__.py +0 -0
  225. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/import_/common.py +0 -0
  226. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/import_/git.py +0 -0
  227. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/import_/import_.py +0 -0
  228. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/log.py +0 -0
  229. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/native_op.cpp +0 -0
  230. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/native_op.py +0 -0
  231. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/pretrain.py +0 -0
  232. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/sprint/__init__.py +0 -0
  233. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/sprint/cache.py +0 -0
  234. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/sprint/control.py +0 -0
  235. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/sprint/error_signals.py +0 -0
  236. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/sprint/extern_interface.py +0 -0
  237. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/sprint/interface.py +0 -0
  238. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tensor/README.md +0 -0
  239. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tensor/__init__.py +0 -0
  240. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tensor/_dim_extra.py +0 -0
  241. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tensor/_tensor_extra.py +0 -0
  242. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tensor/_tensor_mixin_base.py +0 -0
  243. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tensor/_tensor_op_overloads.py +0 -0
  244. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tensor/control_flow_ctx.py +0 -0
  245. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tensor/dim.py +0 -0
  246. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tensor/marked_dim.py +0 -0
  247. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tensor/tensor.py +0 -0
  248. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tensor/tensor_dict.py +0 -0
  249. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tensor/utils.py +0 -0
  250. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/__init__.py +0 -0
  251. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/compat.py +0 -0
  252. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/data_pipeline.py +0 -0
  253. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/distributed.py +0 -0
  254. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/engine.py +0 -0
  255. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/README.md +0 -0
  256. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/__init__.py +0 -0
  257. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/_backend.py +0 -0
  258. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/_utils.py +0 -0
  259. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/cond.py +0 -0
  260. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
  261. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
  262. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/dims.py +0 -0
  263. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/layer.py +0 -0
  264. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/loop.py +0 -0
  265. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/make_layer.py +0 -0
  266. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/masked_computation.py +0 -0
  267. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
  268. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
  269. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_low_level/__init__.py +0 -0
  270. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_low_level/_backend.py +0 -0
  271. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/horovod.py +0 -0
  272. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/hyper_param_tuning.py +0 -0
  273. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/layers/__init__.py +0 -0
  274. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/layers/base.py +0 -0
  275. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/layers/basic.py +0 -0
  276. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/layers/rec.py +0 -0
  277. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/layers/segmental_model.py +0 -0
  278. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/layers/signal_processing.py +0 -0
  279. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/layers/variable.py +0 -0
  280. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/native_op.py +0 -0
  281. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/network.py +0 -0
  282. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/sprint.py +0 -0
  283. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/updater.py +0 -0
  284. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/util/__init__.py +0 -0
  285. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/util/basic.py +0 -0
  286. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/util/data.py +0 -0
  287. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/util/gradient_checkpoint.py +0 -0
  288. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/util/ken_lm.py +0 -0
  289. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/util/open_fst.py +0 -0
  290. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/README.md +0 -0
  291. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/__init__.py +0 -0
  292. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/data/__init__.py +0 -0
  293. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/data/extern_data.py +0 -0
  294. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/data/queued_data_iter.py +0 -0
  295. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/data/tensor_utils.py +0 -0
  296. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/distributed.py +0 -0
  297. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/frontend/__init__.py +0 -0
  298. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/frontend/_backend.py +0 -0
  299. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/frontend/_rand.py +0 -0
  300. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/frontend/bridge.py +0 -0
  301. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/frontend/raw_ops.py +0 -0
  302. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/optim/README.md +0 -0
  303. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/optim/__init__.py +0 -0
  304. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/optim/lion.py +0 -0
  305. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/updater.py +0 -0
  306. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/util/README.md +0 -0
  307. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/util/__init__.py +0 -0
  308. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/util/array_.py +0 -0
  309. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/util/diagnose_gpu.py +0 -0
  310. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/util/exception_helper.py +0 -0
  311. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/util/gradient_checkpoint.py +0 -0
  312. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/util/module.py +0 -0
  313. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/util/scaled_gradient.py +0 -0
  314. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/__init__.py +0 -0
  315. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/better_exchook.py +0 -0
  316. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/bpe.py +0 -0
  317. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/debug.py +0 -0
  318. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/debug_helpers.py +0 -0
  319. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/file_cache.py +0 -0
  320. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/fsa.py +0 -0
  321. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/literal_py_to_pickle.py +0 -0
  322. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
  323. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/native_code_compiler.py +0 -0
  324. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/pprint.py +0 -0
  325. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/py-to-pickle.cpp +0 -0
  326. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/py_compat.py +0 -0
  327. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/py_ext_mod_compiler.py +0 -0
  328. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/result_with_reason.py +0 -0
  329. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/sig_proc.py +0 -0
  330. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/task_system.py +0 -0
  331. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/train_proc_manager.py +0 -0
  332. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/watch_memory.py +0 -0
  333. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn.egg-info/SOURCES.txt +0 -0
  334. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn.egg-info/dependency_links.txt +0 -0
  335. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn.egg-info/top_level.txt +0 -0
  336. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/rnn.py +0 -0
  337. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/setup.cfg +0 -0
  338. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/setup.py +0 -0
  339. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/DummySprintExec.py +0 -0
  340. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/PyCharm-inspection-profile.xml +0 -0
  341. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/PyCharm.idea/.gitignore +0 -0
  342. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/PyCharm.idea/.name +0 -0
  343. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
  344. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
  345. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
  346. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
  347. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
  348. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/PyCharm.idea/misc.xml +0 -0
  349. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/PyCharm.idea/modules.xml +0 -0
  350. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/PyCharm.idea/returnn.iml +0 -0
  351. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
  352. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/_set_num_threads1.py +0 -0
  353. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/_setup_returnn_env.py +0 -0
  354. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/_setup_test_env.py +0 -0
  355. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/bpe-unicode-demo.codes +0 -0
  356. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/bpe-unicode-demo.vocab +0 -0
  357. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/lexicon_opt.fst +0 -0
  358. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/lexicon_opt.isyms +0 -0
  359. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/lexicon_opt.jpg +0 -0
  360. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/lexicon_opt.osyms +0 -0
  361. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/lint_common.py +0 -0
  362. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/pycharm-inspect.py +0 -0
  363. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/pylint.py +0 -0
  364. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/returnn-as-framework.py +0 -0
  365. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/rf_utils.py +0 -0
  366. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/spelling.dic +0 -0
  367. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_Config.py +0 -0
  368. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_Dataset.py +0 -0
  369. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_Fsa.py +0 -0
  370. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_GeneratingDataset.py +0 -0
  371. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_HDFDataset.py +0 -0
  372. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_LearningRateControl.py +0 -0
  373. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_Log.py +0 -0
  374. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_MultiProcDataset.py +0 -0
  375. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_Pretrain.py +0 -0
  376. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_ResNet.py +0 -0
  377. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_SprintDataset.py +0 -0
  378. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_SprintInterface.py +0 -0
  379. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_TFEngine.py +0 -0
  380. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_TFNativeOp.py +0 -0
  381. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_TFNetworkLayer.py +0 -0
  382. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_TFNetworkRecLayer.py +0 -0
  383. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_TFNetworkSigProcLayer.py +0 -0
  384. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_TFUpdater.py +0 -0
  385. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_TFUtil.py +0 -0
  386. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_TF_determinism.py +0 -0
  387. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_TaskSystem.py +0 -0
  388. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_TaskSystem_SharedMem.py +0 -0
  389. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_TranslationDataset.py +0 -0
  390. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_fork_exec.py +0 -0
  391. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_hdf_dump.py +0 -0
  392. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_array.py +0 -0
  393. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_attention.py +0 -0
  394. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_base.py +0 -0
  395. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_cond.py +0 -0
  396. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_const.py +0 -0
  397. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_container.py +0 -0
  398. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_conv.py +0 -0
  399. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_decoder_transformer.py +0 -0
  400. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_encoder_conformer.py +0 -0
  401. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_gradient.py +0 -0
  402. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_label_smoothing.py +0 -0
  403. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_loop.py +0 -0
  404. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_math.py +0 -0
  405. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_normalization.py +0 -0
  406. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_piecewise_linear.py +0 -0
  407. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_rec.py +0 -0
  408. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_reduce.py +0 -0
  409. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_signal.py +0 -0
  410. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_tensor.py +0 -0
  411. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_tools.py +0 -0
  412. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_torch_dataset.py +0 -0
  413. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_torch_engine.py +0 -0
  414. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_torch_frontend.py +0 -0
  415. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_torch_internal_frontend.py +0 -0
  416. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_torch_util.py +0 -0
  417. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/torch_utils.py +0 -0
  418. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/_setup_returnn_env.py +0 -0
  419. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/analyze-dataset-batches.py +0 -0
  420. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/bliss-collect-seq-lens.py +0 -0
  421. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/bliss-dump-text.py +0 -0
  422. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/bliss-get-segment-names.py +0 -0
  423. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/bliss-to-ogg-zip.py +0 -0
  424. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/bpe-create-lexicon.py +0 -0
  425. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/calculate-word-error-rate.py +0 -0
  426. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/cleanup-old-models.py +0 -0
  427. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/collect-orth-symbols.py +0 -0
  428. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/collect-words.py +0 -0
  429. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/compile_native_op.py +0 -0
  430. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/compile_tf_graph.py +0 -0
  431. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/debug-dump-search-scores.py +0 -0
  432. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/debug-plot-search-scores.py +0 -0
  433. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/dump-dataset-raw-strings.py +0 -0
  434. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/dump-dataset.py +0 -0
  435. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/dump-forward-stats.py +0 -0
  436. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/dump-forward.py +0 -0
  437. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/dump-network-json.py +0 -0
  438. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/dump-pickle.py +0 -0
  439. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/extract_state_tying_from_dataset.py +0 -0
  440. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/get-attention-weights.py +0 -0
  441. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/get-best-model-epoch.py +0 -0
  442. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/hdf_dump.py +0 -0
  443. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/hdf_dump_translation_dataset.py +0 -0
  444. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/import-blocks-mt-model.py +0 -0
  445. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/import-t2t-mt-model.py +0 -0
  446. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/.gitignore +0 -0
  447. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/Makefile +0 -0
  448. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/README.md +0 -0
  449. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/example/README.md +0 -0
  450. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/example/libs_list +0 -0
  451. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
  452. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
  453. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
  454. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/example/state_vars_list +0 -0
  455. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/example/tensor_names_list +0 -0
  456. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/file.h +0 -0
  457. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
  458. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
  459. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/main.cc +0 -0
  460. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/rescorer.h +0 -0
  461. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/vocabulary.cc +0 -0
  462. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/vocabulary.h +0 -0
  463. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/tf_avg_checkpoints.py +0 -0
  464. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/tf_inspect_checkpoint.py +0 -0
  465. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/tf_inspect_summary_log.py +0 -0
  466. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/torch_avg_checkpoints.py +0 -0
  467. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/torch_export_to_onnx.py +0 -0
  468. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/torch_inspect_checkpoint.py +0 -0
  469. {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/torch_inspect_checkpoint_and_opt.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: returnn
3
- Version: 1.20241030.185827
3
+ Version: 1.20241106.124322
4
4
  Summary: The RWTH extensible training framework for universal recurrent neural networks
5
5
  Home-page: https://github.com/rwth-i6/returnn/
6
6
  Author: Albert Zeyer
@@ -0,0 +1,2 @@
1
+ version = '1.20241106.124322'
2
+ long_version = '1.20241106.124322+git.bf0c605'
@@ -36,6 +36,7 @@ __all__ = [
36
36
  "masked_scatter",
37
37
  "sequence_mask",
38
38
  "pack_padded",
39
+ "pad_packed",
39
40
  "gather",
40
41
  "scatter",
41
42
  "scatter_argmax",
@@ -627,6 +628,8 @@ def pack_padded(
627
628
  Packing means to only store the non-padded frames.
628
629
  This uses :func:`masked_select` internally based on the mask of non-masked frames.
629
630
 
631
+ See :func:`pad_packed` for the inverse operation.
632
+
630
633
  :param source:
631
634
  :param dims: dims in source to pack. the order defines the format. first dim is major, etc.
632
635
  if there are no padded frames, e.g. dims=[B,T] would just result in the [B*T,...] reshaped tensor.
@@ -648,6 +651,14 @@ def pack_padded(
648
651
  return rf.masked_select(source, mask=mask, dims=dims, out_dim=out_dim)
649
652
 
650
653
 
654
+ def pad_packed(source: Tensor, *, in_dim: Dim, dims: Sequence[Dim]) -> Tensor:
655
+ """
656
+ Inverse of :func:`pack_padded`, i.e. unpack the sequence, i.e. pad it back to the original length.
657
+ """
658
+ mask = rf.sequence_mask(dims, device=source.device)
659
+ return rf.masked_scatter(source, mask=mask, in_dim=in_dim, dims=dims)
660
+
661
+
651
662
  # noinspection PyUnusedLocal
652
663
  def gather(
653
664
  source: Tensor,
@@ -5,7 +5,7 @@ The base class is :class:`LearningRateControl`.
5
5
 
6
6
  from __future__ import annotations
7
7
 
8
- from typing import Optional, Any, Dict
8
+ from typing import Optional, Union, Any, Dict
9
9
  import typing
10
10
  import os
11
11
  import returnn.util.basic as util
@@ -350,7 +350,7 @@ class LearningRateControl:
350
350
  relative_error /= learning_rate / self.default_learning_rate
351
351
  return relative_error
352
352
 
353
- def set_epoch_error(self, epoch, error):
353
+ def set_epoch_error(self, epoch: int, error: Dict[str, Union[float, Dict[str, float]]]):
354
354
  """
355
355
  :type epoch: int
356
356
  :type error: dict[str,float|dict[str,float]]
@@ -28,7 +28,7 @@ import numpy
28
28
  import torch
29
29
  import torch.utils.data
30
30
 
31
- from returnn.util.basic import NumbersDict
31
+ from returnn.util.basic import NumbersDict, get_fwd_compat_kwargs
32
32
 
33
33
 
34
34
  def create_tensor(array: numpy.ndarray) -> Union[torch.Tensor, numpy.ndarray]:
@@ -59,7 +59,7 @@ def collate_batch(batch: List[Dict[str, numpy.ndarray]]) -> Dict[str, Union[torc
59
59
 
60
60
  res = {}
61
61
  for key in data_keys:
62
- if key == "num_seqs":
62
+ if key in ("num_seqs", "epoch"):
63
63
  res[key] = batch[0][key] # it should always be the same
64
64
  continue
65
65
  ls = [create_tensor(sample[key]) for sample in batch]
@@ -119,7 +119,7 @@ class ChunkingIterDataPipe(torch.utils.data.IterDataPipe):
119
119
 
120
120
  if not chunking_data_keys:
121
121
  chunking_data_keys = list(data_dict.keys()) # use all if not configured separately
122
- chunking_data_key_black_list = ["seq_tag", "seq_idx", "num_seqs"]
122
+ chunking_data_key_black_list = ["seq_tag", "seq_idx", "num_seqs", "epoch"]
123
123
  for key in chunking_data_key_black_list:
124
124
  if key in chunking_data_keys:
125
125
  chunking_data_keys.remove(key)
@@ -208,20 +208,66 @@ class BatchingIterDataPipe(torch.utils.data.IterDataPipe):
208
208
  def __init__(self, dataset: torch.utils.data.IterableDataset, batch_size=1, max_seqs=None):
209
209
  """
210
210
  :param dataset: dataset to apply batching to
211
- :param int|dict[str,int]|None batch_size: Maximum number of time steps (e.g. audio frames / words) in one
212
- batch (padding included).
211
+ :param int|dict[str,int]|None|function batch_size: Maximum number of time steps (e.g. audio frames / words)
212
+ in one batch (padding included).
213
213
  If given as a dict data_key -> value, sets different individual limits per data key.
214
214
  If None, no limit.
215
- :param int|None max_seqs: maximum number of sequences in a batch,
216
- None means unlimited (also -1 to match TF backend)
215
+ Can also be a callable with kwargs epoch, seq_idx, epoch_continuous, **_other_kwargs,
216
+ returning the batch size.
217
+ :param int|None|function max_seqs: maximum number of sequences in a batch,
218
+ None means unlimited (also -1 to match TF backend).
219
+ Can also be a callable with kwargs epoch, seq_idx, epoch_continuous, **_other_kwargs,
220
+ returning the max seqs.
217
221
  """
218
222
  super().__init__()
219
223
  self._dataset = dataset
220
- self._max_batch_size = NumbersDict(sys.maxsize if batch_size is None else batch_size)
221
- self._max_seqs = sys.maxsize if (max_seqs is None or max_seqs == -1) else max_seqs
224
+ self._max_batch_size = self._parse_batch_size(batch_size)
225
+ self._max_seqs = self._parse_max_seqs(max_seqs)
222
226
 
223
- assert self._max_batch_size.min_value() > 0
224
- assert self._max_seqs > 0
227
+ if not callable(self._max_batch_size):
228
+ assert isinstance(self._max_batch_size, NumbersDict) and self._max_batch_size.min_value() > 0
229
+ if not callable(self._max_seqs):
230
+ assert isinstance(self._max_seqs, int) and self._max_seqs > 0
231
+
232
+ @staticmethod
233
+ def _parse_batch_size(
234
+ batch_size: Union[int, Dict[str, int], NumbersDict, None, Callable],
235
+ *,
236
+ data_dict: Optional[Dict[str, Any]] = None,
237
+ ) -> Union[NumbersDict, Callable]:
238
+ """
239
+ :param batch_size: see __init__()
240
+ :return: batch_size
241
+ """
242
+ if callable(batch_size):
243
+ if data_dict:
244
+ batch_size = batch_size(**BatchingIterDataPipe._get_user_func_kwargs_from_data_dict(data_dict))
245
+ else:
246
+ return batch_size
247
+ return NumbersDict(sys.maxsize if batch_size is None else batch_size)
248
+
249
+ @staticmethod
250
+ def _parse_max_seqs(
251
+ max_seqs: Union[int, None, Callable], *, data_dict: Optional[Dict[str, Any]] = None
252
+ ) -> Union[int, Callable]:
253
+ """
254
+ :param max_seqs: see __init__()
255
+ :return: max_seqs
256
+ """
257
+ if callable(max_seqs):
258
+ if data_dict:
259
+ max_seqs = max_seqs(**BatchingIterDataPipe._get_user_func_kwargs_from_data_dict(data_dict))
260
+ else:
261
+ return max_seqs
262
+ return sys.maxsize if (max_seqs is None or max_seqs == -1) else max_seqs
263
+
264
+ @staticmethod
265
+ def _get_user_func_kwargs_from_data_dict(data_dict: Dict[str, Any]) -> Dict[str, Any]:
266
+ epoch = int(data_dict["epoch"])
267
+ seq_idx = int(data_dict["seq_idx"])
268
+ num_seqs = int(data_dict["num_seqs"]) # >=1 if known, otherwise -1
269
+ epoch_continuous = (epoch - 1 + (seq_idx + 1) / num_seqs) if num_seqs > 0 else None
270
+ return {"epoch": epoch, "seq_idx": seq_idx, "epoch_continuous": epoch_continuous, **get_fwd_compat_kwargs()}
225
271
 
226
272
  def __iter__(self):
227
273
  """
@@ -233,7 +279,12 @@ class BatchingIterDataPipe(torch.utils.data.IterDataPipe):
233
279
  current_max_sequence_lengths = NumbersDict(0) # data_key -> length of longest sequence in current batch
234
280
 
235
281
  for data_dict in self._dataset:
236
- if len(current_batch) == self._max_seqs:
282
+ max_seqs = self._parse_max_seqs(self._max_seqs, data_dict=data_dict)
283
+ max_batch_size = self._parse_batch_size(self._max_batch_size, data_dict=data_dict)
284
+ assert isinstance(max_seqs, int) and max_seqs > 0
285
+ assert isinstance(max_batch_size, NumbersDict) and max_batch_size.min_value() > 0
286
+
287
+ if len(current_batch) >= max_seqs:
237
288
  yield current_batch
238
289
  current_batch = []
239
290
  current_max_sequence_lengths = NumbersDict(0)
@@ -246,7 +297,7 @@ class BatchingIterDataPipe(torch.utils.data.IterDataPipe):
246
297
  max_sequence_lengths_if_included = NumbersDict.max([current_max_sequence_lengths, sequence_lengths])
247
298
  batch_size_if_included = max_sequence_lengths_if_included * (len(current_batch) + 1) # including padding
248
299
 
249
- if current_batch and batch_size_if_included.any_compare(self._max_batch_size, (lambda a, b: a > b)):
300
+ if current_batch and batch_size_if_included.any_compare(max_batch_size, (lambda a, b: a > b)):
250
301
  yield current_batch
251
302
  current_batch = [data_dict]
252
303
  current_max_sequence_lengths = sequence_lengths
@@ -67,7 +67,14 @@ class ReturnnDatasetIterDataPipe(torch.utils.data.IterDataPipe):
67
67
 
68
68
  def reset(self):
69
69
  """
70
- :return:
70
+ This is called by PyTorch DataLoader mechanism once we create a new iterator over the DataLoader.
71
+ This happens at the beginning of each epoch.
72
+
73
+ (Note: The mechanism where ``reset()`` is actually called is very obfuscated in PyTorch.
74
+ As I understand it, there is a IterDataPipe metaclass (_IterDataPipeMeta)
75
+ which automatically registers a hook on ``__iter__`` via ``hook_iterator``.
76
+ Deep inside the complex logic of this hook, it calls ``_set_datapipe_valid_iterator_id``
77
+ which then calls ``reset()``.)
71
78
  """
72
79
  self._reset_callback()
73
80
 
@@ -81,6 +88,8 @@ class ReturnnDatasetIterDataPipe(torch.utils.data.IterDataPipe):
81
88
  except Exception: # might not work for all datasets
82
89
  num_seqs = -1
83
90
  num_seqs = numpy.array(num_seqs)
91
+ assert self._dataset.epoch is not None
92
+ epoch = numpy.array(self._dataset.epoch)
84
93
 
85
94
  try:
86
95
  data_keys = self._dataset.get_data_keys()
@@ -94,6 +103,8 @@ class ReturnnDatasetIterDataPipe(torch.utils.data.IterDataPipe):
94
103
  # It's slightly redundant to have num_seqs in each entry,
95
104
  # but it's difficult to pass this back to the main proc otherwise.
96
105
  data["num_seqs"] = num_seqs
106
+ # epoch is also redundant, but that's the cleanest/simplest way to pass it on to BatchingIterDataPipe.
107
+ data["epoch"] = epoch
97
108
  yield data
98
109
  seq_index += 1
99
110
 
@@ -34,6 +34,7 @@ from returnn.util import NumbersDict
34
34
  from returnn.util.basic import hms, NotSpecified
35
35
  from returnn.util.result_with_reason import ResultWithReason
36
36
  from returnn.util.debug import debug_shell
37
+ from returnn.util.math import simplify_and_format_number
37
38
  from returnn.forward_iface import ForwardCallbackIface
38
39
 
39
40
  from .updater import Updater
@@ -125,6 +126,7 @@ class Engine(EngineBase):
125
126
  self._log_memory_usage = config.bool("torch_log_memory_usage", False)
126
127
  self._log_batch_size = config.bool("log_batch_size", False) and log.verbose[5]
127
128
  self._calculate_exp_loss = config.bool("calculate_exp_loss", False)
129
+ self._log_grad_norm = _parse_log_grad_norm(config)
128
130
  self._reset_dev_memory_caches = config.bool("reset_dev_memory_caches", False)
129
131
  self._forward_auto_split_batch_on_oom = config.bool("forward_auto_split_batch_on_oom", False)
130
132
  self._stop_on_nonfinite_train_score = config.bool("stop_on_nonfinite_train_score", True)
@@ -383,6 +385,7 @@ class Engine(EngineBase):
383
385
  del num_seqs_
384
386
  if num_seqs is not None:
385
387
  assert last_seq_idx < num_seqs
388
+ epoch_continuous = (self.epoch - 1 + (last_seq_idx + 1) / num_seqs) if num_seqs is not None else None
386
389
 
387
390
  # clear the gradients when every gradient accumulation loop starts
388
391
  if zero_grad_next_step:
@@ -415,7 +418,10 @@ class Engine(EngineBase):
415
418
 
416
419
  if accum_grad_multiple_step_dyn:
417
420
  accum_grad_multiple_step = accum_grad_multiple_step_dyn(
418
- epoch=self.epoch, global_train_step=self.global_train_step
421
+ epoch=self.epoch,
422
+ epoch_continuous=epoch_continuous,
423
+ global_train_step=self.global_train_step,
424
+ **util.get_fwd_compat_kwargs(),
419
425
  )
420
426
  cur_count_grad_accum += 1
421
427
  perform_update_step = cur_count_grad_accum >= accum_grad_multiple_step
@@ -429,6 +435,12 @@ class Engine(EngineBase):
429
435
  else:
430
436
  total_loss.raw_tensor.backward()
431
437
 
438
+ if self._log_grad_norm and perform_update_step:
439
+ key = f"grad_norm:p{simplify_and_format_number(self._log_grad_norm)}"
440
+ assert key not in losses_dict
441
+ inv_norm_factors_dict[key] = 1.0 # once per update step
442
+ losses_dict[key] = _get_total_grad_norm(self._pt_model, p=self._log_grad_norm)
443
+
432
444
  # only update the weights when every gradient accumulation loop ends
433
445
  if perform_update_step:
434
446
  self._updater.step(grad_scaler=self._grad_scaler)
@@ -469,9 +481,7 @@ class Engine(EngineBase):
469
481
  step_idx += 1
470
482
  self.global_train_step += 1
471
483
  self._updater.set_current_train_step(
472
- global_train_step=self.global_train_step,
473
- epoch=self.epoch,
474
- epoch_continuous=(self.epoch - 1 + (last_seq_idx + 1) / num_seqs) if num_seqs is not None else None,
484
+ global_train_step=self.global_train_step, epoch=self.epoch, epoch_continuous=epoch_continuous
475
485
  )
476
486
  except Exception as exc:
477
487
  help_on_torch_exception(exc, step_idx=step_idx, model=self._orig_model, extern_data=extern_data)
@@ -480,8 +490,8 @@ class Engine(EngineBase):
480
490
  elapsed = time.monotonic() - epoch_start_time
481
491
  elapsed_computation_percentage = elapsed_computation_time / elapsed
482
492
  print(
483
- "Trained %i steps, %s elapsed (%.1f%% computing time)"
484
- % (step_idx, hms(elapsed), (elapsed_computation_percentage * 100.0)),
493
+ "Epoch %i: Trained %i steps, %s elapsed (%.1f%% computing time)"
494
+ % (self.epoch, step_idx, hms(elapsed), (elapsed_computation_percentage * 100.0)),
485
495
  file=log.v3,
486
496
  )
487
497
 
@@ -501,7 +511,7 @@ class Engine(EngineBase):
501
511
  if self._do_save():
502
512
  self.learning_rate_control.save()
503
513
 
504
- print(f"Total train loss:", _format_score(dict(accumulated_losses_dict)), file=log.v3)
514
+ print(f"Epoch {self.epoch}: Total train loss:", _format_score(dict(accumulated_losses_dict)), file=log.v3)
505
515
 
506
516
  self._maybe_report_dev_memory_stats()
507
517
 
@@ -532,8 +542,6 @@ class Engine(EngineBase):
532
542
  self._reset_dev_memory_stats()
533
543
 
534
544
  eval_dump_str = []
535
- score_keys = None
536
- error_keys = None
537
545
 
538
546
  for dataset_name, dataset in self.eval_datasets.items():
539
547
  if skip_already_evaluated and self._is_dataset_evaluated(name=dataset_name):
@@ -575,10 +583,6 @@ class Engine(EngineBase):
575
583
  self._run_step(extern_data, train_func=True)
576
584
  train_ctx = rf.get_run_ctx()
577
585
 
578
- if score_keys is None:
579
- score_keys = set(name for name, loss in train_ctx.losses.items() if not loss.as_error)
580
- error_keys = set(name for name, loss in train_ctx.losses.items() if loss.as_error)
581
-
582
586
  losses_dict = NumbersDict(
583
587
  {
584
588
  name: (
@@ -615,14 +619,7 @@ class Engine(EngineBase):
615
619
  self.learning_rate_control.save()
616
620
 
617
621
  # Same format as the TF engine.
618
- eval_dump_str += [
619
- "%s: score %s error %s"
620
- % (
621
- dataset_name,
622
- _format_score({name: accumulated_losses_dict[name] for name in score_keys}),
623
- _format_score({name: accumulated_losses_dict[name] for name in error_keys}),
624
- )
625
- ]
622
+ eval_dump_str += ["%s: %s" % (dataset_name, _format_score(dict(accumulated_losses_dict)))]
626
623
 
627
624
  if self._torch_distributed_ctx:
628
625
  assert self._torch_distributed_ctx.rank() == 0
@@ -630,7 +627,11 @@ class Engine(EngineBase):
630
627
  torch.distributed.broadcast(_has_data, src=0)
631
628
 
632
629
  if not self._torch_distributed_ctx or self._torch_distributed_ctx.rank() == 0:
633
- print(" ".join(eval_dump_str) if eval_dump_str else "(No evaluations.)", file=log.v1)
630
+ print(
631
+ f"Epoch {self.epoch} evaluation:",
632
+ " ".join(eval_dump_str) if eval_dump_str else "(No evaluations.)",
633
+ file=log.v1,
634
+ )
634
635
 
635
636
  self._maybe_report_dev_memory_stats()
636
637
 
@@ -684,7 +685,7 @@ class Engine(EngineBase):
684
685
  batch_size = self.config.typed_value("batch_size", -1)
685
686
  batch_size = self.config.typed_value(f"batch_size_{'train' if train else 'dev'}", batch_size)
686
687
  assert batch_size != -1, f"batch_size or batch_size_{'train' if train else 'dev'} not defined in config"
687
- max_seqs = self.config.int("max_seqs", -1)
688
+ max_seqs = self.config.typed_value("max_seqs", -1)
688
689
  batches_dataset = data_pipeline.BatchingIterDataPipe(wrapped_dataset, batch_size=batch_size, max_seqs=max_seqs)
689
690
 
690
691
  loader_opts = self.config.typed_value("torch_dataloader_opts") or {}
@@ -1286,9 +1287,9 @@ def _print_process(
1286
1287
  if log.verbose[5]: # report every minibatch
1287
1288
  info = [report_prefix, "step %i" % step]
1288
1289
  if eval_info: # Such as score.
1289
- info += ["%s %s" % (k, _format_value(v)) for k, v in eval_info.items()]
1290
+ info += ["%s %s" % (k, _format_score_value(v)) for k, v in eval_info.items()]
1290
1291
  if batch_size_info:
1291
- info += ["%s %s" % (k, _format_value(v)) for k, v in batch_size_info.items()]
1292
+ info += ["%s %s" % (k, _format_score_value(v)) for k, v in batch_size_info.items()]
1292
1293
  if log_memory_usage_device:
1293
1294
  dev = torch.device(log_memory_usage_device)
1294
1295
  if dev.type == "cuda":
@@ -1324,11 +1325,11 @@ def _format_score(score: Dict[str, float]) -> str:
1324
1325
  if not score:
1325
1326
  return "None"
1326
1327
  if len(score) == 1:
1327
- return _format_value(list(score.values())[0])
1328
- return " ".join(["%s %s" % (key.split(":", 2)[-1], _format_value(score[key])) for key in score.keys()])
1328
+ return _format_score_value(list(score.values())[0])
1329
+ return " ".join(["%s %s" % (k, _format_score_value(v)) for k, v in score.items()])
1329
1330
 
1330
1331
 
1331
- def _format_value(v: Any) -> str:
1332
+ def _format_score_value(v: Any) -> str:
1332
1333
  if isinstance(v, float):
1333
1334
  if abs(v) > 1.0e3 or abs(v) < 1.0e-3:
1334
1335
  return f"{v:.3e}"
@@ -1422,3 +1423,39 @@ def _set_torch_default_dtype_ctx_mgr(dtype: torch.dtype):
1422
1423
  yield
1423
1424
  finally:
1424
1425
  torch.set_default_dtype(old_dtype)
1426
+
1427
+
1428
def _parse_log_grad_norm(config: Config) -> Optional[Union[int, float]]:
    """
    Read and normalize the ``log_grad_norm`` config option.

    :param config: RETURNN config
    :return: the p-norm order to log (``True`` maps to 2), or None when logging is disabled
    :raises ValueError: on a string value other than "true"/"false"/"none" (case-insensitive)
    :raises TypeError: on any other unsupported value type
    """
    raw = config.opt_typed_value("log_grad_norm", False)
    if isinstance(raw, str):
        lowered = raw.lower()
        if lowered not in ("true", "false", "none"):
            raise ValueError(f"Invalid value for log_grad_norm: {raw!r}")
        raw = {"true": True, "false": False, "none": None}[lowered]
    if raw is None:
        return None
    # Note: bool must be checked before int/float, as bool is a subclass of int.
    if isinstance(raw, bool):
        return 2 if raw else None
    if isinstance(raw, (int, float)):
        assert raw > 0, f"log_grad_norm {raw} > 0 expected"  # otherwise fine...
        return raw
    raise TypeError(f"Invalid type for log_grad_norm: {raw!r} type {type(raw)}")
1447
+
1448
+
1449
def _get_total_grad_norm(model: torch.nn.Module, p: float) -> float:
    """
    Compute the total p-norm over the gradients of all trainable model parameters.

    :param model: model whose parameter gradients are measured
    :param p: order of the norm (e.g. 2 for the Euclidean norm)
    :return: p-norm over the per-parameter gradient p-norms
        (mathematically equal to the p-norm over all gradient elements);
        0.0 if no parameter currently has a gradient
    """
    per_param_norms = [
        param.grad.norm(p=p).detach().cpu()
        for param in model.parameters()
        if param.requires_grad and param.grad is not None
    ]
    if not per_param_norms:
        # torch.stack raises on an empty list; no gradients means a zero norm.
        return 0.0
    return float(torch.norm(torch.stack(per_param_norms), p=p).item())
@@ -5,7 +5,7 @@ Various generic utilities, which are shared across different backend engines.
5
5
  """
6
6
 
7
7
  from __future__ import annotations
8
- from typing import Optional, Union, Any, Generic, TypeVar, Iterable, Tuple, Dict, List, Callable
8
+ from typing import Optional, Union, Any, Generic, TypeVar, Iterable, Tuple, Dict, List, Set, Callable
9
9
 
10
10
  import subprocess
11
11
  from subprocess import CalledProcessError
@@ -554,12 +554,11 @@ def describe_torch_version() -> str:
554
554
  return "%s (%s in %s)" % (version, git_info, tdir)
555
555
 
556
556
 
557
def get_tensorflow_version_tuple() -> Tuple[int, ...]:
    """
    :return: tuple of ints, first entry is the major version
    """
    import tensorflow as tf  # noqa
    import re

    # Strip pre-release suffixes ("-rc<N>", "-dev<N>") so each part parses as int.
    suffix_pattern = re.compile("(-rc[0-9]|-dev[0-9]*)")
    version_parts = tf.__version__.split(".")
    return tuple(int(suffix_pattern.sub("", part)) for part in version_parts)
@@ -1963,9 +1962,9 @@ class NumbersDict:
1963
1962
  self.value = broadcast_value
1964
1963
  self.max = self._max_error
1965
1964
 
1966
def copy(self) -> NumbersDict:
    """
    :return: a shallow copy of this NumbersDict (same entries and broadcast value)
    """
    duplicate = NumbersDict(self)
    return duplicate
1971
1970
 
@@ -1982,11 +1981,10 @@ class NumbersDict:
1982
1981
  numbers_dict={k: const_number for k in numbers_dict.dict.keys()},
1983
1982
  )
1984
1983
 
1985
- def copy_like(self, numbers_dict):
1984
+ def copy_like(self, numbers_dict: NumbersDict) -> NumbersDict:
1986
1985
  """
1987
- :param NumbersDict numbers_dict:
1986
+ :param numbers_dict:
1988
1987
  :return: copy of self with same keys as numbers_dict as far as we have them
1989
- :rtype: NumbersDict
1990
1988
  """
1991
1989
  if self.value is not None:
1992
1990
  return NumbersDict(
@@ -1999,11 +1997,11 @@ class NumbersDict:
1999
1997
  )
2000
1998
 
2001
1999
@property
def keys_set(self) -> Set[str]:
    """
    Also see :func:`keys_union` if you want to have a deterministic order.

    :return: set of keys (from self.dict only; excludes the broadcast self.value)
    """
    return {key for key in self.dict}
2009
2007
 
@@ -2020,29 +2018,32 @@ class NumbersDict:
2020
2018
  res.append(key)
2021
2019
  return res
2022
2020
 
2023
def __getitem__(self, key: str):
    """Return the entry for key; falls back to the broadcast self.value if set, else raises KeyError."""
    fallback = self.value
    if fallback is None:
        return self.dict[key]
    return self.dict.get(key, fallback)
2027
2025
 
2028
def __setitem__(self, key: str, value):
    """Set an explicit entry in self.dict; the broadcast self.value is not affected."""
    self.dict[key] = value
2030
2028
 
2031
def __delitem__(self, key: str):
    """Delete an explicit entry from self.dict (KeyError if absent); self.value is untouched."""
    del self.dict[key]
2033
2031
 
2034
- def get(self, key, default=None):
2032
def __contains__(self, item: str):
    # Membership checks only the explicit dict entries.
    # NOTE(review): unlike __getitem__/get, the broadcast self.value is not
    # considered here — presumably intentional, but confirm against callers.
    return item in self.dict
2034
+
2035
def get(self, key: str, default=None):
    """
    :param key:
    :param T default:
    :rtype: object|T
    """
    # Keep consistent with self.__getitem__. If self.value is set, this will always be the default value.
    fallback = default if self.value is None else self.value
    return self.dict.get(key, fallback)
2042
2043
 
2043
- def pop(self, key, *args):
2044
+ def pop(self, key: str, *args):
2044
2045
  """
2045
- :param str key:
2046
+ :param key:
2046
2047
  :param T args: default, or not
2047
2048
  :rtype: object|T
2048
2049
  """
@@ -2055,22 +2056,21 @@ class NumbersDict:
2055
2056
  # which would only make sense for our values, not the dict keys.
2056
2057
  raise Exception("%s.__iter__ is undefined" % self.__class__.__name__)
2057
2058
 
2058
def keys(self) -> Iterable[str]:
    """
    :return: keys of self.dict (a dict keys view, not a set);
        excludes the broadcast self.value
    """
    return self.dict.keys()
2063
2064
 
2064
def values(self) -> List[Any]:
    """
    :return: values: dict values + self.value
    """
    collected = list(self.dict.values())
    if self.value is not None:
        collected.append(self.value)
    return collected
2069
2070
 
2070
def items(self) -> Iterable[Tuple[str, Any]]:
    """
    :return: dict items (a dict items view). this excludes self.value
    """
    return self.dict.items()
2076
2076
 
@@ -2080,9 +2080,9 @@ class NumbersDict:
2080
2080
  """
2081
2081
  return self.value is not None or key in self.dict
2082
2082
 
2083
def has_values(self) -> bool:
    """
    :return: whether there is anything stored: any entry in self.dict, or a broadcast self.value
    """
    return self.value is not None or bool(self.dict)
2088
2088
 
@@ -2186,12 +2186,12 @@ class NumbersDict:
2186
2186
def __neg__(self):
    # Element-wise negation, delegated to self.unary_op (defined elsewhere in this class).
    return self.unary_op(op=lambda a: -a)
2188
2188
 
2189
def __bool__(self) -> bool:
    """Truthy iff any stored value (dict values or broadcast value) is truthy."""
    for value in self.values():
        if value:
            return True
    return False
2191
2191
 
2192
2192
  __nonzero__ = __bool__ # Python 2
2193
2193
 
2194
- def elem_eq(self, other, result_with_default=True):
2194
+ def elem_eq(self, other, result_with_default: bool = True) -> NumbersDict:
2195
2195
  """
2196
2196
  Element-wise equality check with other.
2197
2197
  Note about broadcast default value: Consider some key which is neither in self nor in other.
@@ -2202,8 +2202,8 @@ class NumbersDict:
2202
2202
  You can control the behavior via result_with_default.
2203
2203
 
2204
2204
  :param NumbersDict|T other:
2205
- :param bool result_with_default:
2206
- :rtype: NumbersDict
2205
+ :param result_with_default:
2206
+ :return: new NumbersDict with bool values
2207
2207
  """
2208
2208
 
2209
2209
  def op(a, b):
@@ -2223,19 +2223,17 @@ class NumbersDict:
2223
2223
  res.value = None
2224
2224
  return res
2225
2225
 
2226
def __eq__(self, other) -> bool:
    """
    :param NumbersDict|T other:
    :return: whether self == other elemwise. see self.elem_eq
    """
    elementwise = self.elem_eq(other)
    return all(elementwise.values())
2233
2232
 
2234
def __ne__(self, other) -> bool:
    """
    :param NumbersDict|T other:
    :return: not (self == other)
    """
    is_equal = self == other
    return not is_equal
2241
2239
 
@@ -2244,11 +2242,10 @@ class NumbersDict:
2244
2242
  # and it would just confuse.
2245
2243
  raise Exception("%s.__cmp__ is undefined" % self.__class__.__name__)
2246
2244
 
2247
- def any_compare(self, other, cmp):
2245
+ def any_compare(self, other, cmp) -> bool:
2248
2246
  """
2249
2247
  :param NumbersDict other:
2250
2248
  :param ((object,object)->True) cmp:
2251
- :rtype: True
2252
2249
  """
2253
2250
  for key in self.keys():
2254
2251
  if key in other.keys():
@@ -2281,11 +2278,11 @@ class NumbersDict:
2281
2278
  return min(*args)
2282
2279
 
2283
2280
  @classmethod
2284
- def max(cls, items):
2281
+ def max(cls, items) -> NumbersDict:
2285
2282
  """
2286
2283
  Element-wise maximum for item in items.
2284
+
2287
2285
  :param list[NumbersDict|int|float] items:
2288
- :rtype: NumbersDict
2289
2286
  """
2290
2287
  assert items
2291
2288
  if len(items) == 1:
@@ -2295,11 +2292,10 @@ class NumbersDict:
2295
2292
  return cls.max([items[0], cls.max(items[1:])])
2296
2293
 
2297
2294
  @classmethod
2298
- def min(cls, items):
2295
+ def min(cls, items) -> NumbersDict:
2299
2296
  """
2300
2297
  Element-wise minimum for item in items.
2301
2298
  :param list[NumbersDict|int|float] items:
2302
- :rtype: NumbersDict
2303
2299
  """
2304
2300
  assert items
2305
2301
  if len(items) == 1:
@@ -2325,7 +2321,7 @@ class NumbersDict:
2325
2321
  """
2326
2322
  return min(self.values())
2327
2323
 
2328
- def __repr__(self):
2324
+ def __repr__(self) -> str:
2329
2325
  if self.value is None and not self.dict:
2330
2326
  return "%s()" % self.__class__.__name__
2331
2327
  if self.value is None and self.dict: