returnn 1.20241026.3853__tar.gz → 1.20241105.131828__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of returnn might be problematic.

Files changed (469)
  1. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/PKG-INFO +1 -1
  2. returnn-1.20241105.131828/_setup_info_generated.py +2 -0
  3. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/datasets/multi_proc.py +41 -8
  4. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/data/pipeline.py +64 -13
  5. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/data/returnn_dataset_wrapper.py +4 -0
  6. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/engine.py +50 -6
  7. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/util/basic.py +6 -4
  8. returnn-1.20241105.131828/returnn/util/math.py +87 -0
  9. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn.egg-info/PKG-INFO +1 -1
  10. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_Util.py +30 -0
  11. returnn-1.20241026.3853/_setup_info_generated.py +0 -2
  12. returnn-1.20241026.3853/returnn/util/math.py +0 -34
  13. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/.editorconfig +0 -0
  14. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/.gitignore +0 -0
  15. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/.gitmodules +0 -0
  16. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/.kateconfig +0 -0
  17. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/CHANGELOG.md +0 -0
  18. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/CODEOWNERS +0 -0
  19. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/CONTRIBUTING.md +0 -0
  20. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/LICENSE +0 -0
  21. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/MANIFEST.in +0 -0
  22. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/README.rst +0 -0
  23. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/__init__.py +0 -0
  24. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/12AX.cluster_map +0 -0
  25. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/_setup_returnn_env.py +0 -0
  26. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-fwd.config +0 -0
  27. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-horovod-mpi.py +0 -0
  28. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-horovod-mpi.py.sh +0 -0
  29. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-horovod-mpi.sh +0 -0
  30. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-hyper-param-tuning.config +0 -0
  31. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-iter-dataset.py +0 -0
  32. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-list-devices.py +0 -0
  33. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-lua-torch-layer.config +0 -0
  34. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-pretrain.config +0 -0
  35. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-record-and-push-to-webserver.py +0 -0
  36. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-returnn-as-framework.py +0 -0
  37. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-rf-pt-benchmark.py +0 -0
  38. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-rf.config +0 -0
  39. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-rhn-enwik8.config +0 -0
  40. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-sprint-interface.py +0 -0
  41. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-tf-att-copy.config +0 -0
  42. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-tf-attention.config +0 -0
  43. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
  44. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
  45. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-tf-enc-dec.config +0 -0
  46. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-tf-hard-att-copy.config +0 -0
  47. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-tf-lstm-benchmark.py +0 -0
  48. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
  49. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
  50. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-tf-native-lstm.12ax.config +0 -0
  51. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-tf-native-lstm2.12ax.config +0 -0
  52. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
  53. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-tf-neural-transducer.12ax.config +0 -0
  54. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-tf-rec-explicit-lstm.config +0 -0
  55. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-tf-rec-explicit-rnn.config +0 -0
  56. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-tf-rec-self-att.config +0 -0
  57. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-tf-search-compiled-graph.py +0 -0
  58. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
  59. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-timit-lstm-ctc.config +0 -0
  60. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-torch.config +0 -0
  61. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
  62. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/demo.sh +0 -0
  63. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
  64. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
  65. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
  66. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/mdlstm/IAM/README.txt +0 -0
  67. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/mdlstm/IAM/chars.txt +0 -0
  68. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/mdlstm/IAM/config_demo +0 -0
  69. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/mdlstm/IAM/config_fwd +0 -0
  70. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/mdlstm/IAM/config_real +0 -0
  71. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
  72. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/mdlstm/IAM/decode.py +0 -0
  73. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
  74. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/mdlstm/IAM/go.sh +0 -0
  75. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/mdlstm/IAM/lines.txt +0 -0
  76. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/mdlstm/IAM/split/eval.txt +0 -0
  77. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/mdlstm/IAM/split/train.txt +0 -0
  78. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/mdlstm/IAM/split/valid.txt +0 -0
  79. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/mdlstm/README.md +0 -0
  80. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/mdlstm/artificial/create_test_h5.py +0 -0
  81. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/mdlstm/artificial/forwardconfig +0 -0
  82. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/mdlstm/artificial/go.sh +0 -0
  83. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/mdlstm/artificial/trainconfig +0 -0
  84. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
  85. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
  86. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/mdlstm/artificial_rgb/go.sh +0 -0
  87. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
  88. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/pyproject.toml +0 -0
  89. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/requirements.txt +0 -0
  90. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/__init__.py +0 -0
  91. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/__main__.py +0 -0
  92. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/__old_mod_loader__.py +0 -0
  93. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/__setup__.py +0 -0
  94. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/config.py +0 -0
  95. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/datasets/__init__.py +0 -0
  96. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/datasets/audio.py +0 -0
  97. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/datasets/basic.py +0 -0
  98. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/datasets/bundle_file.py +0 -0
  99. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/datasets/cached.py +0 -0
  100. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/datasets/cached2.py +0 -0
  101. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/datasets/distrib_files.py +0 -0
  102. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/datasets/generating.py +0 -0
  103. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/datasets/hdf.py +0 -0
  104. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/datasets/lm.py +0 -0
  105. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/datasets/map.py +0 -0
  106. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/datasets/meta.py +0 -0
  107. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/datasets/normalization_data.py +0 -0
  108. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/datasets/numpy_dump.py +0 -0
  109. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/datasets/postprocessing.py +0 -0
  110. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/datasets/raw_wav.py +0 -0
  111. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/datasets/sprint.py +0 -0
  112. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/datasets/stereo.py +0 -0
  113. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/datasets/util/__init__.py +0 -0
  114. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/datasets/util/feature_extraction.py +0 -0
  115. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/datasets/util/strings.py +0 -0
  116. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/datasets/util/vocabulary.py +0 -0
  117. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/engine/__init__.py +0 -0
  118. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/engine/base.py +0 -0
  119. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/engine/batch.py +0 -0
  120. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/__init__.py +0 -0
  121. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/__main__.py +0 -0
  122. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/warp-rna/.git +0 -0
  123. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
  124. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
  125. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
  126. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
  127. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
  128. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
  129. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
  130. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
  131. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
  132. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
  133. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
  134. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
  135. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
  136. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
  137. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
  138. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
  139. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
  140. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
  141. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
  142. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
  143. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
  144. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
  145. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
  146. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
  147. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/__init__.py +0 -0
  148. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/graph_editor/README.md +0 -0
  149. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/graph_editor/__init__.py +0 -0
  150. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/graph_editor/edit.py +0 -0
  151. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/graph_editor/reroute.py +0 -0
  152. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/graph_editor/select.py +0 -0
  153. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/graph_editor/subgraph.py +0 -0
  154. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/graph_editor/transform.py +0 -0
  155. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/extern/graph_editor/util.py +0 -0
  156. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/forward_iface.py +0 -0
  157. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/__init__.py +0 -0
  158. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/_backend.py +0 -0
  159. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/_native/__init__.py +0 -0
  160. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/_native/backend.cpp +0 -0
  161. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/_native/backend.hpp +0 -0
  162. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/_native/module.cpp +0 -0
  163. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/_native/module.hpp +0 -0
  164. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/_native/py_utils.hpp +0 -0
  165. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/_native/tensor_ops.cpp +0 -0
  166. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/_native/tensor_ops.hpp +0 -0
  167. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/_numpy_backend.py +0 -0
  168. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/_random_journal.py +0 -0
  169. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/_utils.py +0 -0
  170. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/array_.py +0 -0
  171. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/attention.py +0 -0
  172. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/audio/__init__.py +0 -0
  173. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/audio/mel.py +0 -0
  174. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/audio/specaugment.py +0 -0
  175. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/backend.py +0 -0
  176. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/build_from_dict.py +0 -0
  177. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/cond.py +0 -0
  178. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/const.py +0 -0
  179. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/container.py +0 -0
  180. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/control_flow_ctx.py +0 -0
  181. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/conv.py +0 -0
  182. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/conversions/__init__.py +0 -0
  183. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/conversions/espnet_e_branchformer.py +0 -0
  184. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/conversions/hf_llama.py +0 -0
  185. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/conversions/torch_nn.py +0 -0
  186. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/decoder/__init__.py +0 -0
  187. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/decoder/transformer.py +0 -0
  188. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/device.py +0 -0
  189. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/dims.py +0 -0
  190. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/dropout.py +0 -0
  191. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/dtype.py +0 -0
  192. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/encoder/__init__.py +0 -0
  193. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/encoder/base.py +0 -0
  194. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/encoder/conformer.py +0 -0
  195. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/encoder/e_branchformer.py +0 -0
  196. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/encoder/transformer.py +0 -0
  197. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/gradient.py +0 -0
  198. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/graph.py +0 -0
  199. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/hooks.py +0 -0
  200. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/init.py +0 -0
  201. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/label_smoothing.py +0 -0
  202. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/linear.py +0 -0
  203. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/loop.py +0 -0
  204. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/loss.py +0 -0
  205. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/math_.py +0 -0
  206. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/matmul.py +0 -0
  207. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/module.py +0 -0
  208. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/normalization.py +0 -0
  209. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/parameter.py +0 -0
  210. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/parametrizations.py +0 -0
  211. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/parametrize.py +0 -0
  212. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/piecewise_linear.py +0 -0
  213. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/rand.py +0 -0
  214. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/rec.py +0 -0
  215. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/reduce.py +0 -0
  216. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/run_ctx.py +0 -0
  217. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/signal.py +0 -0
  218. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/state.py +0 -0
  219. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/stepwise_scheduler.py +0 -0
  220. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/tensor_array.py +0 -0
  221. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/frontend/types.py +0 -0
  222. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/import_/__init__.py +0 -0
  223. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/import_/common.py +0 -0
  224. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/import_/git.py +0 -0
  225. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/import_/import_.py +0 -0
  226. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/learning_rate_control.py +0 -0
  227. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/log.py +0 -0
  228. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/native_op.cpp +0 -0
  229. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/native_op.py +0 -0
  230. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/pretrain.py +0 -0
  231. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/sprint/__init__.py +0 -0
  232. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/sprint/cache.py +0 -0
  233. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/sprint/control.py +0 -0
  234. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/sprint/error_signals.py +0 -0
  235. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/sprint/extern_interface.py +0 -0
  236. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/sprint/interface.py +0 -0
  237. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tensor/README.md +0 -0
  238. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tensor/__init__.py +0 -0
  239. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tensor/_dim_extra.py +0 -0
  240. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tensor/_tensor_extra.py +0 -0
  241. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tensor/_tensor_mixin_base.py +0 -0
  242. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tensor/_tensor_op_overloads.py +0 -0
  243. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tensor/control_flow_ctx.py +0 -0
  244. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tensor/dim.py +0 -0
  245. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tensor/marked_dim.py +0 -0
  246. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tensor/tensor.py +0 -0
  247. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tensor/tensor_dict.py +0 -0
  248. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tensor/utils.py +0 -0
  249. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/__init__.py +0 -0
  250. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/compat.py +0 -0
  251. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/data_pipeline.py +0 -0
  252. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/distributed.py +0 -0
  253. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/engine.py +0 -0
  254. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/frontend_layers/README.md +0 -0
  255. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/frontend_layers/__init__.py +0 -0
  256. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/frontend_layers/_backend.py +0 -0
  257. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/frontend_layers/_utils.py +0 -0
  258. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/frontend_layers/cond.py +0 -0
  259. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
  260. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
  261. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/frontend_layers/dims.py +0 -0
  262. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/frontend_layers/layer.py +0 -0
  263. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/frontend_layers/loop.py +0 -0
  264. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/frontend_layers/make_layer.py +0 -0
  265. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/frontend_layers/masked_computation.py +0 -0
  266. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
  267. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
  268. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/frontend_low_level/__init__.py +0 -0
  269. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/frontend_low_level/_backend.py +0 -0
  270. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/horovod.py +0 -0
  271. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/hyper_param_tuning.py +0 -0
  272. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/layers/__init__.py +0 -0
  273. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/layers/base.py +0 -0
  274. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/layers/basic.py +0 -0
  275. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/layers/rec.py +0 -0
  276. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/layers/segmental_model.py +0 -0
  277. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/layers/signal_processing.py +0 -0
  278. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/layers/variable.py +0 -0
  279. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/native_op.py +0 -0
  280. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/network.py +0 -0
  281. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/sprint.py +0 -0
  282. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/updater.py +0 -0
  283. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/util/__init__.py +0 -0
  284. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/util/basic.py +0 -0
  285. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/util/data.py +0 -0
  286. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/util/gradient_checkpoint.py +0 -0
  287. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/util/ken_lm.py +0 -0
  288. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/tf/util/open_fst.py +0 -0
  289. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/README.md +0 -0
  290. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/__init__.py +0 -0
  291. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/data/__init__.py +0 -0
  292. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/data/extern_data.py +0 -0
  293. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/data/queued_data_iter.py +0 -0
  294. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/data/tensor_utils.py +0 -0
  295. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/distributed.py +0 -0
  296. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/frontend/__init__.py +0 -0
  297. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/frontend/_backend.py +0 -0
  298. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/frontend/_rand.py +0 -0
  299. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/frontend/bridge.py +0 -0
  300. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/frontend/raw_ops.py +0 -0
  301. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/optim/README.md +0 -0
  302. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/optim/__init__.py +0 -0
  303. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/optim/lion.py +0 -0
  304. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/updater.py +0 -0
  305. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/util/README.md +0 -0
  306. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/util/__init__.py +0 -0
  307. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/util/array_.py +0 -0
  308. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/util/diagnose_gpu.py +0 -0
  309. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/util/exception_helper.py +0 -0
  310. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/util/gradient_checkpoint.py +0 -0
  311. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/util/module.py +0 -0
  312. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/torch/util/scaled_gradient.py +0 -0
  313. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/util/__init__.py +0 -0
  314. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/util/better_exchook.py +0 -0
  315. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/util/bpe.py +0 -0
  316. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/util/debug.py +0 -0
  317. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/util/debug_helpers.py +0 -0
  318. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/util/file_cache.py +0 -0
  319. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/util/fsa.py +0 -0
  320. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/util/literal_py_to_pickle.py +0 -0
  321. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
  322. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/util/native_code_compiler.py +0 -0
  323. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/util/pprint.py +0 -0
  324. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/util/py-to-pickle.cpp +0 -0
  325. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/util/py_compat.py +0 -0
  326. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/util/py_ext_mod_compiler.py +0 -0
  327. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/util/result_with_reason.py +0 -0
  328. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/util/sig_proc.py +0 -0
  329. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/util/task_system.py +0 -0
  330. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/util/train_proc_manager.py +0 -0
  331. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn/util/watch_memory.py +0 -0
  332. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn.egg-info/SOURCES.txt +0 -0
  333. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn.egg-info/dependency_links.txt +0 -0
  334. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/returnn.egg-info/top_level.txt +0 -0
  335. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/rnn.py +0 -0
  336. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/setup.cfg +0 -0
  337. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/setup.py +0 -0
  338. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/DummySprintExec.py +0 -0
  339. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/PyCharm-inspection-profile.xml +0 -0
  340. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/PyCharm.idea/.gitignore +0 -0
  341. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/PyCharm.idea/.name +0 -0
  342. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
  343. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
  344. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
  345. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
  346. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
  347. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/PyCharm.idea/misc.xml +0 -0
  348. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/PyCharm.idea/modules.xml +0 -0
  349. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/PyCharm.idea/returnn.iml +0 -0
  350. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
  351. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/_set_num_threads1.py +0 -0
  352. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/_setup_returnn_env.py +0 -0
  353. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/_setup_test_env.py +0 -0
  354. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/bpe-unicode-demo.codes +0 -0
  355. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/bpe-unicode-demo.vocab +0 -0
  356. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/lexicon_opt.fst +0 -0
  357. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/lexicon_opt.isyms +0 -0
  358. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/lexicon_opt.jpg +0 -0
  359. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/lexicon_opt.osyms +0 -0
  360. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/lint_common.py +0 -0
  361. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/pycharm-inspect.py +0 -0
  362. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/pylint.py +0 -0
  363. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/returnn-as-framework.py +0 -0
  364. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/rf_utils.py +0 -0
  365. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/spelling.dic +0 -0
  366. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_Config.py +0 -0
  367. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_Dataset.py +0 -0
  368. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_Fsa.py +0 -0
  369. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_GeneratingDataset.py +0 -0
  370. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_HDFDataset.py +0 -0
  371. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_LearningRateControl.py +0 -0
  372. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_Log.py +0 -0
  373. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_MultiProcDataset.py +0 -0
  374. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_Pretrain.py +0 -0
  375. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_ResNet.py +0 -0
  376. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_SprintDataset.py +0 -0
  377. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_SprintInterface.py +0 -0
  378. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_TFEngine.py +0 -0
  379. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_TFNativeOp.py +0 -0
  380. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_TFNetworkLayer.py +0 -0
  381. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_TFNetworkRecLayer.py +0 -0
  382. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_TFNetworkSigProcLayer.py +0 -0
  383. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_TFUpdater.py +0 -0
  384. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_TFUtil.py +0 -0
  385. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_TF_determinism.py +0 -0
  386. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_TaskSystem.py +0 -0
  387. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_TaskSystem_SharedMem.py +0 -0
  388. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_TranslationDataset.py +0 -0
  389. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_demos.py +0 -0
  390. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_fork_exec.py +0 -0
  391. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_hdf_dump.py +0 -0
  392. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_rf_array.py +0 -0
  393. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_rf_attention.py +0 -0
  394. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_rf_base.py +0 -0
  395. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_rf_cond.py +0 -0
  396. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_rf_const.py +0 -0
  397. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_rf_container.py +0 -0
  398. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_rf_conv.py +0 -0
  399. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_rf_decoder_transformer.py +0 -0
  400. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_rf_encoder_conformer.py +0 -0
  401. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_rf_gradient.py +0 -0
  402. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_rf_label_smoothing.py +0 -0
  403. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_rf_loop.py +0 -0
  404. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_rf_math.py +0 -0
  405. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_rf_normalization.py +0 -0
  406. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_rf_piecewise_linear.py +0 -0
  407. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_rf_rec.py +0 -0
  408. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_rf_reduce.py +0 -0
  409. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_rf_signal.py +0 -0
  410. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_tensor.py +0 -0
  411. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_tools.py +0 -0
  412. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_torch_dataset.py +0 -0
  413. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_torch_engine.py +0 -0
  414. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_torch_frontend.py +0 -0
  415. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_torch_internal_frontend.py +0 -0
  416. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/test_torch_util.py +0 -0
  417. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tests/torch_utils.py +0 -0
  418. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/_setup_returnn_env.py +0 -0
  419. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/analyze-dataset-batches.py +0 -0
  420. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/bliss-collect-seq-lens.py +0 -0
  421. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/bliss-dump-text.py +0 -0
  422. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/bliss-get-segment-names.py +0 -0
  423. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/bliss-to-ogg-zip.py +0 -0
  424. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/bpe-create-lexicon.py +0 -0
  425. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/calculate-word-error-rate.py +0 -0
  426. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/cleanup-old-models.py +0 -0
  427. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/collect-orth-symbols.py +0 -0
  428. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/collect-words.py +0 -0
  429. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/compile_native_op.py +0 -0
  430. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/compile_tf_graph.py +0 -0
  431. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/debug-dump-search-scores.py +0 -0
  432. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/debug-plot-search-scores.py +0 -0
  433. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/dump-dataset-raw-strings.py +0 -0
  434. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/dump-dataset.py +0 -0
  435. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/dump-forward-stats.py +0 -0
  436. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/dump-forward.py +0 -0
  437. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/dump-network-json.py +0 -0
  438. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/dump-pickle.py +0 -0
  439. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/extract_state_tying_from_dataset.py +0 -0
  440. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/get-attention-weights.py +0 -0
  441. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/get-best-model-epoch.py +0 -0
  442. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/hdf_dump.py +0 -0
  443. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/hdf_dump_translation_dataset.py +0 -0
  444. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/import-blocks-mt-model.py +0 -0
  445. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/import-t2t-mt-model.py +0 -0
  446. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/lattice_rescorer/.gitignore +0 -0
  447. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/lattice_rescorer/Makefile +0 -0
  448. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/lattice_rescorer/README.md +0 -0
  449. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/lattice_rescorer/example/README.md +0 -0
  450. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/lattice_rescorer/example/libs_list +0 -0
  451. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
  452. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
  453. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
  454. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/lattice_rescorer/example/state_vars_list +0 -0
  455. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/lattice_rescorer/example/tensor_names_list +0 -0
  456. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/lattice_rescorer/file.h +0 -0
  457. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
  458. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
  459. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/lattice_rescorer/main.cc +0 -0
  460. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/lattice_rescorer/rescorer.h +0 -0
  461. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/lattice_rescorer/vocabulary.cc +0 -0
  462. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/lattice_rescorer/vocabulary.h +0 -0
  463. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/tf_avg_checkpoints.py +0 -0
  464. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/tf_inspect_checkpoint.py +0 -0
  465. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/tf_inspect_summary_log.py +0 -0
  466. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/torch_avg_checkpoints.py +0 -0
  467. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/torch_export_to_onnx.py +0 -0
  468. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/torch_inspect_checkpoint.py +0 -0
  469. {returnn-1.20241026.3853 → returnn-1.20241105.131828}/tools/torch_inspect_checkpoint_and_opt.py +0 -0

PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: returnn
- Version: 1.20241026.3853
+ Version: 1.20241105.131828
  Summary: The RWTH extensible training framework for universal recurrent neural networks
  Home-page: https://github.com/rwth-i6/returnn/
  Author: Albert Zeyer

_setup_info_generated.py
@@ -0,0 +1,2 @@
+ version = '1.20241105.131828'
+ long_version = '1.20241105.131828+git.0494bcf'

returnn/datasets/multi_proc.py
@@ -68,6 +68,7 @@ class MultiProcDataset(CachedDataset2):
  self._data_keys = None
  self._num_seqs = None
  self._total_num_seqs = None
+ self._all_tags = None

  self._worker_parent_conns = None  # type: Optional[List[mpConnection]]
  self._seq_order_proc_parent_conn = None  # type: Optional[mpConnection]
@@ -96,8 +97,9 @@ class MultiProcDataset(CachedDataset2):
  return {
  "num_inputs": self.num_inputs,
  "num_outputs": self.num_outputs,
- "total_num_seqs": self._total_num_seqs,
  "labels": self.labels,
+ "total_num_seqs": self._total_num_seqs,
+ "all_tags": self._all_tags,
  }

  def _lazy_init(self):
@@ -172,8 +174,6 @@ class MultiProcDataset(CachedDataset2):
  assert msg == "num_inputs"
  msg, self.num_outputs = self._seq_order_proc_parent_conn.recv()
  assert msg == "num_outputs"
- msg, self._total_num_seqs = self._seq_order_proc_parent_conn.recv()
- assert msg == "total_num_seqs"
  msg, self.labels = self._seq_order_proc_parent_conn.recv()
  assert msg == "labels"

@@ -281,12 +281,23 @@ class MultiProcDataset(CachedDataset2):
  dataset = init_dataset(dataset_dict)
  parent_conn.send(("num_inputs", dataset.num_inputs))
  parent_conn.send(("num_outputs", dataset.num_outputs))
+ parent_conn.send(("labels", dataset.labels))
+ elif msg == "get_total_num_seqs":
+ assert dataset
  try:
  total_num_seqs = dataset.get_total_num_seqs()
- except NotImplementedError:
- total_num_seqs = None
+ assert isinstance(total_num_seqs, int)
+ except NotImplementedError as exc:
+ total_num_seqs = NotImplementedError(f"{exc} in {dataset}")
  parent_conn.send(("total_num_seqs", total_num_seqs))
- parent_conn.send(("labels", dataset.labels))
+ elif msg == "get_all_tags":
+ assert dataset
+ try:
+ all_tags = dataset.get_all_tags()
+ assert isinstance(all_tags, list)
+ except NotImplementedError as exc:
+ all_tags = NotImplementedError(f"{exc} in {dataset}")
+ parent_conn.send(("all_tags", all_tags))
  elif msg == "init_seq_order":
  if dataset is None:
  dataset = init_dataset(dataset_dict)
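
The worker-side handlers above pair with the parent-side get_total_num_seqs()/get_all_tags() methods in the next hunk: the parent sends a request over the pipe, and the worker answers with either the value or a NotImplementedError instance, which the parent then re-raises. A minimal, self-contained sketch of that request/response pattern (illustrative names and values only, not RETURNN code):

```python
from multiprocessing import Pipe, Process


def _worker(conn):
    # Answer requests until told to exit; errors are sent back as exception objects.
    while True:
        msg, _kwargs = conn.recv()
        if msg == "get_total_num_seqs":
            try:
                res = 1234  # in the real code: dataset.get_total_num_seqs()
            except NotImplementedError as exc:
                res = NotImplementedError(f"{exc} in worker dataset")
            conn.send(("total_num_seqs", res))
        elif msg == "exit":
            return


if __name__ == "__main__":
    parent_conn, child_conn = Pipe()
    proc = Process(target=_worker, args=(child_conn,))
    proc.start()
    parent_conn.send(("get_total_num_seqs", {}))
    msg, value = parent_conn.recv()
    assert msg == "total_num_seqs"
    if isinstance(value, Exception):
        raise value  # propagate NotImplementedError, matching the parent-side methods
    print("total_num_seqs:", value)
    parent_conn.send(("exit", {}))
    proc.join()
```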
@@ -400,9 +411,31 @@

  def get_total_num_seqs(self, *, fast: bool = False) -> int:
  """total num seqs"""
- if self._total_num_seqs is not None:
+ if self._total_num_seqs is None:
+ worker = self._seq_order_proc_parent_conn
+ worker.send(("get_total_num_seqs", {}))
+ msg, self._total_num_seqs = worker.recv()
+ assert msg == "total_num_seqs" and self._total_num_seqs is not None
+ if isinstance(self._total_num_seqs, int):
  return self._total_num_seqs
- raise NotImplementedError
+ elif isinstance(self._total_num_seqs, Exception):
+ raise self._total_num_seqs
+ else:
+ raise TypeError(f"invalid type {type(self._total_num_seqs)} for total_num_seqs")
+
+ def get_all_tags(self):
+ """all tags"""
+ if self._all_tags is None:
+ worker = self._seq_order_proc_parent_conn
+ worker.send(("get_all_tags", {}))
+ msg, self._all_tags = worker.recv()
+ assert msg == "all_tags" and self._all_tags is not None
+ if isinstance(self._all_tags, list):
+ return self._all_tags
+ elif isinstance(self._all_tags, Exception):
+ raise self._all_tags
+ else:
+ raise TypeError(f"invalid type {type(self._all_tags)} for all_tags")

  def finish_epoch(self, *, free_resources: bool = False):
  """finish epoch"""

returnn/torch/data/pipeline.py
@@ -28,7 +28,7 @@ import numpy
  import torch
  import torch.utils.data

- from returnn.util.basic import NumbersDict
+ from returnn.util.basic import NumbersDict, get_fwd_compat_kwargs


  def create_tensor(array: numpy.ndarray) -> Union[torch.Tensor, numpy.ndarray]:
@@ -59,7 +59,7 @@ def collate_batch(batch: List[Dict[str, numpy.ndarray]]) -> Dict[str, Union[torc

  res = {}
  for key in data_keys:
- if key == "num_seqs":
+ if key in ("num_seqs", "epoch"):
  res[key] = batch[0][key]  # it should always be the same
  continue
  ls = [create_tensor(sample[key]) for sample in batch]
@@ -119,7 +119,7 @@ class ChunkingIterDataPipe(torch.utils.data.IterDataPipe):

  if not chunking_data_keys:
  chunking_data_keys = list(data_dict.keys())  # use all if not configured separately
- chunking_data_key_black_list = ["seq_tag", "seq_idx", "num_seqs"]
+ chunking_data_key_black_list = ["seq_tag", "seq_idx", "num_seqs", "epoch"]
  for key in chunking_data_key_black_list:
  if key in chunking_data_keys:
  chunking_data_keys.remove(key)
@@ -208,20 +208,66 @@ class BatchingIterDataPipe(torch.utils.data.IterDataPipe):
  def __init__(self, dataset: torch.utils.data.IterableDataset, batch_size=1, max_seqs=None):
  """
  :param dataset: dataset to apply batching to
- :param int|dict[str,int]|None batch_size: Maximum number of time steps (e.g. audio frames / words) in one
- batch (padding included).
+ :param int|dict[str,int]|None|function batch_size: Maximum number of time steps (e.g. audio frames / words)
+ in one batch (padding included).
  If given as a dict data_key -> value, sets different individual limits per data key.
  If None, no limit.
- :param int|None max_seqs: maximum number of sequences in a batch,
- None means unlimited (also -1 to match TF backend)
+ Can also be a callable with kwargs epoch, seq_idx, epoch_continuous, **_other_kwargs,
+ returning the batch size.
+ :param int|None|function max_seqs: maximum number of sequences in a batch,
+ None means unlimited (also -1 to match TF backend).
+ Can also be a callable with kwargs epoch, seq_idx, epoch_continuous, **_other_kwargs,
+ returning the max seqs.
  """
  super().__init__()
  self._dataset = dataset
- self._max_batch_size = NumbersDict(sys.maxsize if batch_size is None else batch_size)
- self._max_seqs = sys.maxsize if (max_seqs is None or max_seqs == -1) else max_seqs
+ self._max_batch_size = self._parse_batch_size(batch_size)
+ self._max_seqs = self._parse_max_seqs(max_seqs)

- assert self._max_batch_size.min_value() > 0
- assert self._max_seqs > 0
+ if not callable(self._max_batch_size):
+ assert isinstance(self._max_batch_size, NumbersDict) and self._max_batch_size.min_value() > 0
+ if not callable(self._max_seqs):
+ assert isinstance(self._max_seqs, int) and self._max_seqs > 0
+
+ @staticmethod
+ def _parse_batch_size(
+ batch_size: Union[int, Dict[str, int], NumbersDict, None, Callable],
+ *,
+ data_dict: Optional[Dict[str, Any]] = None,
+ ) -> Union[NumbersDict, Callable]:
+ """
+ :param batch_size: see __init__()
+ :return: batch_size
+ """
+ if callable(batch_size):
+ if data_dict:
+ batch_size = batch_size(**BatchingIterDataPipe._get_user_func_kwargs_from_data_dict(data_dict))
+ else:
+ return batch_size
+ return NumbersDict(sys.maxsize if batch_size is None else batch_size)
+
+ @staticmethod
+ def _parse_max_seqs(
+ max_seqs: Union[int, None, Callable], *, data_dict: Optional[Dict[str, Any]] = None
+ ) -> Union[int, Callable]:
+ """
+ :param max_seqs: see __init__()
+ :return: max_seqs
+ """
+ if callable(max_seqs):
+ if data_dict:
+ max_seqs = max_seqs(**BatchingIterDataPipe._get_user_func_kwargs_from_data_dict(data_dict))
+ else:
+ return max_seqs
+ return sys.maxsize if (max_seqs is None or max_seqs == -1) else max_seqs
+
+ @staticmethod
+ def _get_user_func_kwargs_from_data_dict(data_dict: Dict[str, Any]) -> Dict[str, Any]:
+ epoch = int(data_dict["epoch"])
+ seq_idx = int(data_dict["seq_idx"])
+ num_seqs = int(data_dict["num_seqs"])  # >=1 if known, otherwise -1
+ epoch_continuous = (epoch - 1 + (seq_idx + 1) / num_seqs) if num_seqs > 0 else None
+ return {"epoch": epoch, "seq_idx": seq_idx, "epoch_continuous": epoch_continuous, **get_fwd_compat_kwargs()}

  def __iter__(self):
  """
@@ -233,7 +279,12 @@ class BatchingIterDataPipe(torch.utils.data.IterDataPipe):
  current_max_sequence_lengths = NumbersDict(0)  # data_key -> length of longest sequence in current batch

  for data_dict in self._dataset:
- if len(current_batch) == self._max_seqs:
+ max_seqs = self._parse_max_seqs(self._max_seqs, data_dict=data_dict)
+ max_batch_size = self._parse_batch_size(self._max_batch_size, data_dict=data_dict)
+ assert isinstance(max_seqs, int) and max_seqs > 0
+ assert isinstance(max_batch_size, NumbersDict) and max_batch_size.min_value() > 0
+
+ if len(current_batch) >= max_seqs:
  yield current_batch
  current_batch = []
  current_max_sequence_lengths = NumbersDict(0)
@@ -246,7 +297,7 @@ class BatchingIterDataPipe(torch.utils.data.IterDataPipe):
  max_sequence_lengths_if_included = NumbersDict.max([current_max_sequence_lengths, sequence_lengths])
  batch_size_if_included = max_sequence_lengths_if_included * (len(current_batch) + 1)  # including padding

- if current_batch and batch_size_if_included.any_compare(self._max_batch_size, (lambda a, b: a > b)):
+ if current_batch and batch_size_if_included.any_compare(max_batch_size, (lambda a, b: a > b)):
  yield current_batch
  current_batch = [data_dict]
  current_max_sequence_lengths = sequence_lengths
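
Together with the engine.py change further below (max_seqs is now read via config.typed_value() instead of config.int()), the updated docstring means batch_size and max_seqs may be given as functions. A hedged sketch of what such callables could look like in a user config, based only on the kwargs named in the docstring above (the exact config wiring is an assumption, not shown in this diff):

```python
def batch_size(*, epoch: int, seq_idx: int, epoch_continuous=None, **_other_kwargs) -> int:
    """Ramp the max number of time steps per batch up after the first two epochs."""
    if epoch_continuous is not None and epoch_continuous < 2.0:
        return 10_000
    return 20_000


def max_seqs(*, epoch: int, **_other_kwargs) -> int:
    """Allow more sequences per batch in later epochs."""
    return 100 if epoch <= 5 else 200
```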

returnn/torch/data/returnn_dataset_wrapper.py
@@ -81,6 +81,8 @@ class ReturnnDatasetIterDataPipe(torch.utils.data.IterDataPipe):
  except Exception:  # might not work for all datasets
  num_seqs = -1
  num_seqs = numpy.array(num_seqs)
+ assert self._dataset.epoch is not None
+ epoch = numpy.array(self._dataset.epoch)

  try:
  data_keys = self._dataset.get_data_keys()
@@ -94,6 +96,8 @@ class ReturnnDatasetIterDataPipe(torch.utils.data.IterDataPipe):
  # It's slightly redundant to have num_seqs in each entry,
  # but it's difficult to pass this back to the main proc otherwise.
  data["num_seqs"] = num_seqs
+ # epoch is also redundant, but that's the cleanest/simplest way to pass it on to BatchingIterDataPipe.
+ data["epoch"] = epoch
  yield data
  seq_index += 1
@@ -34,6 +34,7 @@ from returnn.util import NumbersDict
 from returnn.util.basic import hms, NotSpecified
 from returnn.util.result_with_reason import ResultWithReason
 from returnn.util.debug import debug_shell
+from returnn.util.math import simplify_and_format_number
 from returnn.forward_iface import ForwardCallbackIface
 
 from .updater import Updater
@@ -125,6 +126,7 @@ class Engine(EngineBase):
         self._log_memory_usage = config.bool("torch_log_memory_usage", False)
         self._log_batch_size = config.bool("log_batch_size", False) and log.verbose[5]
         self._calculate_exp_loss = config.bool("calculate_exp_loss", False)
+        self._log_grad_norm = _parse_log_grad_norm(config)
         self._reset_dev_memory_caches = config.bool("reset_dev_memory_caches", False)
         self._forward_auto_split_batch_on_oom = config.bool("forward_auto_split_batch_on_oom", False)
         self._stop_on_nonfinite_train_score = config.bool("stop_on_nonfinite_train_score", True)
@@ -429,6 +431,12 @@
            else:
                total_loss.raw_tensor.backward()
 
+            if self._log_grad_norm and perform_update_step:
+                key = f"grad_norm:p{simplify_and_format_number(self._log_grad_norm)}"
+                assert key not in losses_dict
+                inv_norm_factors_dict[key] = 1.0  # once per update step
+                losses_dict[key] = _get_total_grad_norm(self._pt_model, p=self._log_grad_norm)
+
            # only update the weights when every gradient accumulation loop ends
            if perform_update_step:
                self._updater.step(grad_scaler=self._grad_scaler)
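
A hedged sketch of the corresponding config values (the accepted types follow _parse_log_grad_norm further down in this diff; the exact lines here are illustrative):

    log_grad_norm = True     # global L2 grad norm, logged once per update step under "grad_norm:p2"
    # log_grad_norm = 1.0    # any p > 0 is accepted; the key would then be "grad_norm:p1"
    # log_grad_norm = False  # default: no grad-norm logging
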
@@ -684,7 +692,7 @@
         batch_size = self.config.typed_value("batch_size", -1)
         batch_size = self.config.typed_value(f"batch_size_{'train' if train else 'dev'}", batch_size)
         assert batch_size != -1, f"batch_size or batch_size_{'train' if train else 'dev'} not defined in config"
-        max_seqs = self.config.int("max_seqs", -1)
+        max_seqs = self.config.typed_value("max_seqs", -1)
         batches_dataset = data_pipeline.BatchingIterDataPipe(wrapped_dataset, batch_size=batch_size, max_seqs=max_seqs)
 
         loader_opts = self.config.typed_value("torch_dataloader_opts") or {}
@@ -1286,9 +1294,9 @@ def _print_process(
     if log.verbose[5]:  # report every minibatch
         info = [report_prefix, "step %i" % step]
         if eval_info:  # Such as score.
-            info += ["%s %s" % (k, _format_value(v)) for k, v in eval_info.items()]
+            info += ["%s %s" % (k, _format_score_value(v)) for k, v in eval_info.items()]
         if batch_size_info:
-            info += ["%s %s" % (k, _format_value(v)) for k, v in batch_size_info.items()]
+            info += ["%s %s" % (k, _format_score_value(v)) for k, v in batch_size_info.items()]
         if log_memory_usage_device:
             dev = torch.device(log_memory_usage_device)
             if dev.type == "cuda":
@@ -1324,11 +1332,11 @@ def _format_score(score: Dict[str, float]) -> str:
     if not score:
         return "None"
     if len(score) == 1:
-        return _format_value(list(score.values())[0])
-    return " ".join(["%s %s" % (key.split(":", 2)[-1], _format_value(score[key])) for key in score.keys()])
+        return _format_score_value(list(score.values())[0])
+    return " ".join(["%s %s" % (key.split(":", 2)[-1], _format_score_value(score[key])) for key in score.keys()])
 
 
-def _format_value(v: Any) -> str:
+def _format_score_value(v: Any) -> str:
     if isinstance(v, float):
         if abs(v) > 1.0e3 or abs(v) < 1.0e-3:
             return f"{v:.3e}"
@@ -1422,3 +1430,39 @@ def _set_torch_default_dtype_ctx_mgr(dtype: torch.dtype):
         yield
     finally:
         torch.set_default_dtype(old_dtype)
+
+
+def _parse_log_grad_norm(config: Config) -> Optional[Union[int, float]]:
+    log_grad_norm = config.opt_typed_value("log_grad_norm", False)
+    if isinstance(log_grad_norm, str):
+        if log_grad_norm.lower() in ["true", "false", "none"]:
+            log_grad_norm = {"true": True, "false": False, "none": None}[log_grad_norm.lower()]
+        else:
+            raise ValueError(f"Invalid value for log_grad_norm: {log_grad_norm!r}")
+    if log_grad_norm is None:
+        pass
+    elif isinstance(log_grad_norm, bool):
+        if log_grad_norm:
+            log_grad_norm = 2
+        else:
+            log_grad_norm = None
+    elif isinstance(log_grad_norm, (int, float)):
+        assert log_grad_norm > 0, f"log_grad_norm {log_grad_norm} > 0 expected"  # otherwise fine...
+    else:
+        raise TypeError(f"Invalid type for log_grad_norm: {log_grad_norm!r} type {type(log_grad_norm)}")
+    return log_grad_norm
+
+
+def _get_total_grad_norm(model: torch.nn.Module, p: float) -> float:
+    return float(
+        torch.norm(
+            torch.stack(
+                [
+                    param.grad.norm(p=p).detach().cpu()
+                    for param in model.parameters()
+                    if param.requires_grad and param.grad is not None
+                ]
+            ),
+            p=p,
+        ).item()
+    )
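
_get_total_grad_norm takes the p-norm of the per-parameter gradient norms, which for any finite p equals the p-norm over all gradient entries concatenated. A small standalone check in plain PyTorch (illustrative sketch, not RETURNN code):

    import torch

    model = torch.nn.Linear(4, 3)
    model(torch.randn(2, 4)).pow(2).sum().backward()

    # Norm of per-parameter grad norms vs. norm over the flattened, concatenated grads.
    per_param = torch.stack([p.grad.norm(p=2) for p in model.parameters() if p.grad is not None])
    flat = torch.cat([p.grad.flatten() for p in model.parameters() if p.grad is not None])
    assert torch.allclose(torch.norm(per_param, p=2), flat.norm(p=2))
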
@@ -554,12 +554,11 @@ def describe_torch_version() -> str:
     return "%s (%s in %s)" % (version, git_info, tdir)
 
 
-def get_tensorflow_version_tuple():
+def get_tensorflow_version_tuple() -> Tuple[int, ...]:
     """
     :return: tuple of ints, first entry is the major version
-    :rtype: tuple[int]
     """
-    import tensorflow as tf
+    import tensorflow as tf  # noqa
     import re
 
     return tuple([int(re.sub("(-rc[0-9]|-dev[0-9]*)", "", s)) for s in tf.__version__.split(".")])
@@ -2031,6 +2030,9 @@ class NumbersDict:
     def __delitem__(self, key):
         del self.dict[key]
 
+    def __contains__(self, item):
+        return item in self.dict
+
     def get(self, key, default=None):
         """
         :param str key:
@@ -2070,7 +2072,7 @@
     def items(self):
         """
         :return: dict items. this excludes self.value
-        :rtype: str[(str,object)]
+        :rtype: set[(str,object)]
         """
         return self.dict.items()
 
@@ -0,0 +1,87 @@
+"""
+Some mathematical functions, in pure NumPy.
+"""
+
+from __future__ import annotations
+from typing import Union, Optional, Dict
+import numpy
+
+
+def ceil_div(a: int, b: int) -> int:
+    """ceil(a / b)"""
+    return -(-a // b)
+
+
+def next_power_of_two(n: int) -> int:
+    """next power of two, >= n"""
+    return 2 ** (int(n - 1).bit_length())
+
+
+class PiecewiseLinear:
+    """
+    Piecewise linear function.
+    (Basically wraps ``numpy.interp``.)
+    """
+
+    def __init__(
+        self,
+        values: Dict[Union[int, float], Union[int, float]],
+        *,
+        kw_name: Optional[str] = None,
+        ignore_other_kwargs: bool = False,
+    ):
+        """
+        :param values: dict x -> y. Everything between the x values is linearly interpolated.
+            Everything outside is repeated from the nearest x value.
+        :param kw_name: keyword argument name to use in the __call__. Other keyword arguments are ignored.
+        :param ignore_other_kwargs: if True, ignore other keyword arguments in the __call__.
+        """
+        self._sorted_items = sorted(values.items())
+        self._sorted_keys = numpy.array([x for x, _ in self._sorted_items])
+        self._sorted_values = numpy.array([y for _, y in self._sorted_items])
+        self._kw_name = kw_name
+        self._ignore_other_kwargs = ignore_other_kwargs
+
+    def __getstate__(self):
+        # Note: I was implementing __getnewargs_ex__, but we cannot use this because of this Sisyphus bug:
+        # https://github.com/rwth-i6/sisyphus/issues/215
+        kwargs = {"values": dict(self._sorted_items)}
+        if self._kw_name is not None:
+            kwargs["kw_name"] = self._kw_name
+        if self._ignore_other_kwargs:
+            kwargs["ignore_other_kwargs"] = True
+        return kwargs
+
+    def __setstate__(self, state):
+        self.__init__(**state)
+
+    def __repr__(self) -> str:
+        kwargs = self.__getstate__()
+        values = kwargs.pop("values")
+        all_args_str = ", ".join([repr(values)] + [f"{k}={v!r}" for k, v in kwargs.items()])
+        return f"{self.__class__.__name__}({all_args_str})"
+
+    def __call__(self, *args: Union[int, float], **kwargs) -> Union[int, float]:
+        if self._kw_name:
+            if args:
+                raise TypeError(f"{self}: Expected zero positional arguments, got {args!r}")
+            x = kwargs.pop(self._kw_name, None)
+        else:
+            if len(args) != 1:
+                raise TypeError(f"{self}: Expected one positional argument, got {args!r}")
+            x = args[0]
+        if not self._ignore_other_kwargs:
+            if kwargs:
+                raise TypeError(f"{self}: Unexpected keyword arguments: {kwargs!r}")
+        assert x is not None
+        steps = self._sorted_keys
+        values = self._sorted_values
+        return numpy.interp(x, steps, values)
+
+
+def simplify_and_format_number(n: Union[int, float]) -> str:
+    """Format a number, dropping the trailing ".0" if it is an integer-valued float (e.g. 2.0 -> "2")"""
+    if isinstance(n, (int, float)):
+        # Only drop an integral fraction; do not strip digits of an integer such as 10.
+        return str(int(n)) if float(n).is_integer() else str(n)
+    raise TypeError(f"Expected int or float, got {n!r} type {type(n)}")
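
For reference, a few expected values of simplify_and_format_number as used for the grad-norm key in the engine code above (illustrative):

    from returnn.util.math import simplify_and_format_number

    assert simplify_and_format_number(2) == "2"
    assert simplify_and_format_number(2.0) == "2"
    assert simplify_and_format_number(1.5) == "1.5"
    # e.g. f"grad_norm:p{simplify_and_format_number(2.0)}" == "grad_norm:p2"
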
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20241026.3853
+Version: 1.20241105.131828
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
@@ -101,6 +101,7 @@ def test_math_PiecewiseLinear():
 
     eps = 1e-5
     f = PiecewiseLinear({1: 2, 3: 4, 5: 1})
+    assert str(f) == "PiecewiseLinear({1: 2, 3: 4, 5: 1})"
     assert_equal(f(0), 2)
     assert_equal(f(1 - eps), 2)
     assert_equal(f(1), 2)
@@ -116,6 +117,35 @@ def test_math_PiecewiseLinear():
     assert_equal(f(6), 1)
 
 
+def test_math_PiecewiseLinear_kwargs():
+    from returnn.util.math import PiecewiseLinear
+
+    f = PiecewiseLinear({1: 2, 3: 4, 5: 1}, kw_name="epoch_continuous")
+    try:
+        f(0)
+    except TypeError:
+        pass  # this is expected
+    else:
+        assert False, "TypeError expected (wrong args)"
+    assert f(epoch_continuous=0) == 2
+    try:
+        f(epoch_continuous=0, seq_idx=123)
+    except TypeError:
+        pass  # this is expected
+    else:
+        assert False, "TypeError expected (wrong args)"
+
+    f = PiecewiseLinear({1: 2, 3: 4, 5: 1}, kw_name="epoch_continuous", ignore_other_kwargs=True)
+    try:
+        f(0)
+    except TypeError:
+        pass  # this is expected
+    else:
+        assert False, "TypeError expected (wrong args)"
+    assert f(epoch_continuous=0) == 2
+    assert f(epoch_continuous=0, seq_idx=123) == 2
+
+
 def test_parse_orthography_into_symbols():
     assert_equal(list("hi"), parse_orthography_into_symbols("hi"))
     assert_equal(list(" hello "), parse_orthography_into_symbols(" hello "))
@@ -1,2 +0,0 @@
-version = '1.20241026.003853'
-long_version = '1.20241026.003853+git.98e9755'
@@ -1,34 +0,0 @@
-"""
-Some mathematical functions, in pure NumPy.
-"""
-
-from __future__ import annotations
-from typing import Union, Dict
-import numpy
-
-
-def ceil_div(a: int, b: int) -> int:
-    """ceil(a / b)"""
-    return -(-a // b)
-
-
-def next_power_of_two(n: int) -> int:
-    """next power of two, >= n"""
-    return 2 ** (int(n - 1).bit_length())
-
-
-class PiecewiseLinear:
-    """
-    Piecewise linear function.
-    """
-
-    def __init__(self, values: Dict[Union[int, float], Union[int, float]]):
-        self._sorted_items = sorted(values.items())
-        self._sorted_keys = numpy.array([x for x, _ in self._sorted_items])
-        self._sorted_values = numpy.array([y for _, y in self._sorted_items])
-
-    def __call__(self, x: Union[int, float]) -> Union[int, float]:
-        assert x is not None
-        steps = self._sorted_keys
-        values = self._sorted_values
-        return numpy.interp(x, steps, values)