returnn 1.20240830.140746__tar.gz → 1.20240905.105440__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of returnn might be problematic. Click here for more details.

Files changed (464) hide show
  1. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/PKG-INFO +1 -1
  2. returnn-1.20240905.105440/_setup_info_generated.py +2 -0
  3. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/distrib_files.py +26 -5
  4. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/_numpy_backend.py +15 -1
  5. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tensor/_dim_extra.py +124 -2
  6. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/config_entry_points.py +3 -0
  7. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/util/data.py +1 -1
  8. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/engine.py +51 -6
  9. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/frontend/bridge.py +10 -7
  10. returnn-1.20240905.105440/returnn/torch/util/module.py +43 -0
  11. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn.egg-info/PKG-INFO +1 -1
  12. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn.egg-info/SOURCES.txt +1 -0
  13. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/rf_utils.py +4 -0
  14. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_array.py +1 -1
  15. returnn-1.20240905.105440/tests/test_rf_decoder_transformer.py +324 -0
  16. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_torch_frontend.py +23 -0
  17. returnn-1.20240830.140746/_setup_info_generated.py +0 -2
  18. returnn-1.20240830.140746/tests/test_rf_decoder_transformer.py +0 -163
  19. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/.editorconfig +0 -0
  20. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/.gitignore +0 -0
  21. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/.gitmodules +0 -0
  22. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/.kateconfig +0 -0
  23. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/CHANGELOG.md +0 -0
  24. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/CODEOWNERS +0 -0
  25. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/CONTRIBUTING.md +0 -0
  26. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/LICENSE +0 -0
  27. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/MANIFEST.in +0 -0
  28. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/README.rst +0 -0
  29. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/__init__.py +0 -0
  30. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/12AX.cluster_map +0 -0
  31. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/_setup_returnn_env.py +0 -0
  32. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-fwd.config +0 -0
  33. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-horovod-mpi.py +0 -0
  34. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-horovod-mpi.py.sh +0 -0
  35. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-horovod-mpi.sh +0 -0
  36. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-hyper-param-tuning.config +0 -0
  37. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-iter-dataset.py +0 -0
  38. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-list-devices.py +0 -0
  39. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-lua-torch-layer.config +0 -0
  40. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-pretrain.config +0 -0
  41. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-record-and-push-to-webserver.py +0 -0
  42. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-returnn-as-framework.py +0 -0
  43. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-rf-pt-benchmark.py +0 -0
  44. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-rf.config +0 -0
  45. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-rhn-enwik8.config +0 -0
  46. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-sprint-interface.py +0 -0
  47. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-att-copy.config +0 -0
  48. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-attention.config +0 -0
  49. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
  50. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
  51. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-enc-dec.config +0 -0
  52. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-hard-att-copy.config +0 -0
  53. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-lstm-benchmark.py +0 -0
  54. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
  55. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
  56. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-native-lstm.12ax.config +0 -0
  57. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-native-lstm2.12ax.config +0 -0
  58. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
  59. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-neural-transducer.12ax.config +0 -0
  60. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-rec-explicit-lstm.config +0 -0
  61. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-rec-explicit-rnn.config +0 -0
  62. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-rec-self-att.config +0 -0
  63. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-search-compiled-graph.py +0 -0
  64. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
  65. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-timit-lstm-ctc.config +0 -0
  66. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-torch.config +0 -0
  67. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
  68. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo.sh +0 -0
  69. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
  70. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
  71. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
  72. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/README.txt +0 -0
  73. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/chars.txt +0 -0
  74. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/config_demo +0 -0
  75. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/config_fwd +0 -0
  76. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/config_real +0 -0
  77. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
  78. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/decode.py +0 -0
  79. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
  80. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/go.sh +0 -0
  81. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/lines.txt +0 -0
  82. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/split/eval.txt +0 -0
  83. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/split/train.txt +0 -0
  84. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/split/valid.txt +0 -0
  85. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/README.md +0 -0
  86. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/artificial/create_test_h5.py +0 -0
  87. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/artificial/forwardconfig +0 -0
  88. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/artificial/go.sh +0 -0
  89. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/artificial/trainconfig +0 -0
  90. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
  91. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
  92. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/artificial_rgb/go.sh +0 -0
  93. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
  94. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/pyproject.toml +0 -0
  95. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/requirements.txt +0 -0
  96. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/__init__.py +0 -0
  97. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/__main__.py +0 -0
  98. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/__old_mod_loader__.py +0 -0
  99. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/__setup__.py +0 -0
  100. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/config.py +0 -0
  101. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/__init__.py +0 -0
  102. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/audio.py +0 -0
  103. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/basic.py +0 -0
  104. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/bundle_file.py +0 -0
  105. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/cached.py +0 -0
  106. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/cached2.py +0 -0
  107. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/generating.py +0 -0
  108. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/hdf.py +0 -0
  109. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/lm.py +0 -0
  110. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/map.py +0 -0
  111. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/meta.py +0 -0
  112. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/multi_proc.py +0 -0
  113. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/normalization_data.py +0 -0
  114. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/numpy_dump.py +0 -0
  115. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/postprocessing.py +0 -0
  116. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/raw_wav.py +0 -0
  117. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/sprint.py +0 -0
  118. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/stereo.py +0 -0
  119. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/util/__init__.py +0 -0
  120. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/util/feature_extraction.py +0 -0
  121. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/util/strings.py +0 -0
  122. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/util/vocabulary.py +0 -0
  123. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/engine/__init__.py +0 -0
  124. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/engine/base.py +0 -0
  125. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/engine/batch.py +0 -0
  126. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/__init__.py +0 -0
  127. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/__main__.py +0 -0
  128. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/.git +0 -0
  129. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
  130. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
  131. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
  132. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
  133. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
  134. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
  135. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
  136. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
  137. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
  138. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
  139. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
  140. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
  141. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
  142. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
  143. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
  144. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
  145. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
  146. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
  147. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
  148. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
  149. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
  150. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
  151. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
  152. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
  153. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/__init__.py +0 -0
  154. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/graph_editor/README.md +0 -0
  155. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/graph_editor/__init__.py +0 -0
  156. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/graph_editor/edit.py +0 -0
  157. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/graph_editor/reroute.py +0 -0
  158. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/graph_editor/select.py +0 -0
  159. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/graph_editor/subgraph.py +0 -0
  160. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/graph_editor/transform.py +0 -0
  161. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/graph_editor/util.py +0 -0
  162. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/forward_iface.py +0 -0
  163. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/__init__.py +0 -0
  164. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/_backend.py +0 -0
  165. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/_native/__init__.py +0 -0
  166. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/_native/backend.cpp +0 -0
  167. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/_native/backend.hpp +0 -0
  168. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/_native/module.cpp +0 -0
  169. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/_native/module.hpp +0 -0
  170. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/_native/py_utils.hpp +0 -0
  171. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/_native/tensor_ops.cpp +0 -0
  172. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/_native/tensor_ops.hpp +0 -0
  173. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/_random_journal.py +0 -0
  174. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/_utils.py +0 -0
  175. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/array_.py +0 -0
  176. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/attention.py +0 -0
  177. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/audio/__init__.py +0 -0
  178. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/audio/mel.py +0 -0
  179. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/audio/specaugment.py +0 -0
  180. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/backend.py +0 -0
  181. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/build_from_dict.py +0 -0
  182. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/cond.py +0 -0
  183. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/const.py +0 -0
  184. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/container.py +0 -0
  185. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/control_flow_ctx.py +0 -0
  186. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/conv.py +0 -0
  187. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/conversions/__init__.py +0 -0
  188. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/conversions/espnet_e_branchformer.py +0 -0
  189. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/conversions/hf_llama.py +0 -0
  190. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/conversions/torch_nn.py +0 -0
  191. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/decoder/__init__.py +0 -0
  192. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/decoder/transformer.py +0 -0
  193. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/device.py +0 -0
  194. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/dims.py +0 -0
  195. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/dropout.py +0 -0
  196. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/dtype.py +0 -0
  197. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/encoder/__init__.py +0 -0
  198. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/encoder/base.py +0 -0
  199. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/encoder/conformer.py +0 -0
  200. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/encoder/e_branchformer.py +0 -0
  201. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/gradient.py +0 -0
  202. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/graph.py +0 -0
  203. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/hooks.py +0 -0
  204. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/init.py +0 -0
  205. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/label_smoothing.py +0 -0
  206. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/linear.py +0 -0
  207. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/loop.py +0 -0
  208. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/loss.py +0 -0
  209. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/math_.py +0 -0
  210. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/matmul.py +0 -0
  211. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/module.py +0 -0
  212. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/normalization.py +0 -0
  213. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/parameter.py +0 -0
  214. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/parametrizations.py +0 -0
  215. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/parametrize.py +0 -0
  216. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/piecewise_linear.py +0 -0
  217. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/rand.py +0 -0
  218. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/rec.py +0 -0
  219. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/reduce.py +0 -0
  220. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/run_ctx.py +0 -0
  221. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/signal.py +0 -0
  222. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/state.py +0 -0
  223. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/stepwise_scheduler.py +0 -0
  224. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/tensor_array.py +0 -0
  225. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/types.py +0 -0
  226. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/import_/__init__.py +0 -0
  227. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/import_/common.py +0 -0
  228. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/import_/git.py +0 -0
  229. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/import_/import_.py +0 -0
  230. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/learning_rate_control.py +0 -0
  231. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/log.py +0 -0
  232. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/native_op.cpp +0 -0
  233. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/native_op.py +0 -0
  234. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/pretrain.py +0 -0
  235. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/sprint/__init__.py +0 -0
  236. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/sprint/cache.py +0 -0
  237. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/sprint/control.py +0 -0
  238. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/sprint/error_signals.py +0 -0
  239. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/sprint/extern_interface.py +0 -0
  240. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/sprint/interface.py +0 -0
  241. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tensor/README.md +0 -0
  242. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tensor/__init__.py +0 -0
  243. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tensor/_tensor_extra.py +0 -0
  244. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tensor/_tensor_mixin_base.py +0 -0
  245. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tensor/_tensor_op_overloads.py +0 -0
  246. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tensor/control_flow_ctx.py +0 -0
  247. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tensor/dim.py +0 -0
  248. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tensor/marked_dim.py +0 -0
  249. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tensor/tensor.py +0 -0
  250. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tensor/tensor_dict.py +0 -0
  251. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tensor/utils.py +0 -0
  252. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/__init__.py +0 -0
  253. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/compat.py +0 -0
  254. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/data_pipeline.py +0 -0
  255. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/distributed.py +0 -0
  256. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/engine.py +0 -0
  257. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/README.md +0 -0
  258. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/__init__.py +0 -0
  259. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/_backend.py +0 -0
  260. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/_utils.py +0 -0
  261. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/cond.py +0 -0
  262. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
  263. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/dims.py +0 -0
  264. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/layer.py +0 -0
  265. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/loop.py +0 -0
  266. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/make_layer.py +0 -0
  267. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/masked_computation.py +0 -0
  268. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
  269. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
  270. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_low_level/__init__.py +0 -0
  271. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_low_level/_backend.py +0 -0
  272. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/horovod.py +0 -0
  273. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/hyper_param_tuning.py +0 -0
  274. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/layers/__init__.py +0 -0
  275. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/layers/base.py +0 -0
  276. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/layers/basic.py +0 -0
  277. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/layers/rec.py +0 -0
  278. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/layers/segmental_model.py +0 -0
  279. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/layers/signal_processing.py +0 -0
  280. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/layers/variable.py +0 -0
  281. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/native_op.py +0 -0
  282. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/network.py +0 -0
  283. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/sprint.py +0 -0
  284. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/updater.py +0 -0
  285. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/util/__init__.py +0 -0
  286. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/util/basic.py +0 -0
  287. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/util/gradient_checkpoint.py +0 -0
  288. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/util/ken_lm.py +0 -0
  289. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/util/open_fst.py +0 -0
  290. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/README.md +0 -0
  291. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/__init__.py +0 -0
  292. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/data/__init__.py +0 -0
  293. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/data/extern_data.py +0 -0
  294. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/data/pipeline.py +0 -0
  295. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/data/queued_data_iter.py +0 -0
  296. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
  297. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/data/tensor_utils.py +0 -0
  298. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/distributed.py +0 -0
  299. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/frontend/__init__.py +0 -0
  300. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/frontend/_backend.py +0 -0
  301. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/frontend/_rand.py +0 -0
  302. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/frontend/raw_ops.py +0 -0
  303. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/updater.py +0 -0
  304. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/util/README.md +0 -0
  305. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/util/__init__.py +0 -0
  306. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/util/array_.py +0 -0
  307. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/util/diagnose_gpu.py +0 -0
  308. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/util/gradient_checkpoint.py +0 -0
  309. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/util/scaled_gradient.py +0 -0
  310. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/__init__.py +0 -0
  311. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/basic.py +0 -0
  312. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/better_exchook.py +0 -0
  313. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/bpe.py +0 -0
  314. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/debug.py +0 -0
  315. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/debug_helpers.py +0 -0
  316. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/file_cache.py +0 -0
  317. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/fsa.py +0 -0
  318. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/literal_py_to_pickle.py +0 -0
  319. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/math.py +0 -0
  320. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
  321. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/native_code_compiler.py +0 -0
  322. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/pprint.py +0 -0
  323. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/py-to-pickle.cpp +0 -0
  324. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/py_compat.py +0 -0
  325. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/py_ext_mod_compiler.py +0 -0
  326. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/result_with_reason.py +0 -0
  327. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/sig_proc.py +0 -0
  328. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/task_system.py +0 -0
  329. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/train_proc_manager.py +0 -0
  330. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/watch_memory.py +0 -0
  331. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn.egg-info/dependency_links.txt +0 -0
  332. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn.egg-info/top_level.txt +0 -0
  333. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/rnn.py +0 -0
  334. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/setup.cfg +0 -0
  335. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/setup.py +0 -0
  336. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/DummySprintExec.py +0 -0
  337. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/PyCharm-inspection-profile.xml +0 -0
  338. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/PyCharm.idea/.gitignore +0 -0
  339. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/PyCharm.idea/.name +0 -0
  340. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
  341. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
  342. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
  343. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
  344. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
  345. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/PyCharm.idea/misc.xml +0 -0
  346. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/PyCharm.idea/modules.xml +0 -0
  347. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/PyCharm.idea/returnn.iml +0 -0
  348. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
  349. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/_set_num_threads1.py +0 -0
  350. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/_setup_returnn_env.py +0 -0
  351. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/_setup_test_env.py +0 -0
  352. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/bpe-unicode-demo.codes +0 -0
  353. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/bpe-unicode-demo.vocab +0 -0
  354. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/lexicon_opt.fst +0 -0
  355. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/lexicon_opt.isyms +0 -0
  356. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/lexicon_opt.jpg +0 -0
  357. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/lexicon_opt.osyms +0 -0
  358. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/lint_common.py +0 -0
  359. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/pycharm-inspect.py +0 -0
  360. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/pylint.py +0 -0
  361. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/returnn-as-framework.py +0 -0
  362. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/spelling.dic +0 -0
  363. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_Config.py +0 -0
  364. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_Dataset.py +0 -0
  365. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_Fsa.py +0 -0
  366. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_GeneratingDataset.py +0 -0
  367. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_HDFDataset.py +0 -0
  368. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_LearningRateControl.py +0 -0
  369. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_Log.py +0 -0
  370. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_MultiProcDataset.py +0 -0
  371. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_Pretrain.py +0 -0
  372. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_ResNet.py +0 -0
  373. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_SprintDataset.py +0 -0
  374. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_SprintInterface.py +0 -0
  375. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_TFEngine.py +0 -0
  376. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_TFNativeOp.py +0 -0
  377. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_TFNetworkLayer.py +0 -0
  378. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_TFNetworkRecLayer.py +0 -0
  379. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_TFNetworkSigProcLayer.py +0 -0
  380. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_TFUpdater.py +0 -0
  381. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_TFUtil.py +0 -0
  382. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_TF_determinism.py +0 -0
  383. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_TaskSystem.py +0 -0
  384. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_TaskSystem_SharedMem.py +0 -0
  385. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_TranslationDataset.py +0 -0
  386. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_Util.py +0 -0
  387. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_demos.py +0 -0
  388. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_fork_exec.py +0 -0
  389. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_hdf_dump.py +0 -0
  390. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_attention.py +0 -0
  391. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_base.py +0 -0
  392. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_cond.py +0 -0
  393. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_const.py +0 -0
  394. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_container.py +0 -0
  395. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_conv.py +0 -0
  396. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_encoder_conformer.py +0 -0
  397. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_gradient.py +0 -0
  398. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_label_smoothing.py +0 -0
  399. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_loop.py +0 -0
  400. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_math.py +0 -0
  401. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_normalization.py +0 -0
  402. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_piecewise_linear.py +0 -0
  403. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_rec.py +0 -0
  404. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_reduce.py +0 -0
  405. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_signal.py +0 -0
  406. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_tensor.py +0 -0
  407. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_tools.py +0 -0
  408. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_torch_dataset.py +0 -0
  409. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_torch_engine.py +0 -0
  410. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_torch_internal_frontend.py +0 -0
  411. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_torch_util.py +0 -0
  412. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/torch_utils.py +0 -0
  413. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/_setup_returnn_env.py +0 -0
  414. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/analyze-dataset-batches.py +0 -0
  415. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/bliss-collect-seq-lens.py +0 -0
  416. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/bliss-dump-text.py +0 -0
  417. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/bliss-get-segment-names.py +0 -0
  418. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/bliss-to-ogg-zip.py +0 -0
  419. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/bpe-create-lexicon.py +0 -0
  420. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/calculate-word-error-rate.py +0 -0
  421. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/cleanup-old-models.py +0 -0
  422. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/collect-orth-symbols.py +0 -0
  423. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/collect-words.py +0 -0
  424. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/compile_native_op.py +0 -0
  425. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/compile_tf_graph.py +0 -0
  426. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/debug-dump-search-scores.py +0 -0
  427. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/debug-plot-search-scores.py +0 -0
  428. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/dump-dataset-raw-strings.py +0 -0
  429. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/dump-dataset.py +0 -0
  430. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/dump-forward-stats.py +0 -0
  431. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/dump-forward.py +0 -0
  432. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/dump-network-json.py +0 -0
  433. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/dump-pickle.py +0 -0
  434. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/extract_state_tying_from_dataset.py +0 -0
  435. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/get-attention-weights.py +0 -0
  436. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/get-best-model-epoch.py +0 -0
  437. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/hdf_dump.py +0 -0
  438. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/hdf_dump_translation_dataset.py +0 -0
  439. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/import-blocks-mt-model.py +0 -0
  440. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/import-t2t-mt-model.py +0 -0
  441. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/.gitignore +0 -0
  442. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/Makefile +0 -0
  443. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/README.md +0 -0
  444. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/example/README.md +0 -0
  445. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/example/libs_list +0 -0
  446. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
  447. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
  448. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
  449. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/example/state_vars_list +0 -0
  450. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/example/tensor_names_list +0 -0
  451. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/file.h +0 -0
  452. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
  453. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
  454. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/main.cc +0 -0
  455. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/rescorer.h +0 -0
  456. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/vocabulary.cc +0 -0
  457. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/vocabulary.h +0 -0
  458. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/tf_avg_checkpoints.py +0 -0
  459. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/tf_inspect_checkpoint.py +0 -0
  460. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/tf_inspect_summary_log.py +0 -0
  461. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/torch_avg_checkpoints.py +0 -0
  462. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/torch_export_to_onnx.py +0 -0
  463. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/torch_inspect_checkpoint.py +0 -0
  464. {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/torch_inspect_checkpoint_and_opt.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: returnn
3
- Version: 1.20240830.140746
3
+ Version: 1.20240905.105440
4
4
  Summary: The RWTH extensible training framework for universal recurrent neural networks
5
5
  Home-page: https://github.com/rwth-i6/returnn/
6
6
  Author: Albert Zeyer
@@ -0,0 +1,2 @@
1
+ version = '1.20240905.105440'
2
+ long_version = '1.20240905.105440+git.d79c168'
@@ -317,15 +317,36 @@ class DistributeFilesDataset(CachedDataset2):
317
317
  return True
318
318
 
319
319
  def _get_sub_dataset_dict(self, files: List[FileTree]) -> Dict[str, Any]:
320
+ import tree
321
+
320
322
  dataset_dict = self.get_sub_epoch_dataset(files)
321
323
  dataset_dict = extend_dataset_dict_from_parent_dataset(dataset_dict, parent_dataset=self)
322
- if dataset_dict.get("partition_epoch", 1) != 1:
323
- raise ValueError(f"{self}: sub dataset should not have partition_epoch, got: {dataset_dict}")
324
- if "seq_ordering" not in dataset_dict and "seq_order_control_dataset" not in dataset_dict:
324
+
325
+ flat_sub_dset = tree.flatten_with_path(dataset_dict)
326
+
327
+ part_epoch_cfg = next(
328
+ ((path, v) for path, v in flat_sub_dset if path[-1] == "partition_epoch" and v != 1), None
329
+ )
330
+ if part_epoch_cfg is not None:
331
+ path, subeps = part_epoch_cfg
325
332
  raise ValueError(
326
- f"{self}: sub dataset should have explicit seq_ordering "
327
- f"(or seq_order_control_dataset for MetaDataset), got: {dataset_dict}"
333
+ f"{self}: sub dataset should not have partition_epoch, "
334
+ f'but got "partition_epoch": {subeps} at {".".join(path)} in {dataset_dict}.'
328
335
  )
336
+
337
+ # Heuristic check for well-definedness of seq ordering. Might need to be extended in the
338
+ # future if there are other ways of defining a seq order than the ones below.
339
+ if (
340
+ not any(path[-1] == "seq_ordering" for path, _ in flat_sub_dset)
341
+ and not any(path[-1] == "seq_order_control_dataset" for path, _ in flat_sub_dset)
342
+ and not any(path[-1] == "map_seq_stream" for path, _ in flat_sub_dset)
343
+ ):
344
+ raise ValueError(
345
+ f"{self}: there should be an explicit seq_ordering somewhere in the sub dataset "
346
+ f"(or seq_order_control_dataset for MetaDataset or map_seq_stream for PostprocessingDataset), "
347
+ f"but found none in {dataset_dict}."
348
+ )
349
+
329
350
  return dataset_dict
330
351
 
331
352
  @staticmethod
@@ -153,7 +153,10 @@ class NumpyBackend(Backend[numpy.ndarray]):
153
153
  op = NumpyBackend._CombineKindMap.get(kind)
154
154
  if not op:
155
155
  raise ValueError(f"RF NumpyBackend: combine kind {kind!r} not supported")
156
- return op(a, b)
156
+ res = op(a, b)
157
+ if not isinstance(res, numpy.ndarray):
158
+ res = numpy.array(res)
159
+ return res
157
160
 
158
161
  @staticmethod
159
162
  def range_over_dim(dim: Dim, *, dtype: Optional[str] = None, device: Optional[str] = None) -> Tensor[numpy.ndarray]:
@@ -211,3 +214,14 @@ class NumpyBackend(Backend[numpy.ndarray]):
211
214
  sparse_dim=source.sparse_dim,
212
215
  )
213
216
  return res
217
+
218
+ @staticmethod
219
+ def activation_raw(raw_tensor: numpy.ndarray, func: str) -> numpy.ndarray:
220
+ """
221
+ :param raw_tensor:
222
+ :param func: "tanh", "sigmoid", "relu", ...
223
+ :return: raw tensor with elementwise activation applied
224
+ """
225
+ if func == "relu":
226
+ return numpy.array(numpy.maximum(raw_tensor, 0))
227
+ raise NotImplementedError("NumpyBackend: activation %r not implemented" % func)
@@ -4,8 +4,9 @@ or just rarely used attribs, such that we can save memory for the common case.
4
4
  """
5
5
 
6
6
  from __future__ import annotations
7
- from typing import TYPE_CHECKING, Optional, Union, Any, Tuple, Sequence, Dict, List, Set, Callable
7
+ from typing import TYPE_CHECKING, Optional, Union, Any, Tuple, Sequence, MutableMapping, Dict, List, Set, Callable
8
8
  import operator
9
+ import weakref
9
10
 
10
11
  from returnn.util.basic import Entity
11
12
  from returnn.util import basic as util
@@ -118,7 +119,7 @@ class _DimExtra:
118
119
  self.same_for_batch_ctx = {} # type: Dict[Tuple[BatchInfo,Optional[ControlFlowContext]],_d.Dim]
119
120
  self.cache_dyn_size_ext_dev = {} # type: Dict[str,_t.Tensor] # device -> dyn_size_ext
120
121
  self.cache_seq_mask: Dict[Tuple[str, Optional[Tuple[Dim, ...]]], _t.Tensor] = {} # (dev,dim_order) -> seq_mask
121
- self.cache_dim_math: Dict[Tuple[str, Union[Dim, int]], Dim] = {} # op (add,sub,...), operand -> Dim
122
+ self.cache_dim_math = _CacheDimMath() # op (add,sub,...), operand -> Dim
122
123
 
123
124
  def __getstate__(self):
124
125
  d = vars(self).copy()
@@ -389,6 +390,10 @@ class _DimMixin:
389
390
  if dim_extra:
390
391
  # Any dims via dim math could also contain raw tensors,
391
392
  # so iterate through them.
393
+ if dim.dyn_size_ext is not None or dim.dimension is None:
394
+ dim_extra.cache_dim_math.clear()
395
+ else:
396
+ dim_extra.cache_dim_math.clear_dynamic()
392
397
  queue += dim_extra.cache_dim_math.values()
393
398
  if dim_extra.same_as:
394
399
  queue.append(dim_extra.same_as)
@@ -2873,6 +2878,123 @@ def dim_cmp_value(obj):
2873
2878
  return obj
2874
2879
 
2875
2880
 
2881
+ class _CacheDimMath:
2882
+ """op (add,sub,...), operand -> Dim"""
2883
+
2884
+ class _OperandCache:
2885
+ def __init__(self):
2886
+ self.dims: MutableMapping[Dim, Dim] = weakref.WeakKeyDictionary()
2887
+ self.statics: Dict[int, Dim] = {}
2888
+
2889
+ def __init__(self):
2890
+ self._ops: Dict[str, _CacheDimMath._OperandCache] = {}
2891
+
2892
+ def __repr__(self):
2893
+ return "_CacheDimMath({%s})" % ", ".join("%r: %r" % (k, v) for k, v in self.items())
2894
+
2895
+ def _get_op_dict(self, __key: Tuple[str, Union[Dim, int]]) -> _OperandCache:
2896
+ if __key[0] in self._ops:
2897
+ return self._ops[__key[0]]
2898
+ else:
2899
+ op_dict = self._OperandCache()
2900
+ self._ops[__key[0]] = op_dict
2901
+ return op_dict
2902
+
2903
+ def __setitem__(self, __key: Tuple[str, Union[Dim, int]], __value: Dim):
2904
+ op_dict = self._get_op_dict(__key)
2905
+ if isinstance(__key[1], int):
2906
+ value_dict = op_dict.statics
2907
+ else:
2908
+ value_dict = op_dict.dims
2909
+ if __key[1] in value_dict:
2910
+ value_dict[__key[1]] = __value
2911
+ return
2912
+ if len(value_dict) >= 5:
2913
+ # Just to avoid memory leaks.
2914
+ value_dict.clear()
2915
+ value_dict[__key[1]] = __value
2916
+
2917
+ def __delitem__(self, __key: Tuple[str, Union[Dim, int]]):
2918
+ op_dict = self._ops[__key[0]]
2919
+ if isinstance(__key[1], int):
2920
+ del op_dict.statics[__key[1]]
2921
+ else:
2922
+ del op_dict.dims[__key[1]]
2923
+
2924
+ def __getitem__(self, __key: Tuple[str, Union[Dim, int]]) -> Dim:
2925
+ op_dict = self._ops[__key[0]]
2926
+ if isinstance(__key[1], int):
2927
+ return op_dict.statics[__key[1]]
2928
+ else:
2929
+ return op_dict.dims[__key[1]]
2930
+
2931
+ def __contains__(self, __key: Tuple[str, Union[Dim, int]]) -> bool:
2932
+ op_dict = self._ops.get(__key[0])
2933
+ if not op_dict:
2934
+ return False
2935
+ if isinstance(__key[1], int):
2936
+ return __key[1] in op_dict.statics
2937
+ else:
2938
+ return __key[1] in op_dict.dims
2939
+
2940
+ def get(self, __key: Tuple[str, Union[Dim, int]], default: Optional[Dim] = None) -> Optional[Dim]:
2941
+ """get"""
2942
+ op_dict = self._ops.get(__key[0])
2943
+ if not op_dict:
2944
+ return default
2945
+ if isinstance(__key[1], int):
2946
+ return op_dict.statics.get(__key[1], default)
2947
+ else:
2948
+ return op_dict.dims.get(__key[1], default)
2949
+
2950
+ def setdefault(self, __key: Tuple[str, Union[Dim, int]], __value: Dim):
2951
+ """setdefault"""
2952
+ existing = self.get(__key)
2953
+ if existing is not None:
2954
+ return existing
2955
+ self[__key] = __value
2956
+ return __value
2957
+
2958
+ def clear(self):
2959
+ """clear"""
2960
+ self._ops.clear()
2961
+
2962
+ def clear_dynamic(self):
2963
+ """clear dynamic part"""
2964
+ for op_dict in self._ops.values():
2965
+ for k, v in list(op_dict.dims.items()):
2966
+ if v.dyn_size_ext is not None or v.dimension is None:
2967
+ del op_dict.dims[k]
2968
+
2969
+ def __len__(self):
2970
+ count = 0
2971
+ for op_dict in self._ops.values():
2972
+ count += len(op_dict.statics)
2973
+ count += len(op_dict.dims)
2974
+ return count
2975
+
2976
+ def items(self):
2977
+ """items"""
2978
+ for op_name, op_dict in self._ops.items():
2979
+ for key, value in op_dict.statics.items():
2980
+ yield (op_name, key), value
2981
+ for key, value in op_dict.dims.items():
2982
+ yield (op_name, key), value
2983
+
2984
+ def keys(self):
2985
+ """keys"""
2986
+ for k, v in self.items():
2987
+ yield k
2988
+
2989
+ def values(self):
2990
+ """values"""
2991
+ for k, v in self.items():
2992
+ yield v
2993
+
2994
+ def __iter__(self):
2995
+ yield from self.keys()
2996
+
2997
+
2876
2998
  def _behavior_version_reset_callback():
2877
2999
  # Reset things we did in _handle_new_min_version.
2878
3000
  _DimMixin._SimpleEquality = False
@@ -118,6 +118,9 @@ def get_net_dict(
118
118
  # but now the TF engine actually wants to have Tensor[tf.Tensor].
119
119
  # Reset it now. The TF engine should redefine it again.
120
120
  elem.reset_batch_and_raw()
121
+ elif isinstance(elem, set):
122
+ # map_structure does not recurse into sets.
123
+ nest.map_structure(_cleanup_net_dict_value, sorted(list(elem)))
121
124
  return elem
122
125
 
123
126
  # Do some cleanup.
@@ -339,7 +339,7 @@ class BatchInfo:
339
339
 
340
340
  # Ok, need to extend.
341
341
  global_batch_dims = [dim for dim in all_virtual_dims if isinstance(dim, BatchInfo.GlobalBatchDim)]
342
- assert len(global_batch_dims) == 1
342
+ assert len(global_batch_dims) == 1, f"got global_batch_dims={global_batch_dims!r}"
343
343
  global_batch_dim = global_batch_dims[0]
344
344
  assert base.virtual_dims == [global_batch_dim]
345
345
  beams = [dim for dim in all_virtual_dims if isinstance(dim, BatchInfo.BeamDim)]
@@ -3,13 +3,15 @@ Main engine for PyTorch
3
3
  """
4
4
 
5
5
  from __future__ import annotations
6
- from typing import Optional, Any, Union, Callable, Dict
6
+ from typing import Optional, Any, Union, Callable, Dict, Set
7
7
  from contextlib import nullcontext
8
8
 
9
9
  import gc
10
10
  import os
11
11
  import time
12
12
  import socket
13
+ import fnmatch
14
+ import re
13
15
 
14
16
  import torch
15
17
  import torch.distributed
@@ -41,6 +43,7 @@ from .data import extern_data as extern_data_util
41
43
  from .data.queued_data_iter import QueuedDataIter
42
44
  from .frontend.bridge import rf_module_to_pt_module
43
45
  from .util import diagnose_gpu
46
+ from .util import module as util_module
44
47
  from .distributed import DistributedContext, get_ctx as dist_get_ctx
45
48
 
46
49
 
@@ -83,6 +86,7 @@ class Engine(EngineBase):
83
86
  self._forward_step_expected_outputs = TensorDict()
84
87
  self._forward_step_expected_outputs.update(self.config.typed_value("model_outputs"), auto_convert=True)
85
88
  self._save_model_epoch_interval = 1
89
+ self._ignore_param_set: Set[str] = set() # for the updater and for saving the model checkpoint
86
90
  self._updater = None # type: Optional[Updater]
87
91
 
88
92
  self._use_autocast = False
@@ -721,6 +725,7 @@ class Engine(EngineBase):
721
725
 
722
726
  self._create_model(epoch=epoch, step=step)
723
727
 
728
+ self._ignore_param_set.clear()
724
729
  loaded_state_keys = set()
725
730
  missing_keys = set()
726
731
  unexpected_keys = set()
@@ -745,6 +750,7 @@ class Engine(EngineBase):
745
750
 
746
751
  preload_from_files = self.config.typed_value("preload_from_files", {})
747
752
  if preload_from_files:
753
+ model_state_keys_set = set(self._pt_model.state_dict().keys())
748
754
  # see `preload_from_files` in tf engine and `returnn.tf.network.CustomCheckpointLoader`
749
755
  # We use the reversed sorted order here to achieve consistent behavior with the TF engine.
750
756
  # There, the keys are used in sorted order but if a variable is loaded,
@@ -754,12 +760,39 @@ class Engine(EngineBase):
754
760
  # In order to get consistent behavior, we use the reversed order.
755
761
  for preload_key, opts in reversed(sorted(preload_from_files.items())):
756
762
  assert isinstance(opts, dict) and "filename" in opts
757
- if opts.get("init_for_train", False):
758
- if not is_first_train_epoch:
759
- continue
763
+ init_for_train = opts.get("init_for_train", False)
764
+ if init_for_train:
765
+ if isinstance(init_for_train, str) and init_for_train == "always":
766
+ # No matter if this is the first train epoch
767
+ # or training with loading some prev epoch,
768
+ # those parameters will always be loaded via preload_from_files,
769
+ # and thus also not stored in our own checkpoint.
770
+ pass
771
+ elif isinstance(init_for_train, bool) and init_for_train:
772
+ if not is_first_train_epoch:
773
+ continue
774
+ else:
775
+ raise ValueError(
776
+ f"preload key {preload_key}:"
777
+ f" invalid init_for_train value {init_for_train!r} (type {type(init_for_train).__name__})"
778
+ )
760
779
  else: # default: init for recog
761
780
  if is_training:
762
781
  continue
782
+ if opts["filename"] is None:
783
+ print(f"Pre-load (initialize) weights for key '{preload_key}'", file=log.v3)
784
+ pattern = opts["pattern"]
785
+ match = re.compile(fnmatch.translate(pattern)).match
786
+ remove = []
787
+ for name in self._pt_model.state_dict().keys():
788
+ if match(name) and name in missing_keys:
789
+ remove.append(name)
790
+ if remove:
791
+ print(f"Randomly initialize params: {remove}", file=log.v3)
792
+ missing_keys.difference_update(remove)
793
+ else:
794
+ print("(No relevant parameters matching.)", file=log.v3)
795
+ continue
763
796
  print(f"Pre-load weights for key '{preload_key}' from {opts['filename']}", file=log.v3)
764
797
  preload_model_state = torch.load(opts["filename"])
765
798
  if opts.get("checkpoint_key", "model") is not None:
@@ -786,6 +819,8 @@ class Engine(EngineBase):
786
819
  preload_model_state.pop(key)
787
820
  for new_name, name_in_checkpoint in opts.get("var_name_mapping", {}).items():
788
821
  preload_model_state[new_name] = preload_model_state.pop(name_in_checkpoint)
822
+ if init_for_train == "always":
823
+ self._ignore_param_set.update(set(preload_model_state.keys()).intersection(model_state_keys_set))
789
824
  missing_keys_preload, unexpected_keys_preload = self._pt_model.load_state_dict(
790
825
  preload_model_state, strict=False
791
826
  )
@@ -797,7 +832,7 @@ class Engine(EngineBase):
797
832
  if opts.get("prefix", ""):
798
833
  prefix_keys = [key for key in self._pt_model.state_dict() if key.startswith(opts.get("prefix", ""))]
799
834
  else:
800
- prefix_keys = self._pt_model.state_dict().keys()
835
+ prefix_keys = model_state_keys_set
801
836
  missing_keys_preload = (
802
837
  set(prefix_keys).intersection(set(missing_keys_preload)).difference(loaded_state_keys)
803
838
  )
@@ -816,6 +851,9 @@ class Engine(EngineBase):
816
851
  )
817
852
  unexpected_keys.update(unexpected_keys_preload)
818
853
 
854
+ if self._ignore_param_set:
855
+ util_module.convert_parameters_to_buffers(self._pt_model, self._ignore_param_set, persistent=False)
856
+
819
857
  if missing_keys:
820
858
  raise Exception(
821
859
  "\n".join(
@@ -913,9 +951,16 @@ class Engine(EngineBase):
913
951
  tmp_filename = filename + ".tmp_write"
914
952
  if os.path.exists(tmp_filename):
915
953
  os.unlink(tmp_filename)
954
+ state_dict = self._pt_model.state_dict()
955
+ if self._ignore_param_set:
956
+ # Do some extra check that we don't save the ignored parameters.
957
+ # Should not be in the state_dict anymore because we should have converted them to buffers
958
+ # via util_module.convert_parameters_to_buffers before.
959
+ remaining = set(state_dict.keys()).intersection(self._ignore_param_set)
960
+ assert not remaining, f"_save_model: found remaining params in state_dict to ignore: {remaining}"
916
961
  torch.save(
917
962
  {
918
- "model": self._pt_model.state_dict(),
963
+ "model": state_dict,
919
964
  "epoch": self.epoch,
920
965
  "step": self.global_train_step,
921
966
  "effective_learning_rate": self._updater.get_effective_learning_rate() if self._updater else None,
@@ -149,13 +149,16 @@ class RFModuleAsPTModule(torch.nn.Module):
149
149
  for name, rf_param in self._rf_module.named_parameters(recurse=False):
150
150
  pt_param = getattr(self, name)
151
151
  if rf_param.auxiliary and self._aux_params_as_buffers:
152
- assert isinstance(pt_param, torch.Tensor) # but not torch.nn.Parameter
153
- # See similar logic in torch.nn.Module._apply.
154
- pt_param = torch.nn.Parameter(pt_param, pt_param.requires_grad)
155
- else:
156
- assert isinstance(pt_param, torch.nn.Parameter), (
157
- f"{self}.{name} is not a Parameter" f" but {type(pt_param).__name__}"
158
- )
152
+ if not isinstance(pt_param, torch.nn.Parameter):
153
+ assert isinstance(pt_param, torch.Tensor) # but not torch.nn.Parameter
154
+ # See similar logic in torch.nn.Module._apply.
155
+ pt_param = torch.nn.Parameter(pt_param, pt_param.requires_grad)
156
+ # Otherwise, we do not care whether it is a torch.nn.Parameter or not.
157
+ # Its type might have changed due to convert_parameters_to_buffers.
158
+ # Just make sure it is a tensor.
159
+ assert isinstance(pt_param, torch.Tensor)
160
+ # noinspection PyProtectedMember
161
+ rf_param.dtype = rf_param._raw_backend.get_dtype_name_raw(pt_param) # dtype might have changed
159
162
  rf_param.raw_tensor = pt_param
160
163
 
161
164
  def register_parameter(self, name: str, param: Optional[torch.nn.Parameter]) -> None:
@@ -0,0 +1,43 @@
1
+ """
2
+ Utils for modules
3
+ """
4
+
5
+ from __future__ import annotations
6
+ from typing import Collection
7
+ import torch
8
+
9
+
10
def convert_parameters_to_buffers(
    module: torch.nn.Module, parameter_names: Collection[str], *, deep: bool = True, persistent: bool
):
    """
    Convert every listed parameter of ``module`` into a buffer,
    by calling :func:`convert_parameter_to_buffer` once per name.

    :param module: module which holds the parameters
    :param parameter_names: names of the parameters to convert
    :param deep: a parameter name can contain '.' to access submodules
    :param persistent: whether the buffers are persistent. if True, the buffers will be saved to the state_dict.
        forwarded unchanged, and eventually passed to module.register_buffer.
    """
    # Same keyword options for every single conversion.
    options = dict(deep=deep, persistent=persistent)
    for name in parameter_names:
        convert_parameter_to_buffer(module, name, **options)
22
+
23
+
24
def convert_parameter_to_buffer(module: torch.nn.Module, parameter_name: str, *, deep: bool = True, persistent: bool):
    """
    Convert a single parameter of ``module`` into a buffer.

    The parameter is removed from its owning module, gets ``requires_grad = False``,
    and the very same tensor object is registered again under the same name as a buffer.

    :param module: module which holds the parameter (directly, or inside a submodule)
    :param parameter_name: name of the parameter
    :param deep: parameter_name can contain '.' to access submodules
    :param persistent: whether the buffer is persistent. if True, the buffer will be saved to the state_dict.
        passed to module.register_buffer.
    :raises ValueError: if parameter_name contains '.' while deep is False,
        or if the attribute is not a torch.nn.Parameter
    """
    is_nested = "." in parameter_name
    if is_nested and not deep:
        raise ValueError("parameter_name can't contain '.' when deep is False")

    owner, attr_name = module, parameter_name
    if is_nested:
        # Everything before the last '.' is the path to the owning submodule.
        submodule_path, attr_name = parameter_name.rsplit(".", 1)
        owner = module.get_submodule(submodule_path)

    tensor = getattr(owner, attr_name)
    if not isinstance(tensor, torch.nn.Parameter):
        raise ValueError(f"{attr_name} is not a torch.nn.Parameter, got type {type(tensor).__name__}")

    # Unregister first, so that the name is free again for register_buffer.
    delattr(owner, attr_name)
    tensor.requires_grad = False
    owner.register_buffer(attr_name, tensor, persistent=persistent)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: returnn
3
- Version: 1.20240830.140746
3
+ Version: 1.20240905.105440
4
4
  Summary: The RWTH extensible training framework for universal recurrent neural networks
5
5
  Home-page: https://github.com/rwth-i6/returnn/
6
6
  Author: Albert Zeyer
@@ -302,6 +302,7 @@ returnn/torch/util/__init__.py
302
302
  returnn/torch/util/array_.py
303
303
  returnn/torch/util/diagnose_gpu.py
304
304
  returnn/torch/util/gradient_checkpoint.py
305
+ returnn/torch/util/module.py
305
306
  returnn/torch/util/scaled_gradient.py
306
307
  returnn/util/__init__.py
307
308
  returnn/util/basic.py
@@ -185,6 +185,9 @@ def _run_model_net_dict_tf(
185
185
 
186
186
  from returnn.tf.frontend_layers.config_entry_points import get_net_dict
187
187
 
188
+ # noinspection PyProtectedMember
189
+ from returnn.frontend import _backend
190
+
188
191
  config = Config(
189
192
  {
190
193
  "debug_runtime_sanity_checks": True,
@@ -203,6 +206,7 @@ def _run_model_net_dict_tf(
203
206
  outputs_layers = rf.get_run_ctx().outputs
204
207
  print("*** outputs:", outputs_layers)
205
208
 
209
+ _backend.select_backend_tf()
206
210
  net = TFNetwork(config=config, train_flag=False)
207
211
  net.construct_from_dict(net_dict)
208
212
 
@@ -238,7 +238,7 @@ def test_pad_time_right():
238
238
  assert data_.dims == (batch_dim, time_dim, in_dim)
239
239
  new_time_dim = out_.dims[1]
240
240
  assert out_.dims == (batch_dim, new_time_dim, in_dim) and new_time_dim != time_dim
241
- assert new_time_dim == time_dim + 1 # math dim... not really necessary check here...
241
+ # assert new_time_dim == time_dim + 1 # math dim... not really necessary check here...
242
242
  assert time_dim.dyn_size_ext.dims == new_time_dim.dyn_size_ext.dims == (batch_dim,)
243
243
  batch_size = batch_dim.get_dim_value()
244
244
  assert batch_size > 1