returnn 1.20250110.132842__tar.gz → 1.20250114.164134__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of returnn might be problematic. Click here for more details.

Files changed (473):
  1. {returnn-1.20250110.132842/returnn.egg-info → returnn-1.20250114.164134}/PKG-INFO +1 -1
  2. returnn-1.20250114.164134/_setup_info_generated.py +2 -0
  3. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/requirements.txt +0 -1
  4. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/datasets/postprocessing.py +4 -1
  5. returnn-1.20250114.164134/returnn/frontend/_cache.py +208 -0
  6. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/attention.py +12 -12
  7. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/conversions/hf_llama.py +7 -4
  8. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/util/debug.py +13 -0
  9. returnn-1.20250114.164134/returnn/util/lru_cache.py +309 -0
  10. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/util/py-to-pickle.cpp +1 -0
  11. {returnn-1.20250110.132842 → returnn-1.20250114.164134/returnn.egg-info}/PKG-INFO +1 -1
  12. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn.egg-info/SOURCES.txt +3 -0
  13. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_rf_attention.py +17 -9
  14. returnn-1.20250114.164134/tests/test_threading.py +88 -0
  15. returnn-1.20250110.132842/_setup_info_generated.py +0 -2
  16. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/.editorconfig +0 -0
  17. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/.gitignore +0 -0
  18. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/.gitmodules +0 -0
  19. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/.kateconfig +0 -0
  20. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/CHANGELOG.md +0 -0
  21. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/CODEOWNERS +0 -0
  22. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/CONTRIBUTING.md +0 -0
  23. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/LICENSE +0 -0
  24. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/MANIFEST.in +0 -0
  25. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/README.rst +0 -0
  26. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/__init__.py +0 -0
  27. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/12AX.cluster_map +0 -0
  28. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/_setup_returnn_env.py +0 -0
  29. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-fwd.config +0 -0
  30. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-horovod-mpi.py +0 -0
  31. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-horovod-mpi.py.sh +0 -0
  32. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-horovod-mpi.sh +0 -0
  33. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-hyper-param-tuning.config +0 -0
  34. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-iter-dataset.py +0 -0
  35. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-list-devices.py +0 -0
  36. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-lua-torch-layer.config +0 -0
  37. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-pretrain.config +0 -0
  38. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-record-and-push-to-webserver.py +0 -0
  39. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-returnn-as-framework.py +0 -0
  40. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-rf-pt-benchmark.py +0 -0
  41. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-rf.config +0 -0
  42. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-rhn-enwik8.config +0 -0
  43. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-sprint-interface.py +0 -0
  44. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-tf-att-copy.config +0 -0
  45. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-tf-attention.config +0 -0
  46. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
  47. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
  48. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-tf-enc-dec.config +0 -0
  49. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-tf-hard-att-copy.config +0 -0
  50. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-tf-lstm-benchmark.py +0 -0
  51. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
  52. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
  53. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-tf-native-lstm.12ax.config +0 -0
  54. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-tf-native-lstm2.12ax.config +0 -0
  55. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
  56. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-tf-neural-transducer.12ax.config +0 -0
  57. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-tf-rec-explicit-lstm.config +0 -0
  58. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-tf-rec-explicit-rnn.config +0 -0
  59. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-tf-rec-self-att.config +0 -0
  60. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-tf-search-compiled-graph.py +0 -0
  61. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
  62. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-timit-lstm-ctc.config +0 -0
  63. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-torch.config +0 -0
  64. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
  65. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/demo.sh +0 -0
  66. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
  67. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
  68. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
  69. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/mdlstm/IAM/README.txt +0 -0
  70. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/mdlstm/IAM/chars.txt +0 -0
  71. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/mdlstm/IAM/config_demo +0 -0
  72. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/mdlstm/IAM/config_fwd +0 -0
  73. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/mdlstm/IAM/config_real +0 -0
  74. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
  75. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/mdlstm/IAM/decode.py +0 -0
  76. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
  77. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/mdlstm/IAM/go.sh +0 -0
  78. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/mdlstm/IAM/lines.txt +0 -0
  79. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/mdlstm/IAM/split/eval.txt +0 -0
  80. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/mdlstm/IAM/split/train.txt +0 -0
  81. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/mdlstm/IAM/split/valid.txt +0 -0
  82. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/mdlstm/README.md +0 -0
  83. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/mdlstm/artificial/create_test_h5.py +0 -0
  84. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/mdlstm/artificial/forwardconfig +0 -0
  85. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/mdlstm/artificial/go.sh +0 -0
  86. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/mdlstm/artificial/trainconfig +0 -0
  87. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
  88. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
  89. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/mdlstm/artificial_rgb/go.sh +0 -0
  90. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
  91. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/pyproject.toml +0 -0
  92. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/__init__.py +0 -0
  93. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/__main__.py +0 -0
  94. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/__old_mod_loader__.py +0 -0
  95. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/__setup__.py +0 -0
  96. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/config.py +0 -0
  97. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/datasets/__init__.py +0 -0
  98. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/datasets/audio.py +0 -0
  99. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/datasets/basic.py +0 -0
  100. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/datasets/bundle_file.py +0 -0
  101. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/datasets/cached.py +0 -0
  102. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/datasets/cached2.py +0 -0
  103. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/datasets/distrib_files.py +0 -0
  104. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/datasets/generating.py +0 -0
  105. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/datasets/hdf.py +0 -0
  106. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/datasets/lm.py +0 -0
  107. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/datasets/map.py +0 -0
  108. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/datasets/meta.py +0 -0
  109. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/datasets/multi_proc.py +0 -0
  110. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/datasets/normalization_data.py +0 -0
  111. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/datasets/numpy_dump.py +0 -0
  112. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/datasets/raw_wav.py +0 -0
  113. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/datasets/sprint.py +0 -0
  114. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/datasets/stereo.py +0 -0
  115. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/datasets/util/__init__.py +0 -0
  116. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/datasets/util/feature_extraction.py +0 -0
  117. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/datasets/util/strings.py +0 -0
  118. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/datasets/util/vocabulary.py +0 -0
  119. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/engine/__init__.py +0 -0
  120. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/engine/base.py +0 -0
  121. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/engine/batch.py +0 -0
  122. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/__init__.py +0 -0
  123. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/__main__.py +0 -0
  124. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/.git +0 -0
  125. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
  126. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
  127. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
  128. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
  129. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
  130. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
  131. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
  132. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
  133. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
  134. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
  135. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
  136. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
  137. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
  138. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
  139. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
  140. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
  141. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
  142. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
  143. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
  144. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
  145. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
  146. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
  147. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
  148. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
  149. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/__init__.py +0 -0
  150. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/graph_editor/README.md +0 -0
  151. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/graph_editor/__init__.py +0 -0
  152. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/graph_editor/edit.py +0 -0
  153. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/graph_editor/reroute.py +0 -0
  154. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/graph_editor/select.py +0 -0
  155. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/graph_editor/subgraph.py +0 -0
  156. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/graph_editor/transform.py +0 -0
  157. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/extern/graph_editor/util.py +0 -0
  158. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/forward_iface.py +0 -0
  159. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/__init__.py +0 -0
  160. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/_backend.py +0 -0
  161. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/_native/__init__.py +0 -0
  162. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/_native/backend.cpp +0 -0
  163. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/_native/backend.hpp +0 -0
  164. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/_native/module.cpp +0 -0
  165. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/_native/module.hpp +0 -0
  166. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/_native/py_utils.hpp +0 -0
  167. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/_native/tensor_ops.cpp +0 -0
  168. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/_native/tensor_ops.hpp +0 -0
  169. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/_numpy_backend.py +0 -0
  170. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/_random_journal.py +0 -0
  171. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/_utils.py +0 -0
  172. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/array_.py +0 -0
  173. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/audio/__init__.py +0 -0
  174. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/audio/mel.py +0 -0
  175. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/audio/specaugment.py +0 -0
  176. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/backend.py +0 -0
  177. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/build_from_dict.py +0 -0
  178. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/cond.py +0 -0
  179. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/const.py +0 -0
  180. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/container.py +0 -0
  181. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/control_flow_ctx.py +0 -0
  182. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/conv.py +0 -0
  183. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/conversions/__init__.py +0 -0
  184. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/conversions/espnet_e_branchformer.py +0 -0
  185. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/conversions/torch_nn.py +0 -0
  186. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/decoder/__init__.py +0 -0
  187. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/decoder/transformer.py +0 -0
  188. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/device.py +0 -0
  189. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/dims.py +0 -0
  190. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/dropout.py +0 -0
  191. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/dtype.py +0 -0
  192. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/encoder/__init__.py +0 -0
  193. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/encoder/base.py +0 -0
  194. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/encoder/conformer.py +0 -0
  195. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/encoder/conformer_v2.py +0 -0
  196. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/encoder/e_branchformer.py +0 -0
  197. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/encoder/transformer.py +0 -0
  198. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/gradient.py +0 -0
  199. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/graph.py +0 -0
  200. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/hooks.py +0 -0
  201. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/init.py +0 -0
  202. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/label_smoothing.py +0 -0
  203. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/linear.py +0 -0
  204. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/loop.py +0 -0
  205. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/loss.py +0 -0
  206. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/math_.py +0 -0
  207. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/matmul.py +0 -0
  208. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/module.py +0 -0
  209. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/normalization.py +0 -0
  210. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/parameter.py +0 -0
  211. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/parametrizations.py +0 -0
  212. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/parametrize.py +0 -0
  213. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/piecewise_linear.py +0 -0
  214. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/rand.py +0 -0
  215. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/rec.py +0 -0
  216. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/reduce.py +0 -0
  217. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/run_ctx.py +0 -0
  218. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/signal.py +0 -0
  219. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/state.py +0 -0
  220. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/stepwise_scheduler.py +0 -0
  221. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/tensor_array.py +0 -0
  222. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/frontend/types.py +0 -0
  223. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/import_/__init__.py +0 -0
  224. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/import_/common.py +0 -0
  225. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/import_/git.py +0 -0
  226. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/import_/import_.py +0 -0
  227. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/learning_rate_control.py +0 -0
  228. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/log.py +0 -0
  229. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/native_op.cpp +0 -0
  230. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/native_op.py +0 -0
  231. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/pretrain.py +0 -0
  232. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/sprint/__init__.py +0 -0
  233. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/sprint/cache.py +0 -0
  234. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/sprint/control.py +0 -0
  235. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/sprint/error_signals.py +0 -0
  236. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/sprint/extern_interface.py +0 -0
  237. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/sprint/interface.py +0 -0
  238. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tensor/README.md +0 -0
  239. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tensor/__init__.py +0 -0
  240. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tensor/_dim_extra.py +0 -0
  241. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tensor/_tensor_extra.py +0 -0
  242. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tensor/_tensor_mixin_base.py +0 -0
  243. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tensor/_tensor_op_overloads.py +0 -0
  244. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tensor/control_flow_ctx.py +0 -0
  245. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tensor/dim.py +0 -0
  246. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tensor/marked_dim.py +0 -0
  247. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tensor/tensor.py +0 -0
  248. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tensor/tensor_dict.py +0 -0
  249. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tensor/utils.py +0 -0
  250. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/__init__.py +0 -0
  251. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/compat.py +0 -0
  252. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/data_pipeline.py +0 -0
  253. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/distributed.py +0 -0
  254. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/engine.py +0 -0
  255. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/README.md +0 -0
  256. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/__init__.py +0 -0
  257. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/_backend.py +0 -0
  258. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/_utils.py +0 -0
  259. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/cond.py +0 -0
  260. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
  261. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
  262. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/dims.py +0 -0
  263. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/layer.py +0 -0
  264. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/loop.py +0 -0
  265. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/make_layer.py +0 -0
  266. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/masked_computation.py +0 -0
  267. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
  268. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
  269. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/frontend_low_level/__init__.py +0 -0
  270. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/frontend_low_level/_backend.py +0 -0
  271. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/horovod.py +0 -0
  272. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/hyper_param_tuning.py +0 -0
  273. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/layers/__init__.py +0 -0
  274. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/layers/base.py +0 -0
  275. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/layers/basic.py +0 -0
  276. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/layers/rec.py +0 -0
  277. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/layers/segmental_model.py +0 -0
  278. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/layers/signal_processing.py +0 -0
  279. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/layers/variable.py +0 -0
  280. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/native_op.py +0 -0
  281. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/network.py +0 -0
  282. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/sprint.py +0 -0
  283. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/updater.py +0 -0
  284. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/util/__init__.py +0 -0
  285. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/util/basic.py +0 -0
  286. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/util/data.py +0 -0
  287. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/util/gradient_checkpoint.py +0 -0
  288. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/util/ken_lm.py +0 -0
  289. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/tf/util/open_fst.py +0 -0
  290. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/README.md +0 -0
  291. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/__init__.py +0 -0
  292. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/data/__init__.py +0 -0
  293. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/data/extern_data.py +0 -0
  294. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/data/pipeline.py +0 -0
  295. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/data/queued_data_iter.py +0 -0
  296. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
  297. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/data/tensor_utils.py +0 -0
  298. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/distributed.py +0 -0
  299. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/engine.py +0 -0
  300. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/frontend/__init__.py +0 -0
  301. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/frontend/_backend.py +0 -0
  302. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/frontend/_rand.py +0 -0
  303. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/frontend/bridge.py +0 -0
  304. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/frontend/raw_ops.py +0 -0
  305. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/optim/README.md +0 -0
  306. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/optim/__init__.py +0 -0
  307. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/optim/lion.py +0 -0
  308. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/updater.py +0 -0
  309. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/util/README.md +0 -0
  310. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/util/__init__.py +0 -0
  311. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/util/array_.py +0 -0
  312. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/util/debug_inf_nan.py +0 -0
  313. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/util/diagnose_gpu.py +0 -0
  314. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/util/exception_helper.py +0 -0
  315. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/util/gradient_checkpoint.py +0 -0
  316. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/util/module.py +0 -0
  317. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/torch/util/scaled_gradient.py +0 -0
  318. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/util/__init__.py +0 -0
  319. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/util/basic.py +0 -0
  320. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/util/better_exchook.py +0 -0
  321. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/util/bpe.py +0 -0
  322. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/util/debug_helpers.py +0 -0
  323. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/util/file_cache.py +0 -0
  324. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/util/fsa.py +0 -0
  325. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/util/literal_py_to_pickle.py +0 -0
  326. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/util/math.py +0 -0
  327. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
  328. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/util/native_code_compiler.py +0 -0
  329. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/util/pprint.py +0 -0
  330. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/util/py_compat.py +0 -0
  331. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/util/py_ext_mod_compiler.py +0 -0
  332. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/util/result_with_reason.py +0 -0
  333. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/util/sig_proc.py +0 -0
  334. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/util/task_system.py +0 -0
  335. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/util/train_proc_manager.py +0 -0
  336. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn/util/watch_memory.py +0 -0
  337. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn.egg-info/dependency_links.txt +0 -0
  338. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/returnn.egg-info/top_level.txt +0 -0
  339. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/rnn.py +0 -0
  340. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/setup.cfg +0 -0
  341. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/setup.py +0 -0
  342. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/DummySprintExec.py +0 -0
  343. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/PyCharm-inspection-profile.xml +0 -0
  344. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/PyCharm.idea/.gitignore +0 -0
  345. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/PyCharm.idea/.name +0 -0
  346. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
  347. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
  348. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
  349. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
  350. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
  351. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/PyCharm.idea/misc.xml +0 -0
  352. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/PyCharm.idea/modules.xml +0 -0
  353. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/PyCharm.idea/returnn.iml +0 -0
  354. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
  355. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/_set_num_threads1.py +0 -0
  356. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/_setup_returnn_env.py +0 -0
  357. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/_setup_test_env.py +0 -0
  358. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/bpe-unicode-demo.codes +0 -0
  359. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/bpe-unicode-demo.vocab +0 -0
  360. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/lexicon_opt.fst +0 -0
  361. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/lexicon_opt.isyms +0 -0
  362. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/lexicon_opt.jpg +0 -0
  363. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/lexicon_opt.osyms +0 -0
  364. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/lint_common.py +0 -0
  365. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/pycharm-inspect.py +0 -0
  366. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/pylint.py +0 -0
  367. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/returnn-as-framework.py +0 -0
  368. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/rf_utils.py +0 -0
  369. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/spelling.dic +0 -0
  370. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_Config.py +0 -0
  371. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_Dataset.py +0 -0
  372. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_Fsa.py +0 -0
  373. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_GeneratingDataset.py +0 -0
  374. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_HDFDataset.py +0 -0
  375. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_LearningRateControl.py +0 -0
  376. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_Log.py +0 -0
  377. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_MultiProcDataset.py +0 -0
  378. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_Pretrain.py +0 -0
  379. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_ResNet.py +0 -0
  380. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_SprintDataset.py +0 -0
  381. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_SprintInterface.py +0 -0
  382. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_TFEngine.py +0 -0
  383. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_TFNativeOp.py +0 -0
  384. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_TFNetworkLayer.py +0 -0
  385. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_TFNetworkRecLayer.py +0 -0
  386. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_TFNetworkSigProcLayer.py +0 -0
  387. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_TFUpdater.py +0 -0
  388. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_TFUtil.py +0 -0
  389. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_TF_determinism.py +0 -0
  390. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_TaskSystem.py +0 -0
  391. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_TaskSystem_SharedMem.py +0 -0
  392. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_TranslationDataset.py +0 -0
  393. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_Util.py +0 -0
  394. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_demos.py +0 -0
  395. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_fork_exec.py +0 -0
  396. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_hdf_dump.py +0 -0
  397. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_rf_array.py +0 -0
  398. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_rf_base.py +0 -0
  399. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_rf_cond.py +0 -0
  400. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_rf_const.py +0 -0
  401. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_rf_container.py +0 -0
  402. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_rf_conv.py +0 -0
  403. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_rf_decoder_transformer.py +0 -0
  404. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_rf_encoder_conformer.py +0 -0
  405. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_rf_gradient.py +0 -0
  406. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_rf_label_smoothing.py +0 -0
  407. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_rf_loop.py +0 -0
  408. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_rf_math.py +0 -0
  409. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_rf_normalization.py +0 -0
  410. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_rf_piecewise_linear.py +0 -0
  411. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_rf_rec.py +0 -0
  412. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_rf_reduce.py +0 -0
  413. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_rf_signal.py +0 -0
  414. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_tensor.py +0 -0
  415. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_tools.py +0 -0
  416. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_torch_dataset.py +0 -0
  417. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_torch_engine.py +0 -0
  418. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_torch_frontend.py +0 -0
  419. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_torch_internal_frontend.py +0 -0
  420. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/test_torch_util.py +0 -0
  421. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tests/torch_utils.py +0 -0
  422. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/_setup_returnn_env.py +0 -0
  423. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/analyze-dataset-batches.py +0 -0
  424. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/bliss-collect-seq-lens.py +0 -0
  425. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/bliss-dump-text.py +0 -0
  426. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/bliss-get-segment-names.py +0 -0
  427. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/bliss-to-ogg-zip.py +0 -0
  428. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/bpe-create-lexicon.py +0 -0
  429. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/calculate-word-error-rate.py +0 -0
  430. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/cleanup-old-models.py +0 -0
  431. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/collect-orth-symbols.py +0 -0
  432. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/collect-words.py +0 -0
  433. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/compile_native_op.py +0 -0
  434. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/compile_tf_graph.py +0 -0
  435. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/debug-dump-search-scores.py +0 -0
  436. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/debug-plot-search-scores.py +0 -0
  437. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/dump-dataset-raw-strings.py +0 -0
  438. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/dump-dataset.py +0 -0
  439. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/dump-forward-stats.py +0 -0
  440. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/dump-forward.py +0 -0
  441. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/dump-network-json.py +0 -0
  442. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/dump-pickle.py +0 -0
  443. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/extract_state_tying_from_dataset.py +0 -0
  444. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/get-attention-weights.py +0 -0
  445. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/get-best-model-epoch.py +0 -0
  446. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/hdf_dump.py +0 -0
  447. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/hdf_dump_translation_dataset.py +0 -0
  448. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/import-blocks-mt-model.py +0 -0
  449. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/import-t2t-mt-model.py +0 -0
  450. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/lattice_rescorer/.gitignore +0 -0
  451. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/lattice_rescorer/Makefile +0 -0
  452. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/lattice_rescorer/README.md +0 -0
  453. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/lattice_rescorer/example/README.md +0 -0
  454. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/lattice_rescorer/example/libs_list +0 -0
  455. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
  456. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
  457. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
  458. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/lattice_rescorer/example/state_vars_list +0 -0
  459. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/lattice_rescorer/example/tensor_names_list +0 -0
  460. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/lattice_rescorer/file.h +0 -0
  461. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
  462. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
  463. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/lattice_rescorer/main.cc +0 -0
  464. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/lattice_rescorer/rescorer.h +0 -0
  465. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/lattice_rescorer/vocabulary.cc +0 -0
  466. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/lattice_rescorer/vocabulary.h +0 -0
  467. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/tf_avg_checkpoints.py +0 -0
  468. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/tf_inspect_checkpoint.py +0 -0
  469. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/tf_inspect_summary_log.py +0 -0
  470. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/torch_avg_checkpoints.py +0 -0
  471. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/torch_export_to_onnx.py +0 -0
  472. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/torch_inspect_checkpoint.py +0 -0
  473. {returnn-1.20250110.132842 → returnn-1.20250114.164134}/tools/torch_inspect_checkpoint_and_opt.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: returnn
3
- Version: 1.20250110.132842
3
+ Version: 1.20250114.164134
4
4
  Summary: The RWTH extensible training framework for universal recurrent neural networks
5
5
  Home-page: https://github.com/rwth-i6/returnn/
6
6
  Author: Albert Zeyer
@@ -0,0 +1,2 @@
1
+ version = '1.20250114.164134'
2
+ long_version = '1.20250114.164134+git.4c8ef4c'
@@ -1,5 +1,4 @@
1
1
  numpy
2
2
  h5py
3
- typing
4
3
  # Nest alternative, provides `tree`: https://github.com/rwth-i6/returnn/issues/1314
5
4
  dm-tree
@@ -138,11 +138,13 @@ class PostprocessingDataset(CachedDataset2):
138
138
  self._in_tensor_dict_template = TensorDict(
139
139
  {name: self._make_tensor_template_from_input(name) for name in self._dataset.get_data_keys()}
140
140
  )
141
+ self.labels = {}
141
142
  if self._map_outputs is not None:
142
143
  self._out_tensor_dict_template = TensorDict()
143
144
  self._out_tensor_dict_template.update(self._map_outputs, auto_convert=True)
144
145
  else:
145
146
  self._out_tensor_dict_template = self._in_tensor_dict_template.copy_template()
147
+ self.labels = self._dataset.labels.copy()
146
148
  # update only after _out_tensor_dict_template has been created from _in_tensor_dict_template
147
149
  self._in_tensor_dict_template.update({"seq_tag": {"dims": (), "dtype": "string"}}, auto_convert=True)
148
150
  self.num_outputs = {
@@ -152,8 +154,9 @@ class PostprocessingDataset(CachedDataset2):
152
154
  self._default_input = "data" if "data" in self.num_outputs else next(iter(self.num_outputs.keys()))
153
155
  self.num_inputs = self.num_outputs[self._default_input][0]
154
156
 
155
- self.labels = {}
156
157
  for k, t in self._out_tensor_dict_template.data.items():
158
+ if self.labels.get(k):
159
+ continue
157
160
  if t.vocab:
158
161
  self.labels[k] = t.vocab.labels
159
162
  elif t.sparse_dim: # sparse_dim but not vocab
@@ -0,0 +1,208 @@
1
+ """
2
+ Cache, to store some data.
3
+ See :class:`Cache`.
4
+
5
+ One use case example is :func:`sinusoidal_positional_encoding` and :func:`relative_positional_encoding`.
6
+ """
7
+
8
+ from __future__ import annotations
9
+ from typing import Optional, Union, Any, Type, Callable, Tuple, Dict
10
+ from weakref import ref
11
+ import tree
12
+ from returnn.util.lru_cache import lru_cache
13
+ from returnn.tensor import Tensor, Dim
14
+ import returnn.frontend as rf
15
+ from returnn.frontend._backend import global_backend, get_backend_by_raw_tensor_type, Backend
16
+
17
+
18
+ class Cache:
19
+ """
20
+ Cache, intended for internal use of RF functions.
21
+
22
+ One use case example is :func:`sinusoidal_positional_encoding` and :func:`relative_positional_encoding`.
23
+
24
+ There are some specific properties we must take care of:
25
+
26
+ - Lifetime of values: For graph-based backends, it can only stay alive for the current run ctx.
27
+ (For eager-based backends, there is no such restriction.)
28
+ - Size: Put some limit, use LRU logic.
29
+ - Dims: Use only weakrefs. Some Dim should not stay alive just because of the cache.
30
+ - Scalar dynamic Dims in eager mode, or static dims: Instead of the Dim, use the dim value for the key
31
+ (and map the output to the Dim).
32
+ - Tensor as keys: Use weakrefs. Also don't check by value but by identity.
33
+ """
34
+
35
+ def __init__(self, max_size: int):
36
+ # Use lru_cache here, but not via a decorator,
37
+ # as we want custom set/get logic.
38
+ # Also, we want the lru_cache to be local to this Cache instance,
39
+ # not shared over all instances of this class.
40
+ self._lru_cache = lru_cache(max_size)(_lru_cache_dummy_func)
41
+
42
+ def get(self, key, default=None):
43
+ """
44
+ :param key:
45
+ :param default:
46
+ :return: entry in cache or default
47
+ """
48
+ key_transformed = _transform_key(key)
49
+ key_transformed_orig, value = self._lru_cache.cache_peek(key_transformed, fallback=(None, None))
50
+ if key_transformed_orig is None:
51
+ return default
52
+
53
+ assert len(key_transformed_orig) == len(key_transformed)
54
+ dim_map = {} # orig -> new
55
+ for key_item_orig, key_item in zip(key_transformed_orig, key_transformed):
56
+ if isinstance(key_item_orig, DimWrapper):
57
+ assert isinstance(key_item, DimWrapper)
58
+ dim_orig = key_item_orig.dim_ref()
59
+ dim = key_item.dim_ref()
60
+ assert isinstance(dim_orig, Dim) and isinstance(dim, Dim)
61
+ dim_map[dim_orig] = dim
62
+
63
+ # noinspection PyShadowingNames
64
+ def _map_output(output):
65
+ if isinstance(output, Dim):
66
+ return dim_map.get(output, output)
67
+ if isinstance(output, Tensor):
68
+ if any(dim in dim_map for dim in output.dims):
69
+ out_raw = output.raw_tensor
70
+ for axis, dim in enumerate(output.dims):
71
+ if dim in dim_map:
72
+ output = output.copy_template_replace_dim_tag(axis=axis, new_dim_tag=dim_map[dim])
73
+ output.raw_tensor = out_raw
74
+ return output
75
+
76
+ return tree.map_structure(_map_output, value)
77
+
78
+ def set(self, key, value):
79
+ """
80
+ :param key:
81
+ :param value:
82
+ """
83
+
84
+ def _finalize_callback(*_args):
85
+ self._lru_cache.cache_pop(key_transformed, fallback=None)
86
+
87
+ key_backend = _get_backend(key)
88
+ value_backend = _get_backend(value)
89
+ if key_backend != value_backend:
90
+ raise ValueError(f"key and value have different backends: {key_backend} != {value_backend}")
91
+ key_transformed = _transform_key(key, finalize_callback=_finalize_callback)
92
+ self._lru_cache.cache_set(key_transformed, result=(key_transformed, value))
93
+
94
+
95
+ def _lru_cache_dummy_func(*_args, **_kwargs):
96
+ raise Exception("This should not be called.")
97
+
98
+
99
+ def _transform_key(
100
+ key: Any, *, finalize_callback: Optional[Callable] = None, collected_dim_map: Optional[Dict[Dim, DimWrapper]] = None
101
+ ) -> Tuple[Union[Type[Backend], ref[rf.RunCtx], _KeyItemType], ...]:
102
+ backend = _get_backend(key)
103
+ keys_flat = [backend]
104
+ if not backend.executing_eagerly():
105
+ # See comment above: If graph-mode, the cached value becomes invalid
106
+ # when the current run ctx goes out of scope.
107
+ keys_flat.append(ref(rf.get_run_ctx(), finalize_callback))
108
+ if collected_dim_map is None:
109
+ collected_dim_map = {}
110
+ keys_flat += [
111
+ _transform_key_item(key, finalize_callback=finalize_callback, collected_dim_map=collected_dim_map)
112
+ for key in tree.flatten(key)
113
+ ]
114
+ return tuple(keys_flat)
115
+
116
+
117
+ def _transform_key_item(
118
+ key: Any, *, finalize_callback: Optional[Callable] = None, collected_dim_map: Dict[Dim, DimWrapper]
119
+ ) -> _KeyItemType:
120
+ if isinstance(key, Tensor):
121
+ return TensorWrapper(key, finalize_callback=finalize_callback)
122
+ if isinstance(key, Dim):
123
+ if key in collected_dim_map:
124
+ return collected_dim_map[key]
125
+ dim_wrapper = DimWrapper(key, finalize_callback=finalize_callback)
126
+ collected_dim_map[key] = dim_wrapper
127
+ return dim_wrapper
128
+ if not isinstance(key, _RawTypes):
129
+ raise TypeError(f"unexpected type {type(key)}")
130
+ return key
131
+
132
+
133
+ def _get_backend(*args) -> Type[Backend]:
134
+ args_flat = tree.flatten(args)
135
+ for arg in args_flat:
136
+ if isinstance(arg, Tensor) and arg.raw_tensor is not None:
137
+ return get_backend_by_raw_tensor_type(type(arg.raw_tensor))
138
+ return global_backend.__class__
139
+
140
+
141
+ class TensorWrapper:
142
+ """
143
+ Wraps :class:`Tensor`.
144
+ Using weakref for the tensor, including also ``raw_tensor``.
145
+ Equality is given if the identity is the same, for the Tensor itself and the raw_tensor.
146
+ No value of the tensor is checked.
147
+ """
148
+
149
+ def __init__(self, value: Tensor, *, finalize_callback):
150
+ self.value_ref = ref(value, finalize_callback)
151
+ self.raw_value_ref = ref(value.raw_tensor, finalize_callback)
152
+ self._hash = id(value)
153
+
154
+ def __eq__(self, other):
155
+ if isinstance(other, TensorWrapper):
156
+ return self.value_ref() is other.value_ref() and self.raw_value_ref() is other.raw_value_ref()
157
+ return False
158
+
159
+ def __hash__(self):
160
+ return self._hash
161
+
162
+
163
+ class DimWrapper:
164
+ """
165
+ Wraps :class:`Dim`.
166
+ Using weakref for the dim.
167
+ If the size is scalar and known, equality is given when the size is equal (and dim tag is ignored)
168
+ """
169
+
170
+ def __init__(self, dim: Dim, *, finalize_callback):
171
+ self.dim_value = _dim_value_for_key(dim)
172
+ # finalize_callback only needed when we don't use the dim value.
173
+ self.dim_ref = ref(dim, finalize_callback if self.dim_value is None else None)
174
+ self.dyn_size_ref = (
175
+ # E.g. consider the batch dim or data spatial dim which would be reset each step.
176
+ # We need some ref to the dyn size, and finalize this key when it goes out of scope.
177
+ # This is only needed when there is no info on the static size (or eager scalar dyn size).
178
+ ref(dim.dyn_size_ext.raw_tensor, finalize_callback)
179
+ if self.dim_value is None and dim.dyn_size_ext and dim.dyn_size_ext.raw_tensor is not None
180
+ else None
181
+ )
182
+ self._hash = hash(dim) if self.dim_value is None else hash(self.dim_value)
183
+
184
+ def __eq__(self, other):
185
+ if isinstance(other, DimWrapper):
186
+ if self.dim_value is not None:
187
+ return self.dim_value == other.dim_value
188
+ return self.dim_ref() == other.dim_ref() and self.dyn_size_ref() is other.dyn_size_ref()
189
+ return False
190
+
191
+ def __hash__(self):
192
+ return self._hash
193
+
194
+
195
+ def _dim_value_for_key(dim: Dim) -> Optional[int]:
196
+ if dim.size is not None:
197
+ return dim.size
198
+ if dim.dyn_size_ext and not dim.dyn_size_ext.dims:
199
+ if dim.dyn_size_ext.raw_tensor is not None:
200
+ # noinspection PyProtectedMember
201
+ if dim.dyn_size_ext._raw_backend.executing_eagerly():
202
+ return int(dim.get_dim_value())
203
+ return None
204
+
205
+
206
+ # For now... we might extend it by some more types.
207
+ _KeyItemType = Union[None, str, bool, int, float, TensorWrapper, DimWrapper]
208
+ _RawTypes = (type(None), str, bool, int, float)
@@ -4,10 +4,10 @@ Attention
4
4
 
5
5
  from __future__ import annotations
6
6
  from typing import Tuple, Union, Optional, Sequence
7
- import weakref
8
7
  import logging
9
8
  from returnn.tensor import Tensor, Dim, single_step_dim
10
9
  import returnn.frontend as rf
10
+ from returnn.frontend._cache import Cache
11
11
 
12
12
 
13
13
  __all__ = [
@@ -330,7 +330,7 @@ class RotaryPosCausalSelfAttention(CausalSelfAttention):
330
330
  q = _apply_rope(
331
331
  q,
332
332
  (
333
- rf.gather(pos_enc, axis=hist_dim, indices=hist_dim.get_size_tensor() - 1)
333
+ rf.gather(pos_enc, axis=hist_dim, indices=rf.last_frame_position_of_dim(hist_dim))
334
334
  if axis == single_step_dim
335
335
  else rf.replace_dim(pos_enc, in_dim=hist_dim, out_dim=axis)[0]
336
336
  ),
@@ -892,7 +892,7 @@ def _make_indices(
892
892
  return indices, out_spatial_dim
893
893
 
894
894
 
895
- _relative_positional_encoding_cache = weakref.WeakKeyDictionary() # run ctx -> (spatial_dim, feat_dim) -> enc
895
+ _relative_positional_encoding_cache = Cache(128)
896
896
 
897
897
 
898
898
  def relative_positional_encoding(
@@ -924,10 +924,10 @@ def relative_positional_encoding(
924
924
  """
925
925
  if not dtype:
926
926
  dtype = rf.get_default_float_dtype()
927
- cache = _relative_positional_encoding_cache.setdefault(rf.get_run_ctx(), {})
928
927
  cache_key = (query_spatial_dim, key_value_spatial_dim, feat_dim, query_offset, dtype)
929
- if cache_key in cache:
930
- return cache[cache_key]
928
+ cache_entry = _relative_positional_encoding_cache.get(cache_key)
929
+ if cache_entry is not None:
930
+ return cache_entry
931
931
  import math
932
932
 
933
933
  with rf.control_flow_ctx(None):
@@ -946,11 +946,11 @@ def relative_positional_encoding(
946
946
  allow_missing_implicit_dims=True,
947
947
  )
948
948
  emb.feature_dim = feat_dim
949
- cache[cache_key] = emb, out_spatial_dim
949
+ _relative_positional_encoding_cache.set(cache_key, (emb, out_spatial_dim))
950
950
  return emb, out_spatial_dim
951
951
 
952
952
 
953
- _sinusoidal_positional_encoding_cache = weakref.WeakKeyDictionary() # run ctx -> (spatial_dim, feat_dim) -> enc
953
+ _sinusoidal_positional_encoding_cache = Cache(128) # (spatial_dim, feat_dim) -> enc
954
954
 
955
955
 
956
956
  def sinusoidal_positional_encoding(
@@ -982,10 +982,10 @@ def sinusoidal_positional_encoding(
982
982
  dtype = rf.get_default_float_dtype()
983
983
  if not device:
984
984
  device = rf.get_default_device()
985
- cache = _sinusoidal_positional_encoding_cache.setdefault(rf.get_run_ctx(), {})
986
985
  cache_key = (spatial_dim, feat_dim, offset, base, dtype, device)
987
- if cache_key in cache:
988
- return cache[cache_key]
986
+ cache_entry = _sinusoidal_positional_encoding_cache.get(cache_key)
987
+ if cache_entry is not None:
988
+ return cache_entry
989
989
  import math
990
990
 
991
991
  with rf.control_flow_ctx(None):
@@ -1012,7 +1012,7 @@ def sinusoidal_positional_encoding(
1012
1012
  {spatial_dim, feat_dim} if spatial_dim != single_step_dim else {feat_dim}, allow_missing_implicit_dims=True
1013
1013
  )
1014
1014
  emb.feature_dim = feat_dim
1015
- cache[cache_key] = emb
1015
+ _sinusoidal_positional_encoding_cache.set(cache_key, emb)
1016
1016
  return emb
1017
1017
 
1018
1018
 
@@ -8,6 +8,7 @@ import returnn.frontend as rf
8
8
  from returnn.frontend.decoder.transformer import TransformerDecoder, TransformerDecoderLayer, FeedForwardGated
9
9
 
10
10
  if TYPE_CHECKING:
11
+ # noinspection PyUnresolvedReferences,PyPackageRequirements,PyProtectedMember
11
12
  from transformers.models.llama.modeling_llama import (
12
13
  LlamaModel,
13
14
  LlamaForCausalLM,
@@ -25,6 +26,8 @@ def import_params_hf_llama_to_rf_transformer_decoder(
25
26
  Import params from HF Llama model to RF :class:`TransformerDecoder`.
26
27
  """
27
28
  import torch
29
+
30
+ # noinspection PyUnresolvedReferences,PyPackageRequirements,PyProtectedMember
28
31
  from transformers.models.llama.modeling_llama import LlamaModel, LlamaForCausalLM, LlamaDecoderLayer
29
32
 
30
33
  print("HF Model:")
@@ -206,10 +209,10 @@ def import_params_hf_llama_att_to_rf_rotary_att(model_hf: LlamaAttention, model_
206
209
  """
207
210
  import torch
208
211
 
209
- assert model_hf.num_heads == model_rf.num_heads.dimension
210
- assert model_hf.hidden_size == model_rf.in_dim.dimension
211
- dim = model_hf.hidden_size
212
- nh = model_hf.num_heads
212
+ assert model_hf.config.num_attention_heads == model_rf.num_heads.dimension
213
+ assert model_hf.config.hidden_size == model_rf.in_dim.dimension
214
+ dim = model_hf.config.hidden_size
215
+ nh = model_hf.config.num_attention_heads
213
216
  hdim = dim // nh
214
217
 
215
218
  print("HF Model:")
@@ -182,6 +182,19 @@ def init_better_exchook():
182
182
 
183
183
  sys.excepthook = excepthook
184
184
 
185
+ def threading_excepthook(args, /):
186
+ """
187
+ Thread-specific excepthook to ensure the main thread is killed on unhandled exceptions in sub threads.
188
+ """
189
+ log_out = log.v1 or sys.stdout
190
+ print(
191
+ f"Unhandled exception in thread {threading.current_thread()}, going to interrupt main thread:", file=log_out
192
+ )
193
+ better_exchook(args.exc_type, args.exc_value, args.exc_traceback, autodebugshell=False, file=log_out)
194
+ thread.interrupt_main()
195
+
196
+ threading.excepthook = threading_excepthook
197
+
185
198
  from returnn.util.basic import to_bool
186
199
 
187
200
  if os.environ.get("DEBUG_WARN_WITH_TRACEBACK") and to_bool(os.environ.get("DEBUG_WARN_WITH_TRACEBACK")):