returnn 1.20240830.140746__tar.gz → 1.20240903.205823__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of returnn might be problematic.

Files changed (463)
  1. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/PKG-INFO +1 -1
  2. returnn-1.20240903.205823/_setup_info_generated.py +2 -0
  3. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/_numpy_backend.py +15 -1
  4. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tensor/_dim_extra.py +124 -2
  5. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/config_entry_points.py +3 -0
  6. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/util/data.py +1 -1
  7. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn.egg-info/PKG-INFO +1 -1
  8. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/rf_utils.py +4 -0
  9. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_array.py +1 -1
  10. returnn-1.20240903.205823/tests/test_rf_decoder_transformer.py +324 -0
  11. returnn-1.20240830.140746/_setup_info_generated.py +0 -2
  12. returnn-1.20240830.140746/tests/test_rf_decoder_transformer.py +0 -163
  13. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/.editorconfig +0 -0
  14. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/.gitignore +0 -0
  15. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/.gitmodules +0 -0
  16. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/.kateconfig +0 -0
  17. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/CHANGELOG.md +0 -0
  18. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/CODEOWNERS +0 -0
  19. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/CONTRIBUTING.md +0 -0
  20. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/LICENSE +0 -0
  21. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/MANIFEST.in +0 -0
  22. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/README.rst +0 -0
  23. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/__init__.py +0 -0
  24. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/12AX.cluster_map +0 -0
  25. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/_setup_returnn_env.py +0 -0
  26. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-fwd.config +0 -0
  27. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-horovod-mpi.py +0 -0
  28. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-horovod-mpi.py.sh +0 -0
  29. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-horovod-mpi.sh +0 -0
  30. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-hyper-param-tuning.config +0 -0
  31. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-iter-dataset.py +0 -0
  32. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-list-devices.py +0 -0
  33. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-lua-torch-layer.config +0 -0
  34. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-pretrain.config +0 -0
  35. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-record-and-push-to-webserver.py +0 -0
  36. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-returnn-as-framework.py +0 -0
  37. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-rf-pt-benchmark.py +0 -0
  38. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-rf.config +0 -0
  39. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-rhn-enwik8.config +0 -0
  40. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-sprint-interface.py +0 -0
  41. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-att-copy.config +0 -0
  42. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-attention.config +0 -0
  43. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
  44. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
  45. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-enc-dec.config +0 -0
  46. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-hard-att-copy.config +0 -0
  47. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-lstm-benchmark.py +0 -0
  48. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
  49. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
  50. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-native-lstm.12ax.config +0 -0
  51. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-native-lstm2.12ax.config +0 -0
  52. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
  53. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-neural-transducer.12ax.config +0 -0
  54. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-rec-explicit-lstm.config +0 -0
  55. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-rec-explicit-rnn.config +0 -0
  56. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-rec-self-att.config +0 -0
  57. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-search-compiled-graph.py +0 -0
  58. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
  59. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-timit-lstm-ctc.config +0 -0
  60. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-torch.config +0 -0
  61. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
  62. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo.sh +0 -0
  63. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
  64. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
  65. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
  66. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/README.txt +0 -0
  67. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/chars.txt +0 -0
  68. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/config_demo +0 -0
  69. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/config_fwd +0 -0
  70. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/config_real +0 -0
  71. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
  72. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/decode.py +0 -0
  73. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
  74. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/go.sh +0 -0
  75. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/lines.txt +0 -0
  76. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/split/eval.txt +0 -0
  77. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/split/train.txt +0 -0
  78. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/split/valid.txt +0 -0
  79. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/README.md +0 -0
  80. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/artificial/create_test_h5.py +0 -0
  81. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/artificial/forwardconfig +0 -0
  82. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/artificial/go.sh +0 -0
  83. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/artificial/trainconfig +0 -0
  84. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
  85. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
  86. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/artificial_rgb/go.sh +0 -0
  87. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
  88. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/pyproject.toml +0 -0
  89. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/requirements.txt +0 -0
  90. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/__init__.py +0 -0
  91. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/__main__.py +0 -0
  92. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/__old_mod_loader__.py +0 -0
  93. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/__setup__.py +0 -0
  94. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/config.py +0 -0
  95. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/__init__.py +0 -0
  96. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/audio.py +0 -0
  97. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/basic.py +0 -0
  98. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/bundle_file.py +0 -0
  99. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/cached.py +0 -0
  100. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/cached2.py +0 -0
  101. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/distrib_files.py +0 -0
  102. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/generating.py +0 -0
  103. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/hdf.py +0 -0
  104. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/lm.py +0 -0
  105. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/map.py +0 -0
  106. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/meta.py +0 -0
  107. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/multi_proc.py +0 -0
  108. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/normalization_data.py +0 -0
  109. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/numpy_dump.py +0 -0
  110. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/postprocessing.py +0 -0
  111. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/raw_wav.py +0 -0
  112. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/sprint.py +0 -0
  113. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/stereo.py +0 -0
  114. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/util/__init__.py +0 -0
  115. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/util/feature_extraction.py +0 -0
  116. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/util/strings.py +0 -0
  117. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/util/vocabulary.py +0 -0
  118. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/engine/__init__.py +0 -0
  119. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/engine/base.py +0 -0
  120. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/engine/batch.py +0 -0
  121. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/__init__.py +0 -0
  122. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/__main__.py +0 -0
  123. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/.git +0 -0
  124. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
  125. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
  126. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
  127. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
  128. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
  129. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
  130. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
  131. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
  132. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
  133. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
  134. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
  135. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
  136. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
  137. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
  138. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
  139. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
  140. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
  141. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
  142. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
  143. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
  144. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
  145. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
  146. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
  147. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
  148. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/__init__.py +0 -0
  149. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/graph_editor/README.md +0 -0
  150. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/graph_editor/__init__.py +0 -0
  151. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/graph_editor/edit.py +0 -0
  152. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/graph_editor/reroute.py +0 -0
  153. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/graph_editor/select.py +0 -0
  154. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/graph_editor/subgraph.py +0 -0
  155. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/graph_editor/transform.py +0 -0
  156. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/graph_editor/util.py +0 -0
  157. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/forward_iface.py +0 -0
  158. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/__init__.py +0 -0
  159. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/_backend.py +0 -0
  160. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/_native/__init__.py +0 -0
  161. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/_native/backend.cpp +0 -0
  162. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/_native/backend.hpp +0 -0
  163. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/_native/module.cpp +0 -0
  164. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/_native/module.hpp +0 -0
  165. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/_native/py_utils.hpp +0 -0
  166. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/_native/tensor_ops.cpp +0 -0
  167. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/_native/tensor_ops.hpp +0 -0
  168. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/_random_journal.py +0 -0
  169. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/_utils.py +0 -0
  170. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/array_.py +0 -0
  171. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/attention.py +0 -0
  172. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/audio/__init__.py +0 -0
  173. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/audio/mel.py +0 -0
  174. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/audio/specaugment.py +0 -0
  175. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/backend.py +0 -0
  176. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/build_from_dict.py +0 -0
  177. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/cond.py +0 -0
  178. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/const.py +0 -0
  179. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/container.py +0 -0
  180. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/control_flow_ctx.py +0 -0
  181. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/conv.py +0 -0
  182. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/conversions/__init__.py +0 -0
  183. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/conversions/espnet_e_branchformer.py +0 -0
  184. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/conversions/hf_llama.py +0 -0
  185. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/conversions/torch_nn.py +0 -0
  186. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/decoder/__init__.py +0 -0
  187. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/decoder/transformer.py +0 -0
  188. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/device.py +0 -0
  189. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/dims.py +0 -0
  190. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/dropout.py +0 -0
  191. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/dtype.py +0 -0
  192. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/encoder/__init__.py +0 -0
  193. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/encoder/base.py +0 -0
  194. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/encoder/conformer.py +0 -0
  195. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/encoder/e_branchformer.py +0 -0
  196. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/gradient.py +0 -0
  197. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/graph.py +0 -0
  198. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/hooks.py +0 -0
  199. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/init.py +0 -0
  200. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/label_smoothing.py +0 -0
  201. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/linear.py +0 -0
  202. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/loop.py +0 -0
  203. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/loss.py +0 -0
  204. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/math_.py +0 -0
  205. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/matmul.py +0 -0
  206. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/module.py +0 -0
  207. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/normalization.py +0 -0
  208. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/parameter.py +0 -0
  209. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/parametrizations.py +0 -0
  210. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/parametrize.py +0 -0
  211. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/piecewise_linear.py +0 -0
  212. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/rand.py +0 -0
  213. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/rec.py +0 -0
  214. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/reduce.py +0 -0
  215. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/run_ctx.py +0 -0
  216. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/signal.py +0 -0
  217. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/state.py +0 -0
  218. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/stepwise_scheduler.py +0 -0
  219. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/tensor_array.py +0 -0
  220. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/types.py +0 -0
  221. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/import_/__init__.py +0 -0
  222. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/import_/common.py +0 -0
  223. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/import_/git.py +0 -0
  224. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/import_/import_.py +0 -0
  225. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/learning_rate_control.py +0 -0
  226. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/log.py +0 -0
  227. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/native_op.cpp +0 -0
  228. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/native_op.py +0 -0
  229. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/pretrain.py +0 -0
  230. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/sprint/__init__.py +0 -0
  231. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/sprint/cache.py +0 -0
  232. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/sprint/control.py +0 -0
  233. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/sprint/error_signals.py +0 -0
  234. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/sprint/extern_interface.py +0 -0
  235. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/sprint/interface.py +0 -0
  236. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tensor/README.md +0 -0
  237. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tensor/__init__.py +0 -0
  238. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tensor/_tensor_extra.py +0 -0
  239. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tensor/_tensor_mixin_base.py +0 -0
  240. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tensor/_tensor_op_overloads.py +0 -0
  241. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tensor/control_flow_ctx.py +0 -0
  242. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tensor/dim.py +0 -0
  243. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tensor/marked_dim.py +0 -0
  244. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tensor/tensor.py +0 -0
  245. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tensor/tensor_dict.py +0 -0
  246. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tensor/utils.py +0 -0
  247. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/__init__.py +0 -0
  248. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/compat.py +0 -0
  249. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/data_pipeline.py +0 -0
  250. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/distributed.py +0 -0
  251. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/engine.py +0 -0
  252. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/README.md +0 -0
  253. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/__init__.py +0 -0
  254. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/_backend.py +0 -0
  255. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/_utils.py +0 -0
  256. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/cond.py +0 -0
  257. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
  258. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/dims.py +0 -0
  259. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/layer.py +0 -0
  260. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/loop.py +0 -0
  261. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/make_layer.py +0 -0
  262. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/masked_computation.py +0 -0
  263. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
  264. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
  265. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_low_level/__init__.py +0 -0
  266. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_low_level/_backend.py +0 -0
  267. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/horovod.py +0 -0
  268. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/hyper_param_tuning.py +0 -0
  269. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/layers/__init__.py +0 -0
  270. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/layers/base.py +0 -0
  271. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/layers/basic.py +0 -0
  272. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/layers/rec.py +0 -0
  273. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/layers/segmental_model.py +0 -0
  274. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/layers/signal_processing.py +0 -0
  275. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/layers/variable.py +0 -0
  276. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/native_op.py +0 -0
  277. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/network.py +0 -0
  278. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/sprint.py +0 -0
  279. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/updater.py +0 -0
  280. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/util/__init__.py +0 -0
  281. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/util/basic.py +0 -0
  282. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/util/gradient_checkpoint.py +0 -0
  283. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/util/ken_lm.py +0 -0
  284. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/util/open_fst.py +0 -0
  285. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/README.md +0 -0
  286. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/__init__.py +0 -0
  287. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/data/__init__.py +0 -0
  288. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/data/extern_data.py +0 -0
  289. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/data/pipeline.py +0 -0
  290. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/data/queued_data_iter.py +0 -0
  291. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
  292. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/data/tensor_utils.py +0 -0
  293. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/distributed.py +0 -0
  294. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/engine.py +0 -0
  295. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/frontend/__init__.py +0 -0
  296. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/frontend/_backend.py +0 -0
  297. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/frontend/_rand.py +0 -0
  298. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/frontend/bridge.py +0 -0
  299. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/frontend/raw_ops.py +0 -0
  300. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/updater.py +0 -0
  301. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/util/README.md +0 -0
  302. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/util/__init__.py +0 -0
  303. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/util/array_.py +0 -0
  304. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/util/diagnose_gpu.py +0 -0
  305. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/util/gradient_checkpoint.py +0 -0
  306. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/util/scaled_gradient.py +0 -0
  307. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/__init__.py +0 -0
  308. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/basic.py +0 -0
  309. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/better_exchook.py +0 -0
  310. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/bpe.py +0 -0
  311. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/debug.py +0 -0
  312. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/debug_helpers.py +0 -0
  313. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/file_cache.py +0 -0
  314. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/fsa.py +0 -0
  315. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/literal_py_to_pickle.py +0 -0
  316. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/math.py +0 -0
  317. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
  318. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/native_code_compiler.py +0 -0
  319. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/pprint.py +0 -0
  320. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/py-to-pickle.cpp +0 -0
  321. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/py_compat.py +0 -0
  322. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/py_ext_mod_compiler.py +0 -0
  323. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/result_with_reason.py +0 -0
  324. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/sig_proc.py +0 -0
  325. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/task_system.py +0 -0
  326. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/train_proc_manager.py +0 -0
  327. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/watch_memory.py +0 -0
  328. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn.egg-info/SOURCES.txt +0 -0
  329. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn.egg-info/dependency_links.txt +0 -0
  330. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn.egg-info/top_level.txt +0 -0
  331. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/rnn.py +0 -0
  332. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/setup.cfg +0 -0
  333. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/setup.py +0 -0
  334. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/DummySprintExec.py +0 -0
  335. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/PyCharm-inspection-profile.xml +0 -0
  336. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/PyCharm.idea/.gitignore +0 -0
  337. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/PyCharm.idea/.name +0 -0
  338. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
  339. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
  340. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
  341. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
  342. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
  343. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/PyCharm.idea/misc.xml +0 -0
  344. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/PyCharm.idea/modules.xml +0 -0
  345. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/PyCharm.idea/returnn.iml +0 -0
  346. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
  347. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/_set_num_threads1.py +0 -0
  348. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/_setup_returnn_env.py +0 -0
  349. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/_setup_test_env.py +0 -0
  350. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/bpe-unicode-demo.codes +0 -0
  351. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/bpe-unicode-demo.vocab +0 -0
  352. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/lexicon_opt.fst +0 -0
  353. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/lexicon_opt.isyms +0 -0
  354. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/lexicon_opt.jpg +0 -0
  355. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/lexicon_opt.osyms +0 -0
  356. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/lint_common.py +0 -0
  357. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/pycharm-inspect.py +0 -0
  358. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/pylint.py +0 -0
  359. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/returnn-as-framework.py +0 -0
  360. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/spelling.dic +0 -0
  361. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_Config.py +0 -0
  362. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_Dataset.py +0 -0
  363. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_Fsa.py +0 -0
  364. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_GeneratingDataset.py +0 -0
  365. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_HDFDataset.py +0 -0
  366. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_LearningRateControl.py +0 -0
  367. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_Log.py +0 -0
  368. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_MultiProcDataset.py +0 -0
  369. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_Pretrain.py +0 -0
  370. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_ResNet.py +0 -0
  371. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_SprintDataset.py +0 -0
  372. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_SprintInterface.py +0 -0
  373. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_TFEngine.py +0 -0
  374. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_TFNativeOp.py +0 -0
  375. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_TFNetworkLayer.py +0 -0
  376. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_TFNetworkRecLayer.py +0 -0
  377. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_TFNetworkSigProcLayer.py +0 -0
  378. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_TFUpdater.py +0 -0
  379. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_TFUtil.py +0 -0
  380. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_TF_determinism.py +0 -0
  381. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_TaskSystem.py +0 -0
  382. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_TaskSystem_SharedMem.py +0 -0
  383. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_TranslationDataset.py +0 -0
  384. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_Util.py +0 -0
  385. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_demos.py +0 -0
  386. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_fork_exec.py +0 -0
  387. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_hdf_dump.py +0 -0
  388. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_attention.py +0 -0
  389. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_base.py +0 -0
  390. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_cond.py +0 -0
  391. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_const.py +0 -0
  392. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_container.py +0 -0
  393. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_conv.py +0 -0
  394. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_encoder_conformer.py +0 -0
  395. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_gradient.py +0 -0
  396. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_label_smoothing.py +0 -0
  397. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_loop.py +0 -0
  398. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_math.py +0 -0
  399. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_normalization.py +0 -0
  400. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_piecewise_linear.py +0 -0
  401. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_rec.py +0 -0
  402. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_reduce.py +0 -0
  403. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_signal.py +0 -0
  404. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_tensor.py +0 -0
  405. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_tools.py +0 -0
  406. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_torch_dataset.py +0 -0
  407. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_torch_engine.py +0 -0
  408. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_torch_frontend.py +0 -0
  409. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_torch_internal_frontend.py +0 -0
  410. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_torch_util.py +0 -0
  411. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/torch_utils.py +0 -0
  412. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/_setup_returnn_env.py +0 -0
  413. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/analyze-dataset-batches.py +0 -0
  414. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/bliss-collect-seq-lens.py +0 -0
  415. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/bliss-dump-text.py +0 -0
  416. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/bliss-get-segment-names.py +0 -0
  417. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/bliss-to-ogg-zip.py +0 -0
  418. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/bpe-create-lexicon.py +0 -0
  419. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/calculate-word-error-rate.py +0 -0
  420. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/cleanup-old-models.py +0 -0
  421. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/collect-orth-symbols.py +0 -0
  422. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/collect-words.py +0 -0
  423. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/compile_native_op.py +0 -0
  424. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/compile_tf_graph.py +0 -0
  425. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/debug-dump-search-scores.py +0 -0
  426. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/debug-plot-search-scores.py +0 -0
  427. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/dump-dataset-raw-strings.py +0 -0
  428. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/dump-dataset.py +0 -0
  429. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/dump-forward-stats.py +0 -0
  430. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/dump-forward.py +0 -0
  431. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/dump-network-json.py +0 -0
  432. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/dump-pickle.py +0 -0
  433. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/extract_state_tying_from_dataset.py +0 -0
  434. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/get-attention-weights.py +0 -0
  435. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/get-best-model-epoch.py +0 -0
  436. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/hdf_dump.py +0 -0
  437. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/hdf_dump_translation_dataset.py +0 -0
  438. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/import-blocks-mt-model.py +0 -0
  439. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/import-t2t-mt-model.py +0 -0
  440. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/.gitignore +0 -0
  441. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/Makefile +0 -0
  442. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/README.md +0 -0
  443. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/example/README.md +0 -0
  444. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/example/libs_list +0 -0
  445. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
  446. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
  447. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
  448. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/example/state_vars_list +0 -0
  449. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/example/tensor_names_list +0 -0
  450. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/file.h +0 -0
  451. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
  452. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
  453. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/main.cc +0 -0
  454. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/rescorer.h +0 -0
  455. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/vocabulary.cc +0 -0
  456. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/vocabulary.h +0 -0
  457. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/tf_avg_checkpoints.py +0 -0
  458. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/tf_inspect_checkpoint.py +0 -0
  459. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/tf_inspect_summary_log.py +0 -0
  460. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/torch_avg_checkpoints.py +0 -0
  461. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/torch_export_to_onnx.py +0 -0
  462. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/torch_inspect_checkpoint.py +0 -0
  463. {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/torch_inspect_checkpoint_and_opt.py +0 -0
{returnn-1.20240830.140746 → returnn-1.20240903.205823}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20240830.140746
+Version: 1.20240903.205823
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
returnn-1.20240903.205823/_setup_info_generated.py (new file)
@@ -0,0 +1,2 @@
+version = '1.20240903.205823'
+long_version = '1.20240903.205823+git.eb0f22e'
{returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/_numpy_backend.py
@@ -153,7 +153,10 @@ class NumpyBackend(Backend[numpy.ndarray]):
         op = NumpyBackend._CombineKindMap.get(kind)
         if not op:
             raise ValueError(f"RF NumpyBackend: combine kind {kind!r} not supported")
-        return op(a, b)
+        res = op(a, b)
+        if not isinstance(res, numpy.ndarray):
+            res = numpy.array(res)
+        return res

     @staticmethod
     def range_over_dim(dim: Dim, *, dtype: Optional[str] = None, device: Optional[str] = None) -> Tensor[numpy.ndarray]:
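Note on the combine change above: when the combined inputs are scalars or 0-d arrays, NumPy's binary ufuncs can return a NumPy scalar type rather than an ndarray, which appears to be what the added isinstance check guards against. A minimal standalone sketch of that behavior (plain NumPy, not RETURNN code):

    import numpy

    a = numpy.array(2.0)                   # 0-d ndarray
    b = numpy.array(3.0)
    res = numpy.add(a, b)                  # NumPy hands back a numpy.float64 scalar here
    print(isinstance(res, numpy.ndarray))  # False
    res = numpy.array(res)                 # wrapping, as in the hunk above, restores a (0-d) ndarray
    print(isinstance(res, numpy.ndarray))  # True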
@@ -211,3 +214,14 @@ class NumpyBackend(Backend[numpy.ndarray]):
             sparse_dim=source.sparse_dim,
         )
         return res
+
+    @staticmethod
+    def activation_raw(raw_tensor: numpy.ndarray, func: str) -> numpy.ndarray:
+        """
+        :param raw_tensor:
+        :param func: "tanh", "sigmoid", "relu", ...
+        :return: raw tensor with elementwise activation applied
+        """
+        if func == "relu":
+            return numpy.array(numpy.maximum(raw_tensor, 0))
+        raise NotImplementedError("NumpyBackend: activation %r not implemented" % func)
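For orientation, a hypothetical direct-use sketch of the new activation_raw staticmethod; the import path follows the file shown above, though in normal use the method would be reached indirectly through the frontend API rather than called like this:

    import numpy
    from returnn.frontend._numpy_backend import NumpyBackend  # private module, as in the diff above

    x = numpy.array([-1.0, 0.0, 2.5], dtype="float32")
    y = NumpyBackend.activation_raw(x, "relu")  # -> array([0. , 0. , 2.5], dtype=float32)
    assert isinstance(y, numpy.ndarray)
    # any func other than "relu" currently raises NotImplementedError, per the hunk above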
{returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tensor/_dim_extra.py
@@ -4,8 +4,9 @@ or just rarely used attribs, such that we can save memory for the common case.
 """

 from __future__ import annotations
-from typing import TYPE_CHECKING, Optional, Union, Any, Tuple, Sequence, Dict, List, Set, Callable
+from typing import TYPE_CHECKING, Optional, Union, Any, Tuple, Sequence, MutableMapping, Dict, List, Set, Callable
 import operator
+import weakref

 from returnn.util.basic import Entity
 from returnn.util import basic as util
@@ -118,7 +119,7 @@ class _DimExtra:
         self.same_for_batch_ctx = {}  # type: Dict[Tuple[BatchInfo,Optional[ControlFlowContext]],_d.Dim]
         self.cache_dyn_size_ext_dev = {}  # type: Dict[str,_t.Tensor]  # device -> dyn_size_ext
         self.cache_seq_mask: Dict[Tuple[str, Optional[Tuple[Dim, ...]]], _t.Tensor] = {}  # (dev,dim_order) -> seq_mask
-        self.cache_dim_math: Dict[Tuple[str, Union[Dim, int]], Dim] = {}  # op (add,sub,...), operand -> Dim
+        self.cache_dim_math = _CacheDimMath()  # op (add,sub,...), operand -> Dim

     def __getstate__(self):
         d = vars(self).copy()
@@ -389,6 +390,10 @@ class _DimMixin:
         if dim_extra:
             # Any dims via dim math could also contain raw tensors,
             # so iterate through them.
+            if dim.dyn_size_ext is not None or dim.dimension is None:
+                dim_extra.cache_dim_math.clear()
+            else:
+                dim_extra.cache_dim_math.clear_dynamic()
             queue += dim_extra.cache_dim_math.values()
             if dim_extra.same_as:
                 queue.append(dim_extra.same_as)
@@ -2873,6 +2878,123 @@ def dim_cmp_value(obj):
     return obj


+class _CacheDimMath:
+    """op (add,sub,...), operand -> Dim"""
+
+    class _OperandCache:
+        def __init__(self):
+            self.dims: MutableMapping[Dim, Dim] = weakref.WeakKeyDictionary()
+            self.statics: Dict[int, Dim] = {}
+
+    def __init__(self):
+        self._ops: Dict[str, _CacheDimMath._OperandCache] = {}
+
+    def __repr__(self):
+        return "_CacheDimMath({%s})" % ", ".join("%r: %r" % (k, v) for k, v in self.items())
+
+    def _get_op_dict(self, __key: Tuple[str, Union[Dim, int]]) -> _OperandCache:
+        if __key[0] in self._ops:
+            return self._ops[__key[0]]
+        else:
+            op_dict = self._OperandCache()
+            self._ops[__key[0]] = op_dict
+            return op_dict
+
+    def __setitem__(self, __key: Tuple[str, Union[Dim, int]], __value: Dim):
+        op_dict = self._get_op_dict(__key)
+        if isinstance(__key[1], int):
+            value_dict = op_dict.statics
+        else:
+            value_dict = op_dict.dims
+        if __key[1] in value_dict:
+            value_dict[__key[1]] = __value
+            return
+        if len(value_dict) >= 5:
+            # Just to avoid memory leaks.
+            value_dict.clear()
+        value_dict[__key[1]] = __value
+
+    def __delitem__(self, __key: Tuple[str, Union[Dim, int]]):
+        op_dict = self._ops[__key[0]]
+        if isinstance(__key[1], int):
+            del op_dict.statics[__key[1]]
+        else:
+            del op_dict.dims[__key[1]]
+
+    def __getitem__(self, __key: Tuple[str, Union[Dim, int]]) -> Dim:
+        op_dict = self._ops[__key[0]]
+        if isinstance(__key[1], int):
+            return op_dict.statics[__key[1]]
+        else:
+            return op_dict.dims[__key[1]]
+
+    def __contains__(self, __key: Tuple[str, Union[Dim, int]]) -> bool:
+        op_dict = self._ops.get(__key[0])
+        if not op_dict:
+            return False
+        if isinstance(__key[1], int):
+            return __key[1] in op_dict.statics
+        else:
+            return __key[1] in op_dict.dims
+
+    def get(self, __key: Tuple[str, Union[Dim, int]], default: Optional[Dim] = None) -> Optional[Dim]:
+        """get"""
+        op_dict = self._ops.get(__key[0])
+        if not op_dict:
+            return default
+        if isinstance(__key[1], int):
+            return op_dict.statics.get(__key[1], default)
+        else:
+            return op_dict.dims.get(__key[1], default)
+
+    def setdefault(self, __key: Tuple[str, Union[Dim, int]], __value: Dim):
+        """setdefault"""
+        existing = self.get(__key)
+        if existing is not None:
+            return existing
+        self[__key] = __value
+        return __value
+
+    def clear(self):
+        """clear"""
+        self._ops.clear()
+
+    def clear_dynamic(self):
+        """clear dynamic part"""
+        for op_dict in self._ops.values():
+            for k, v in list(op_dict.dims.items()):
+                if v.dyn_size_ext is not None or v.dimension is None:
+                    del op_dict.dims[k]
+
+    def __len__(self):
+        count = 0
+        for op_dict in self._ops.values():
+            count += len(op_dict.statics)
+            count += len(op_dict.dims)
+        return count
+
+    def items(self):
+        """items"""
+        for op_name, op_dict in self._ops.items():
+            for key, value in op_dict.statics.items():
+                yield (op_name, key), value
+            for key, value in op_dict.dims.items():
+                yield (op_name, key), value
+
+    def keys(self):
+        """keys"""
+        for k, v in self.items():
+            yield k
+
+    def values(self):
+        """values"""
+        for k, v in self.items():
+            yield v
+
+    def __iter__(self):
+        yield from self.keys()
+
+
 def _behavior_version_reset_callback():
     # Reset things we did in _handle_new_min_version.
     _DimMixin._SimpleEquality = False
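The notable design choice in _CacheDimMath above is the weakref.WeakKeyDictionary used for Dim operands: an entry disappears as soon as the operand Dim is no longer referenced anywhere else, so the cache itself cannot keep dims (and any raw tensors they hold) alive. A minimal standalone sketch of that mechanism, with _Operand as a hypothetical stand-in for a weak-referenceable Dim:

    import weakref


    class _Operand:
        """Hypothetical stand-in for a Dim used as a cache key."""


    cache = weakref.WeakKeyDictionary()
    key = _Operand()
    cache[key] = "derived dim"
    print(len(cache))  # 1: entry present while the key is strongly referenced
    del key            # drop the last strong reference to the operand
    print(len(cache))  # 0 (in CPython): the entry is discarded automatically, so nothing leaks

Static int operands cannot be weakly referenced, which is presumably why they go into a plain dict that the __setitem__ above clears once it grows past a handful of entries.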
{returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/config_entry_points.py
@@ -118,6 +118,9 @@ def get_net_dict(
             # but now the TF engine actually wants to have Tensor[tf.Tensor].
             # Reset it now. The TF engine should redefine it again.
             elem.reset_batch_and_raw()
+        elif isinstance(elem, set):
+            # map_structure does not recurse into sets.
+            nest.map_structure(_cleanup_net_dict_value, sorted(list(elem)))
         return elem

     # Do some cleanup.
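One detail worth noting in the set handling above: the diff's own comment states that map_structure does not recurse into sets, and since sets are unordered the elements are first sorted so the cleanup runs in a deterministic order. A plain-Python sketch of that pattern, with _visit standing in as a hypothetical replacement for the real _cleanup_net_dict_value callback:

    def _visit(elem):
        # hypothetical per-element cleanup callback
        print("cleaning", elem)
        return elem


    value = {"layer_b", "layer_a"}  # a set inside the net dict; not recursed into automatically
    for elem in sorted(value):      # a sorted copy gives a stable, deterministic order
        _visit(elem)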
{returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/util/data.py
@@ -339,7 +339,7 @@ class BatchInfo:

         # Ok, need to extend.
         global_batch_dims = [dim for dim in all_virtual_dims if isinstance(dim, BatchInfo.GlobalBatchDim)]
-        assert len(global_batch_dims) == 1
+        assert len(global_batch_dims) == 1, f"got global_batch_dims={global_batch_dims!r}"
         global_batch_dim = global_batch_dims[0]
         assert base.virtual_dims == [global_batch_dim]
         beams = [dim for dim in all_virtual_dims if isinstance(dim, BatchInfo.BeamDim)]
{returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20240830.140746
+Version: 1.20240903.205823
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
{returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/rf_utils.py
@@ -185,6 +185,9 @@ def _run_model_net_dict_tf(

     from returnn.tf.frontend_layers.config_entry_points import get_net_dict

+    # noinspection PyProtectedMember
+    from returnn.frontend import _backend
+
     config = Config(
         {
             "debug_runtime_sanity_checks": True,
@@ -203,6 +206,7 @@ def _run_model_net_dict_tf(
     outputs_layers = rf.get_run_ctx().outputs
     print("*** outputs:", outputs_layers)

+    _backend.select_backend_tf()
     net = TFNetwork(config=config, train_flag=False)
     net.construct_from_dict(net_dict)
{returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_array.py
@@ -238,7 +238,7 @@ def test_pad_time_right():
     assert data_.dims == (batch_dim, time_dim, in_dim)
     new_time_dim = out_.dims[1]
     assert out_.dims == (batch_dim, new_time_dim, in_dim) and new_time_dim != time_dim
-    assert new_time_dim == time_dim + 1  # math dim... not really necessary check here...
+    # assert new_time_dim == time_dim + 1  # math dim... not really necessary check here...
     assert time_dim.dyn_size_ext.dims == new_time_dim.dyn_size_ext.dims == (batch_dim,)
     batch_size = batch_dim.get_dim_value()
     assert batch_size > 1
@@ -0,0 +1,324 @@
+"""
+Testing returnn.frontend.decoder.transformer.
+"""
+
+from __future__ import annotations
+
+import _setup_test_env  # noqa
+import sys
+import unittest
+import torch
+from returnn.util import better_exchook
+from returnn.util.debug import PyTracer, check_py_traces_rf_to_pt_equal
+from returnn.tensor import Tensor, Dim
+import returnn.frontend as rf
+
+
+def _setup():
+    try:
+        import lovely_tensors
+
+        lovely_tensors.monkey_patch()
+    except ImportError:
+        pass
+
+
+_setup()
+
+
+def test_llama():
+    """
+    Test that we can reproduce the Llama model.
+
+    This here is the final complete test.
+    There are several other sub-tests:
+
+    - :func:`test_rotary_embedding`
+    - :func:`test_rope_causal_self_att`
+
+    Some references for the whole Llama model:
+    https://github.com/meta-llama/llama/blob/main/llama/model.py
+    https://github.com/meta-llama/llama3/blob/main/llama/model.py
+    https://github.com/karpathy/llama2.c/blob/master/model.py
+    https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py
+    https://github.com/hkproj/pytorch-llama/blob/main/model.py
+    https://github.com/likejazz/llama3.np/blob/main/llama3.py
+    """
+    from returnn.frontend.decoder.transformer import TransformerDecoder, TransformerDecoderLayer, FeedForwardGated
+    from returnn.frontend.conversions.hf_llama import import_params_hf_llama_to_rf_transformer_decoder
+    from transformers.models.llama.modeling_llama import LlamaDecoderLayer, LlamaForCausalLM, LlamaModel, LlamaConfig
+
+    config = LlamaConfig(
+        vocab_size=11,
+        hidden_size=64,
+        intermediate_size=64 * 4,
+        num_hidden_layers=2,
+        num_attention_heads=2,
+        max_position_embeddings=128,
+    )
+
+    model_hf = LlamaForCausalLM(config)
+    print("HF Model:")
+    print(model_hf)
+    print("Parameters:")
+    num_params = 0
+    for k, v in model_hf.named_parameters():
+        print(f"{k}: {list(v.shape)} {v.dtype}")
+        num_params += v.numel()
+    print("Total number of parameters:", num_params)
+
+    rf.select_backend_torch()
+
+    model_dim = Dim(config.hidden_size, name="model")
+    model_rf = TransformerDecoder(
+        encoder_dim=None,
+        vocab_dim=Dim(config.vocab_size, name="vocab"),
+        model_dim=model_dim,
+        num_layers=config.num_hidden_layers,
+        pos_enc=None,
+        norm=rf.RMSNorm,
+        ff=FeedForwardGated,
+        share_embedding=False,
+        input_embedding_scale=1.0,
+        decoder_layer_opts=dict(self_att=rf.RotaryPosCausalSelfAttention, self_att_opts=dict(with_bias=False)),
+        num_heads=config.num_attention_heads,
+        dropout=0,
+        att_dropout=0,
+    )
+    print("RF Model:")
+    print(model_rf)
+    print("Parameters:")
+    num_params = 0
+    for k, v in model_rf.named_parameters():
+        print(f"{k}: {list(v.dims)} {v.dtype}")
+        num_params += v.num_elements()
+    print("Total number of parameters:", num_params)
+
+    import_params_hf_llama_to_rf_transformer_decoder(model_hf, model_rf)
+
+    batch_dim = Dim(3, name="batch")
+    seq_dim = Dim(rf.random_uniform([batch_dim], minval=7, maxval=13, dtype="int32"), name="seq")
+    in_ = rf.random_uniform([batch_dim, seq_dim], sparse_dim=model_rf.vocab_dim)
+    in_.name = "input_labels"
+
+    with PyTracer([TransformerDecoder.__call__, TransformerDecoderLayer.__call__], Tensor) as trace_rf:
+        out_rf, _ = model_rf(in_, spatial_dim=seq_dim, state=model_rf.default_initial_state(batch_dims=[batch_dim]))
+
+    mask = rf.sequence_mask([batch_dim, seq_dim])
+    with PyTracer([LlamaForCausalLM.forward, LlamaModel.forward, LlamaDecoderLayer.forward], torch.Tensor) as trace_hf:
+        out_hf = model_hf(in_.raw_tensor, attention_mask=mask.raw_tensor)
+
+    check_py_traces_rf_to_pt_equal(
+        trace_rf.captured_locals,
+        trace_hf.captured_locals,
+        [
+            (
+                (TransformerDecoder.__call__, 0, "decoded", 0),
+                (LlamaModel.forward, 0, "inputs_embeds", 0),
+                (batch_dim, seq_dim, model_dim),
+            ),
+        ],
+    )
+
+    print("Check...")
+    assert out_rf.raw_tensor.shape == out_hf.logits.shape
+    torch.testing.assert_allclose(out_rf.raw_tensor, out_hf.logits)
+    print(" all matched!")
+
+
+def test_feed_forward_gated():
+    from returnn.frontend.decoder.transformer import FeedForwardGated
+    from returnn.frontend.conversions.hf_llama import import_params_hf_llama_mlp_to_rf_feed_forward_gated
+    from transformers.models.llama.modeling_llama import LlamaMLP, LlamaConfig
+
+    config = LlamaConfig(
+        vocab_size=11,
+        hidden_size=64,
+        intermediate_size=64 * 4,
+        num_hidden_layers=2,
+        num_attention_heads=2,
+        max_position_embeddings=128,
+    )
+
+    model_hf = LlamaMLP(config)
+
+    rf.select_backend_torch()
+    rf.set_random_seed(42)
+
+    model_dim = Dim(config.hidden_size, name="model")
+    model_rf = FeedForwardGated(out_dim=model_dim, ff_dim=Dim(config.intermediate_size, name="inter"), dropout=0.0)
+
+    import_params_hf_llama_mlp_to_rf_feed_forward_gated(model_hf, model_rf)
+
+    batch_dim = Dim(3, name="batch")
+    seq_dim = Dim(rf.random_uniform([batch_dim], minval=7, maxval=13, dtype="int32"), name="seq")
+    in_ = rf.random_uniform([batch_dim, seq_dim, model_dim])
+    in_.name = "input"
+
+    out_rf = model_rf(in_)
+    out_rf = out_rf.copy_transpose((batch_dim, seq_dim, model_dim))
+
+    out_hf = model_hf(in_.raw_tensor)
+
+    print("Check...")
+    assert out_rf.raw_tensor.shape == out_hf.shape
+    torch.testing.assert_allclose(out_rf.raw_tensor, out_hf)
+    print(" all matched!")
+
+
+def test_transformer_rel_pos_att():
+    """
+    This tests that TransformerDecoder together with RelPosCausalSelfAttention
+    and FeedForwardGated works in a reasonable standard setup.
+    Works = does not cause exceptions.
+
+    Additionally, we test an issue that dim tags seem to be leaking.
+    """
+    from returnn.tensor import TensorDict, batch_dim
+    from returnn.frontend.decoder.transformer import TransformerDecoder, FeedForwardGated
+    from returnn.datasets.util.vocabulary import Vocabulary
+    from returnn.torch.data.extern_data import raw_dict_to_extern_data
+
+    rf.select_backend_torch()
+
+    vocab = Vocabulary.create_vocab_from_labels(
+        [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"], eos_label=0, bos_label=0
+    )
+    assert vocab.bos_label_id is not None and vocab.eos_label_id is not None
+    vocab_dim = Dim(vocab.num_labels, name="vocab", vocab=vocab)
+
+    model_def = rf.build_dict(
+        TransformerDecoder,
+        encoder_dim=None,
+        num_layers=2,  # with a higher number of layers, the problem occurs more often, but also with 2
+        model_dim=20,
+        num_heads=2,
+        pos_enc=None,
+        norm=rf.build_dict(rf.RMSNorm),
+        ff=rf.build_dict(FeedForwardGated),
+        decoder_layer_opts=dict(self_att=rf.build_dict(rf.RelPosCausalSelfAttention, with_bias=False)),
+        dropout=0.0,
+        att_dropout=0.0,
+    )
+    model = rf.build_from_dict(model_def, vocab_dim=vocab_dim)
+    assert isinstance(model, TransformerDecoder)
+
+    leakages = []
+
+    # Adapted from Dim reset_raw.
+    def _num_referenced_dim_tags(self: Dim) -> int:
+        visited = set()  # ids
+        queue = [self]
+        while queue:
+            # noinspection PyShadowingNames
+            dim: Dim = queue.pop()
+            if id(dim) in visited:
+                continue
+            visited.add(id(dim))
+            # noinspection PyProtectedMember
+            dim_extra = dim._extra
+            if dim_extra:
+                # Any dims via dim math could also contain raw tensors,
+                # so iterate through them.
+                print("Dim:", dim)
+                print(" cache_dim_math:", dim_extra.cache_dim_math)
+                print(" same_as:", dim_extra.same_as)
+                print(" copy_same_as:", dim_extra.copy_same_as)
+                print(" same_for_batch_ctx:", dim_extra.same_for_batch_ctx)
+                queue += dim_extra.cache_dim_math.values()
+                if dim_extra.same_as:
+                    queue.append(dim_extra.same_as)
+                if dim_extra.copy_same_as:
+                    queue.append(dim_extra.copy_same_as)
+                queue += dim_extra.same_for_batch_ctx.values()
+        print(f"{self} _num_referenced_dim_tags (reset_raw), visited {len(visited)}")
+        return len(visited)
+
+    time_dim = Dim(None, name="time")
+    extern_data_template = TensorDict([Tensor("data", (batch_dim, time_dim), "int32", sparse_dim=vocab_dim)])
+
+    prev_step_num_tags = 0
+    for step in range(10):
+        print("Step:", step)
+        rf.init_train_step_run_ctx(train_flag=False, step=step)
+
+        # Check that we don't have any dim tags leaking.
+        # Do that right after init_train_step_run_ctx, because that might clean some previous caches.
+        step_num_tags = _num_referenced_dim_tags(time_dim)
+        if step > 1 and step_num_tags > prev_step_num_tags:
+            leakages.append(step_num_tags - prev_step_num_tags)
+        prev_step_num_tags = step_num_tags
+
+        seq_lens = torch.randint(5, 11, (3,), dtype=torch.int32)
+        extern_data = raw_dict_to_extern_data(
+            {"data": torch.randint(0, vocab_dim.dimension, (3, seq_lens.max())), "data:seq_len": seq_lens},
+            extern_data_template=extern_data_template,
+            device="cpu",
+        )
+
+        targets = extern_data["data"]
+        targets_spatial_dim = time_dim
+        input_labels, (targets_w_eos_spatial_dim,) = rf.pad(
+            targets, axes=[targets_spatial_dim], padding=[(1, 0)], value=vocab.bos_label_id
+        )
+        targets_w_eos, _ = rf.pad(
+            targets,
+            axes=[targets_spatial_dim],
+            padding=[(0, 1)],
+            value=vocab.eos_label_id,
+            out_dims=[targets_w_eos_spatial_dim],
+        )
+
+        batch_dims = [batch_dim]
+
+        # Gradients not relevant for this test.
+        with torch.no_grad():
+            logits, _ = model(
+                input_labels,
+                spatial_dim=targets_w_eos_spatial_dim,
+                encoder=None,
+                state=model.default_initial_state(batch_dims=batch_dims),
+            )
+
+        logits_packed, pack_dim = rf.pack_padded(
+            logits, dims=batch_dims + [targets_w_eos_spatial_dim], enforce_sorted=False
+        )
+        targets_packed, _ = rf.pack_padded(
+            targets_w_eos, dims=batch_dims + [targets_w_eos_spatial_dim], enforce_sorted=False, out_dim=pack_dim
+        )
+
+        log_prob = rf.log_softmax(logits_packed, axis=model.vocab_dim)
+        # log_prob = rf.label_smoothed_log_prob_gradient(log_prob, 0.1, axis=model.target_dim)
+        loss = rf.cross_entropy(
+            target=targets_packed, estimated=log_prob, estimated_type="log-probs", axis=model.vocab_dim
+        )
+        loss.mark_as_loss("ce", use_normalized_loss=True)
+
+        best = rf.reduce_argmax(logits_packed, axis=model.vocab_dim)
+        frame_error = best != targets_packed
+        frame_error.mark_as_loss(name="fer", as_error=True)
+
+    assert not leakages, f"Leakages: {leakages}"
+
+
+if __name__ == "__main__":
+    better_exchook.install()
+    if len(sys.argv) <= 1:
+        for k, v in sorted(globals().items()):
+            if k.startswith("test_"):
+                print("-" * 40)
+                print("Executing: %s" % k)
+                try:
+                    v()
+                except unittest.SkipTest as exc:
+                    print("SkipTest:", exc)
+                print("-" * 40)
+        print("Finished all tests.")
+    else:
+        assert len(sys.argv) >= 2
+        for arg in sys.argv[1:]:
+            print("Executing: %s" % arg)
+            if arg in globals():
+                globals()[arg]()  # assume function and execute
+            else:
+                eval(arg)  # assume Python code and execute
@@ -1,2 +0,0 @@
-version = '1.20240830.140746'
-long_version = '1.20240830.140746+git.d8709ff'