returnn 1.20240727.10001__tar.gz → 1.20240730.153730__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of returnn might be problematic. Click here for more details.

Files changed (458)
  1. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/PKG-INFO +1 -1
  2. returnn-1.20240730.153730/_setup_info_generated.py +2 -0
  3. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/audio.py +40 -11
  4. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/basic.py +1 -0
  5. returnn-1.20240730.153730/returnn/datasets/postprocessing.py +210 -0
  6. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/attention.py +69 -5
  7. returnn-1.20240730.153730/returnn/frontend/conversions/__init__.py +3 -0
  8. returnn-1.20240730.153730/returnn/frontend/conversions/hf_llama.py +56 -0
  9. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/decoder/transformer.py +104 -11
  10. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/linear.py +1 -1
  11. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/normalization.py +41 -5
  12. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/debug.py +188 -1
  13. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn.egg-info/PKG-INFO +1 -1
  14. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn.egg-info/SOURCES.txt +3 -0
  15. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/PyCharm-inspection-profile.xml +2 -1
  16. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +2 -1
  17. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_Dataset.py +52 -1
  18. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_attention.py +239 -0
  19. returnn-1.20240727.10001/_setup_info_generated.py +0 -2
  20. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/.editorconfig +0 -0
  21. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/.gitignore +0 -0
  22. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/.gitmodules +0 -0
  23. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/.kateconfig +0 -0
  24. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/CHANGELOG.md +0 -0
  25. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/CODEOWNERS +0 -0
  26. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/CONTRIBUTING.md +0 -0
  27. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/LICENSE +0 -0
  28. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/MANIFEST.in +0 -0
  29. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/README.rst +0 -0
  30. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/__init__.py +0 -0
  31. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/12AX.cluster_map +0 -0
  32. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/_setup_returnn_env.py +0 -0
  33. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-fwd.config +0 -0
  34. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-horovod-mpi.py +0 -0
  35. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-horovod-mpi.py.sh +0 -0
  36. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-horovod-mpi.sh +0 -0
  37. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-hyper-param-tuning.config +0 -0
  38. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-iter-dataset.py +0 -0
  39. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-list-devices.py +0 -0
  40. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-lua-torch-layer.config +0 -0
  41. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-pretrain.config +0 -0
  42. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-record-and-push-to-webserver.py +0 -0
  43. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-returnn-as-framework.py +0 -0
  44. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-rf-pt-benchmark.py +0 -0
  45. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-rf.config +0 -0
  46. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-rhn-enwik8.config +0 -0
  47. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-sprint-interface.py +0 -0
  48. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-att-copy.config +0 -0
  49. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-attention.config +0 -0
  50. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
  51. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
  52. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-enc-dec.config +0 -0
  53. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-hard-att-copy.config +0 -0
  54. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-lstm-benchmark.py +0 -0
  55. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
  56. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
  57. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-native-lstm.12ax.config +0 -0
  58. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-native-lstm2.12ax.config +0 -0
  59. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
  60. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-neural-transducer.12ax.config +0 -0
  61. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-rec-explicit-lstm.config +0 -0
  62. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-rec-explicit-rnn.config +0 -0
  63. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-rec-self-att.config +0 -0
  64. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-search-compiled-graph.py +0 -0
  65. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
  66. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-timit-lstm-ctc.config +0 -0
  67. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-torch.config +0 -0
  68. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
  69. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo.sh +0 -0
  70. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
  71. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
  72. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
  73. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/README.txt +0 -0
  74. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/chars.txt +0 -0
  75. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/config_demo +0 -0
  76. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/config_fwd +0 -0
  77. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/config_real +0 -0
  78. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
  79. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/decode.py +0 -0
  80. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
  81. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/go.sh +0 -0
  82. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/lines.txt +0 -0
  83. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/split/eval.txt +0 -0
  84. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/split/train.txt +0 -0
  85. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/split/valid.txt +0 -0
  86. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/README.md +0 -0
  87. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/artificial/create_test_h5.py +0 -0
  88. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/artificial/forwardconfig +0 -0
  89. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/artificial/go.sh +0 -0
  90. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/artificial/trainconfig +0 -0
  91. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
  92. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
  93. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/artificial_rgb/go.sh +0 -0
  94. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
  95. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/pyproject.toml +0 -0
  96. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/requirements.txt +0 -0
  97. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/__init__.py +0 -0
  98. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/__main__.py +0 -0
  99. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/__old_mod_loader__.py +0 -0
  100. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/__setup__.py +0 -0
  101. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/config.py +0 -0
  102. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/__init__.py +0 -0
  103. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/bundle_file.py +0 -0
  104. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/cached.py +0 -0
  105. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/cached2.py +0 -0
  106. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/distrib_files.py +0 -0
  107. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/generating.py +0 -0
  108. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/hdf.py +0 -0
  109. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/lm.py +0 -0
  110. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/map.py +0 -0
  111. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/meta.py +0 -0
  112. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/multi_proc.py +0 -0
  113. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/normalization_data.py +0 -0
  114. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/numpy_dump.py +0 -0
  115. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/raw_wav.py +0 -0
  116. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/sprint.py +0 -0
  117. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/stereo.py +0 -0
  118. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/util/__init__.py +0 -0
  119. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/util/feature_extraction.py +0 -0
  120. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/util/strings.py +0 -0
  121. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/util/vocabulary.py +0 -0
  122. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/engine/__init__.py +0 -0
  123. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/engine/base.py +0 -0
  124. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/engine/batch.py +0 -0
  125. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/__init__.py +0 -0
  126. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/__main__.py +0 -0
  127. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/.git +0 -0
  128. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
  129. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
  130. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
  131. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
  132. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
  133. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
  134. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
  135. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
  136. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
  137. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
  138. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
  139. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
  140. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
  141. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
  142. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
  143. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
  144. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
  145. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
  146. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
  147. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
  148. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
  149. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
  150. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
  151. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
  152. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/__init__.py +0 -0
  153. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/graph_editor/README.md +0 -0
  154. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/graph_editor/__init__.py +0 -0
  155. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/graph_editor/edit.py +0 -0
  156. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/graph_editor/reroute.py +0 -0
  157. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/graph_editor/select.py +0 -0
  158. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/graph_editor/subgraph.py +0 -0
  159. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/graph_editor/transform.py +0 -0
  160. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/graph_editor/util.py +0 -0
  161. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/forward_iface.py +0 -0
  162. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/__init__.py +0 -0
  163. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/_backend.py +0 -0
  164. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/_native/__init__.py +0 -0
  165. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/_native/backend.cpp +0 -0
  166. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/_native/backend.hpp +0 -0
  167. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/_native/module.cpp +0 -0
  168. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/_native/module.hpp +0 -0
  169. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/_native/py_utils.hpp +0 -0
  170. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/_native/tensor_ops.cpp +0 -0
  171. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/_native/tensor_ops.hpp +0 -0
  172. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/_numpy_backend.py +0 -0
  173. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/_random_journal.py +0 -0
  174. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/_utils.py +0 -0
  175. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/array_.py +0 -0
  176. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/audio/__init__.py +0 -0
  177. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/audio/mel.py +0 -0
  178. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/audio/specaugment.py +0 -0
  179. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/backend.py +0 -0
  180. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/build_from_dict.py +0 -0
  181. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/cond.py +0 -0
  182. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/const.py +0 -0
  183. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/container.py +0 -0
  184. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/control_flow_ctx.py +0 -0
  185. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/conv.py +0 -0
  186. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/decoder/__init__.py +0 -0
  187. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/device.py +0 -0
  188. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/dims.py +0 -0
  189. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/dropout.py +0 -0
  190. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/dtype.py +0 -0
  191. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/encoder/__init__.py +0 -0
  192. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/encoder/base.py +0 -0
  193. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/encoder/conformer.py +0 -0
  194. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/gradient.py +0 -0
  195. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/graph.py +0 -0
  196. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/hooks.py +0 -0
  197. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/init.py +0 -0
  198. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/label_smoothing.py +0 -0
  199. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/loop.py +0 -0
  200. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/loss.py +0 -0
  201. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/math_.py +0 -0
  202. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/matmul.py +0 -0
  203. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/module.py +0 -0
  204. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/parameter.py +0 -0
  205. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/parametrizations.py +0 -0
  206. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/parametrize.py +0 -0
  207. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/piecewise_linear.py +0 -0
  208. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/rand.py +0 -0
  209. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/rec.py +0 -0
  210. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/reduce.py +0 -0
  211. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/run_ctx.py +0 -0
  212. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/signal.py +0 -0
  213. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/state.py +0 -0
  214. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/stepwise_scheduler.py +0 -0
  215. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/tensor_array.py +0 -0
  216. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/types.py +0 -0
  217. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/import_/__init__.py +0 -0
  218. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/import_/common.py +0 -0
  219. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/import_/git.py +0 -0
  220. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/import_/import_.py +0 -0
  221. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/learning_rate_control.py +0 -0
  222. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/log.py +0 -0
  223. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/native_op.cpp +0 -0
  224. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/native_op.py +0 -0
  225. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/pretrain.py +0 -0
  226. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/sprint/__init__.py +0 -0
  227. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/sprint/cache.py +0 -0
  228. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/sprint/control.py +0 -0
  229. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/sprint/error_signals.py +0 -0
  230. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/sprint/extern_interface.py +0 -0
  231. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/sprint/interface.py +0 -0
  232. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tensor/README.md +0 -0
  233. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tensor/__init__.py +0 -0
  234. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tensor/_dim_extra.py +0 -0
  235. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tensor/_tensor_extra.py +0 -0
  236. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tensor/_tensor_mixin_base.py +0 -0
  237. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tensor/_tensor_op_overloads.py +0 -0
  238. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tensor/control_flow_ctx.py +0 -0
  239. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tensor/dim.py +0 -0
  240. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tensor/marked_dim.py +0 -0
  241. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tensor/tensor.py +0 -0
  242. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tensor/tensor_dict.py +0 -0
  243. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tensor/utils.py +0 -0
  244. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/__init__.py +0 -0
  245. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/compat.py +0 -0
  246. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/data_pipeline.py +0 -0
  247. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/distributed.py +0 -0
  248. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/engine.py +0 -0
  249. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/README.md +0 -0
  250. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/__init__.py +0 -0
  251. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/_backend.py +0 -0
  252. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/_utils.py +0 -0
  253. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/cond.py +0 -0
  254. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
  255. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
  256. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/dims.py +0 -0
  257. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/layer.py +0 -0
  258. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/loop.py +0 -0
  259. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/make_layer.py +0 -0
  260. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/masked_computation.py +0 -0
  261. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
  262. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
  263. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_low_level/__init__.py +0 -0
  264. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_low_level/_backend.py +0 -0
  265. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/horovod.py +0 -0
  266. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/hyper_param_tuning.py +0 -0
  267. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/layers/__init__.py +0 -0
  268. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/layers/base.py +0 -0
  269. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/layers/basic.py +0 -0
  270. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/layers/rec.py +0 -0
  271. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/layers/segmental_model.py +0 -0
  272. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/layers/signal_processing.py +0 -0
  273. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/layers/variable.py +0 -0
  274. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/native_op.py +0 -0
  275. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/network.py +0 -0
  276. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/sprint.py +0 -0
  277. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/updater.py +0 -0
  278. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/util/__init__.py +0 -0
  279. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/util/basic.py +0 -0
  280. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/util/data.py +0 -0
  281. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/util/gradient_checkpoint.py +0 -0
  282. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/util/ken_lm.py +0 -0
  283. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/util/open_fst.py +0 -0
  284. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/README.md +0 -0
  285. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/__init__.py +0 -0
  286. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/data/__init__.py +0 -0
  287. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/data/extern_data.py +0 -0
  288. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/data/pipeline.py +0 -0
  289. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/data/queued_data_iter.py +0 -0
  290. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
  291. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/data/tensor_utils.py +0 -0
  292. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/distributed.py +0 -0
  293. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/engine.py +0 -0
  294. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/frontend/__init__.py +0 -0
  295. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/frontend/_backend.py +0 -0
  296. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/frontend/_rand.py +0 -0
  297. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/frontend/bridge.py +0 -0
  298. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/frontend/raw_ops.py +0 -0
  299. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/updater.py +0 -0
  300. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/util/README.md +0 -0
  301. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/util/__init__.py +0 -0
  302. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/util/array_.py +0 -0
  303. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/util/diagnose_gpu.py +0 -0
  304. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/util/gradient_checkpoint.py +0 -0
  305. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/util/scaled_gradient.py +0 -0
  306. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/__init__.py +0 -0
  307. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/basic.py +0 -0
  308. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/better_exchook.py +0 -0
  309. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/bpe.py +0 -0
  310. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/debug_helpers.py +0 -0
  311. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/file_cache.py +0 -0
  312. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/fsa.py +0 -0
  313. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/literal_py_to_pickle.py +0 -0
  314. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/math.py +0 -0
  315. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
  316. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/native_code_compiler.py +0 -0
  317. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/pprint.py +0 -0
  318. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/py-to-pickle.cpp +0 -0
  319. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/py_compat.py +0 -0
  320. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/py_ext_mod_compiler.py +0 -0
  321. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/result_with_reason.py +0 -0
  322. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/sig_proc.py +0 -0
  323. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/task_system.py +0 -0
  324. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/train_proc_manager.py +0 -0
  325. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/watch_memory.py +0 -0
  326. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn.egg-info/dependency_links.txt +0 -0
  327. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn.egg-info/top_level.txt +0 -0
  328. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/rnn.py +0 -0
  329. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/setup.cfg +0 -0
  330. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/setup.py +0 -0
  331. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/DummySprintExec.py +0 -0
  332. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/PyCharm.idea/.gitignore +0 -0
  333. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/PyCharm.idea/.name +0 -0
  334. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
  335. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
  336. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
  337. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
  338. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/PyCharm.idea/misc.xml +0 -0
  339. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/PyCharm.idea/modules.xml +0 -0
  340. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/PyCharm.idea/returnn.iml +0 -0
  341. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
  342. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/_set_num_threads1.py +0 -0
  343. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/_setup_returnn_env.py +0 -0
  344. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/_setup_test_env.py +0 -0
  345. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/bpe-unicode-demo.codes +0 -0
  346. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/bpe-unicode-demo.vocab +0 -0
  347. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/lexicon_opt.fst +0 -0
  348. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/lexicon_opt.isyms +0 -0
  349. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/lexicon_opt.jpg +0 -0
  350. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/lexicon_opt.osyms +0 -0
  351. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/lint_common.py +0 -0
  352. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/pycharm-inspect.py +0 -0
  353. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/pylint.py +0 -0
  354. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/returnn-as-framework.py +0 -0
  355. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/rf_utils.py +0 -0
  356. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/spelling.dic +0 -0
  357. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_Config.py +0 -0
  358. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_Fsa.py +0 -0
  359. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_GeneratingDataset.py +0 -0
  360. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_HDFDataset.py +0 -0
  361. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_LearningRateControl.py +0 -0
  362. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_Log.py +0 -0
  363. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_MultiProcDataset.py +0 -0
  364. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_Pretrain.py +0 -0
  365. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_ResNet.py +0 -0
  366. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_SprintDataset.py +0 -0
  367. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_SprintInterface.py +0 -0
  368. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_TFEngine.py +0 -0
  369. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_TFNativeOp.py +0 -0
  370. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_TFNetworkLayer.py +0 -0
  371. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_TFNetworkRecLayer.py +0 -0
  372. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_TFNetworkSigProcLayer.py +0 -0
  373. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_TFUpdater.py +0 -0
  374. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_TFUtil.py +0 -0
  375. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_TF_determinism.py +0 -0
  376. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_TaskSystem.py +0 -0
  377. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_TaskSystem_SharedMem.py +0 -0
  378. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_TranslationDataset.py +0 -0
  379. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_Util.py +0 -0
  380. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_demos.py +0 -0
  381. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_fork_exec.py +0 -0
  382. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_hdf_dump.py +0 -0
  383. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_array.py +0 -0
  384. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_base.py +0 -0
  385. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_cond.py +0 -0
  386. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_const.py +0 -0
  387. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_container.py +0 -0
  388. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_conv.py +0 -0
  389. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_encoder_conformer.py +0 -0
  390. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_gradient.py +0 -0
  391. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_label_smoothing.py +0 -0
  392. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_loop.py +0 -0
  393. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_math.py +0 -0
  394. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_normalization.py +0 -0
  395. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_piecewise_linear.py +0 -0
  396. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_rec.py +0 -0
  397. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_reduce.py +0 -0
  398. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_signal.py +0 -0
  399. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_tensor.py +0 -0
  400. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_tools.py +0 -0
  401. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_torch_dataset.py +0 -0
  402. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_torch_engine.py +0 -0
  403. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_torch_frontend.py +0 -0
  404. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_torch_internal_frontend.py +0 -0
  405. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_torch_util.py +0 -0
  406. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/torch_utils.py +0 -0
  407. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/_setup_returnn_env.py +0 -0
  408. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/analyze-dataset-batches.py +0 -0
  409. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/bliss-collect-seq-lens.py +0 -0
  410. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/bliss-dump-text.py +0 -0
  411. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/bliss-get-segment-names.py +0 -0
  412. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/bliss-to-ogg-zip.py +0 -0
  413. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/bpe-create-lexicon.py +0 -0
  414. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/calculate-word-error-rate.py +0 -0
  415. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/cleanup-old-models.py +0 -0
  416. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/collect-orth-symbols.py +0 -0
  417. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/collect-words.py +0 -0
  418. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/compile_native_op.py +0 -0
  419. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/compile_tf_graph.py +0 -0
  420. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/debug-dump-search-scores.py +0 -0
  421. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/debug-plot-search-scores.py +0 -0
  422. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/dump-dataset-raw-strings.py +0 -0
  423. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/dump-dataset.py +0 -0
  424. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/dump-forward-stats.py +0 -0
  425. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/dump-forward.py +0 -0
  426. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/dump-network-json.py +0 -0
  427. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/dump-pickle.py +0 -0
  428. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/extract_state_tying_from_dataset.py +0 -0
  429. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/get-attention-weights.py +0 -0
  430. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/get-best-model-epoch.py +0 -0
  431. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/hdf_dump.py +0 -0
  432. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/hdf_dump_translation_dataset.py +0 -0
  433. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/import-blocks-mt-model.py +0 -0
  434. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/import-t2t-mt-model.py +0 -0
  435. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/.gitignore +0 -0
  436. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/Makefile +0 -0
  437. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/README.md +0 -0
  438. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/example/README.md +0 -0
  439. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/example/libs_list +0 -0
  440. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
  441. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
  442. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
  443. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/example/state_vars_list +0 -0
  444. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/example/tensor_names_list +0 -0
  445. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/file.h +0 -0
  446. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
  447. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
  448. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/main.cc +0 -0
  449. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/rescorer.h +0 -0
  450. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/vocabulary.cc +0 -0
  451. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/vocabulary.h +0 -0
  452. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/tf_avg_checkpoints.py +0 -0
  453. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/tf_inspect_checkpoint.py +0 -0
  454. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/tf_inspect_summary_log.py +0 -0
  455. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/torch_avg_checkpoints.py +0 -0
  456. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/torch_export_to_onnx.py +0 -0
  457. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/torch_inspect_checkpoint.py +0 -0
  458. {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/torch_inspect_checkpoint_and_opt.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: returnn
3
- Version: 1.20240727.10001
3
+ Version: 1.20240730.153730
4
4
  Summary: The RWTH extensible training framework for universal recurrent neural networks
5
5
  Home-page: https://github.com/rwth-i6/returnn/
6
6
  Author: Albert Zeyer
@@ -0,0 +1,2 @@
1
+ version = '1.20240730.153730'
2
+ long_version = '1.20240730.153730+git.7b46160'
@@ -151,8 +151,6 @@ class OggZipDataset(CachedDataset2):
151
151
  self.num_outputs["classes"] = [self.targets.num_labels, 1]
152
152
  if self.feature_extractor:
153
153
  self.num_outputs["data"] = [self.num_inputs, 2]
154
- else:
155
- self.num_outputs["data"] = [0, 2]
156
154
  self._data: Optional[List[Dict[str, Any]]] = None # lazily loaded
157
155
  self._fixed_random_subset = fixed_random_subset
158
156
  self._fixed_random_subset_seed = fixed_random_subset_seed
@@ -402,15 +400,46 @@ class OggZipDataset(CachedDataset2):
402
400
  self._lazy_init()
403
401
  return len(self._data)
404
402
 
405
- def get_data_shape(self, key):
403
def get_data_dtype(self, key: str) -> str:
    """
    :param key: data key, one of "data", "classes", "raw" or "orth"
    :return: dtype of data entry with `key`
    :raises ValueError: if `key` is none of the known data keys
    """
    known_dtypes = (
        ("data", "float32"),
        ("classes", "int32"),
        ("raw", "string"),
        ("orth", "uint8"),
    )
    for name, dtype in known_dtypes:
        if key == name:
            return dtype
    raise ValueError(f"{self}: unknown data key: {key}")
415
+
416
def get_data_keys(self) -> List[str]:
    """
    :return: available data keys: "data" only with a feature extractor, "classes" only with targets,
        followed by "orth" and "raw", which are always listed
    """
    optional_keys = (("data", self.feature_extractor), ("classes", self.targets))
    present = [name for name, source in optional_keys if source is not None]
    return present + ["orth", "raw"]
424
+
425
def get_data_shape(self, key: str):
    """
    Shape of a single entry for `key`, with the leading num-frames axis left out.

    :param key: data key
    :returns get_data(*, key).shape[1:], i.e. num-frames excluded
    :rtype: list[int]
    :raises ValueError: if `key` is not a known data key
    """
    if key in ["classes", "orth", "raw"]:
        return []
    if key != "data":
        raise ValueError(f"{self}: unknown data key {key}")

    # "data" is only meaningful when audio features are actually extracted.
    extractor = self.feature_extractor
    assert extractor is not None
    num_channels = extractor.num_channels
    if num_channels is None:
        return [extractor.get_feature_dimension()]
    # multi-channel audio: the channel axis comes before the feature axis
    return [num_channels, extractor.get_feature_dimension()]
439
+
440
def is_data_sparse(self, key: str) -> bool:
    """
    :param key: data key
    :return: whether data entry with `key` is sparse; this holds only for "classes"
    """
    sparse_key = "classes"
    return key == sparse_key
414
443
 
415
444
  def _get_transcription(self, corpus_seq_idx: int):
416
445
  """
@@ -467,13 +496,14 @@ class OggZipDataset(CachedDataset2):
467
496
  """
468
497
  self._lazy_init()
469
498
  seq_tag = self._get_tag_from_info_dict(self._data[corpus_seq_idx])
499
+ features = {}
470
500
  if self.feature_extractor:
471
501
  with self._open_audio_file(corpus_seq_idx) as audio_file:
472
- features = self.feature_extractor.get_audio_features_from_raw_bytes(audio_file, seq_name=seq_tag)
473
- else:
474
- features = numpy.zeros((), dtype=numpy.float32) # currently the API requires some dummy values...
502
+ data = self.feature_extractor.get_audio_features_from_raw_bytes(audio_file, seq_name=seq_tag)
503
+ features["data"] = data
475
504
  targets, txt = self._get_transcription(corpus_seq_idx)
476
- targets = numpy.array(targets, dtype="int32")
505
+ if self.targets is not None:
506
+ features["classes"] = numpy.array(targets, dtype="int32")
477
507
  raw_txt = str_to_numpy_array(txt)
478
508
  orth = txt.encode("utf8")
479
509
  if PY3:
@@ -483,8 +513,7 @@ class OggZipDataset(CachedDataset2):
483
513
  orth = list(map(ord, orth))
484
514
  orth = numpy.array(orth, dtype="uint8")
485
515
  return DatasetSeq(
486
- features=features,
487
- targets={"classes": targets, "raw": raw_txt, "orth": orth},
516
+ features={**features, "raw": raw_txt, "orth": orth},
488
517
  seq_idx=corpus_seq_idx,
489
518
  seq_tag=seq_tag,
490
519
  )
@@ -1388,6 +1388,7 @@ def get_dataset_class(name: Union[str, Type[Dataset]]) -> Optional[Type[Dataset]
1388
1388
  "map",
1389
1389
  "multi_proc",
1390
1390
  "distrib_files",
1391
+ "postprocessing",
1391
1392
  ]
1392
1393
  for mod_name in mod_names:
1393
1394
  mod = import_module("returnn.datasets.%s" % mod_name)
@@ -0,0 +1,210 @@
1
+ """
2
+ Provides :class:`PostprocessingDataset`.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Union
8
+
9
+ from returnn.datasets.basic import DatasetSeq
10
+ from returnn.datasets.util.vocabulary import Vocabulary
11
+ from returnn.tensor import Tensor, TensorDict
12
+ from returnn.tensor.dim import Dim
13
+ from .basic import init_dataset
14
+ from .cached2 import CachedDataset2
15
+
16
+ __all__ = ["PostprocessingDataset"]
17
+
18
+
19
class PostprocessingDataset(CachedDataset2):
    """
    A dataset that allows for generic post-processing of data from another dataset
    using a function on the segment level and on the level of multiple segments via
    an iterator.

    This allows integrating various data augmentation techniques like e.g. Mixup,
    SpecAugment or speed perturbation into the data loading pipeline.

    The integration into the data loading pipeline makes it easy to distribute the
    data processing work across multiple CPU cores using `MultiProcDataset` and in
    turn frees the GPU from data preprocessing tasks.

    Example usage::

        from returnn.tensor.dim import Dim, DimTypes

        time_dim = Dim(None, kind=DimTypes.Spatial)
        new_data_dim = Dim(128)

        train = {
            "class": "PostprocessingDataset",
            "dataset": {
                "class": "HDFDataset",
                "files": ["/path/to/data.hdf"],
            },
            # one of them, but not both:
            "map_seq": map_seq,  # (data: TensorDict) -> TensorDict
            "map_seq_stream": map_seqs,  # (iter: Iterator[TensorDict]) -> Iterator[TensorDict]
            # only required when data shapes change wrt. the wrapped dataset:
            "map_outputs": {
                "data": {"dims": [time_dim, new_data_dim]},
            },
        }
    """

    def __init__(
        self,
        dataset: Dict[str, Any],
        map_seq: Optional[Callable[[TensorDict], TensorDict]] = None,
        map_seq_stream: Optional[Callable[[Iterator[TensorDict]], Iterator[TensorDict]]] = None,
        map_outputs: Optional[Dict[str, Any]] = None,
        **kwargs,
    ):
        """
        :param dataset: inner dataset to be post-processed
        :param map_seq: post processor function operating on the single-segment level.
            To avoid confusion on the order of how the processing functions are applied to the data, only one of
            `map_seq` and `map_seq_stream` can be specified at a time.
        :param map_seq_stream: post processor function operating on the multiple segment level via an iterator.
            Allows merging multiple segments into one, or generating multiple output segments from one input segment.
            To avoid confusion on the order of how the processing functions are applied to the data, only one of
            `map_seq` and `map_seq_stream` can be specified at a time.
        :param map_outputs: Type and axis specification of the outputs of the mapping functions,
            like extern_data and model_outputs.
            To simplify the common case when no shapes change, this value can be left unspecified. The dataset then
            assumes the same data layout as returned by the wrapped dataset.
            Example: `map_outputs={"data": {"dim": 42}}`
        :param kwargs: see :class:`CachedDataset2`, :class:`Dataset`
        :raises ValueError: if a non-default `seq_ordering` is given, or if not exactly one of
            `map_seq` and `map_seq_stream` is set
        """
        super().__init__(**kwargs)

        if self.seq_ordering != "default":
            raise ValueError(f"{self}: specify seq_ordering in wrapped dataset, not in {self.__class__.__name__}")
        if map_seq is None and map_seq_stream is None:
            raise ValueError(f"{self}: need to either set map_seq or map_seq_stream")
        if map_seq and map_seq_stream:
            raise ValueError(f"{self}: cannot set both map_seq and map_seq_stream")

        self._dataset_def = dataset
        self._map_seq = map_seq
        self._map_seq_stream = map_seq_stream
        self._map_outputs = map_outputs

        self._dataset = init_dataset(self._dataset_def, parent_dataset=self)
        if self._map_seq_stream is None:
            # if the stream mapper is set, the num_seqs may change and the estimation is less accurate
            self._estimated_num_seqs = self._dataset.estimated_num_seqs
        self._data_iter: Optional[Iterator[Tuple[int, TensorDict]]] = None

        self._in_tensor_dict_template = TensorDict(
            {name: self._make_tensor_template_from_input(name) for name in self._dataset.get_data_keys()}
        )
        # Without an explicit output spec, the mapped data is assumed to keep the layout of the wrapped dataset.
        self._out_tensor_dict_template = (
            TensorDict(self._map_outputs) if self._map_outputs is not None else self._in_tensor_dict_template
        )
        self.num_outputs = {
            k: (t.sparse_dim.size if t.sparse_dim else t.shape[-1] if len(t.shape) > 0 else 1, t.ndim)
            for k, t in self._out_tensor_dict_template.data.items()
        }
        self._default_input = "data" if "data" in self.num_outputs else next(iter(self.num_outputs.keys()))
        self.num_inputs = self.num_outputs[self._default_input][0]

        self.labels = {}
        for k, t in self._out_tensor_dict_template.data.items():
            if t.vocab:
                self.labels[k] = t.vocab.labels
            elif t.sparse_dim:  # sparse_dim but not vocab
                self.labels[k] = list(map(str, range(t.sparse_dim.dimension)))  # dummy labels

    def init_seq_order(
        self, epoch: Optional[int] = None, seq_list: Optional[List[str]] = None, seq_order: Optional[List[int]] = None
    ):
        """
        :param epoch:
        :param seq_list:
        :param seq_order:
        :return: whether the order changed (True is always safe to return)
        """
        super().init_seq_order(epoch=epoch, seq_list=seq_list, seq_order=seq_order)

        if epoch is None and seq_list is None and seq_order is None:
            # Nothing requested: report an empty dataset and leave the wrapped dataset untouched.
            self._num_seqs = 0
            return True

        assert self._dataset is not None
        self._dataset.init_seq_order(epoch=epoch, seq_list=seq_list, seq_order=seq_order)
        # enumerate() assigns the output seq indices, in the order the mapped data is produced
        self._data_iter = enumerate(self._build_mapping_iter())
        return True

    def _collect_single_seq(self, seq_idx: int) -> Optional[DatasetSeq]:
        """
        Advances the mapped data iterator until the entry with index `seq_idx` is reached.

        :param seq_idx: index of the requested seq; must not be lower than an index already consumed
        :return: the seq built from the raw tensors of the mapped data, or None once the iterator is exhausted
        """
        while True:
            try:
                loaded_seq_idx, tensor_dict = next(self._data_iter)
            except StopIteration:
                return None
            assert loaded_seq_idx <= seq_idx, "_collect_single_seq must be done monotonically"
            if loaded_seq_idx != seq_idx:
                continue
            seq = DatasetSeq(features={k: t.raw_tensor for k, t in tensor_dict.data.items()}, seq_idx=seq_idx)
            return seq

    def _build_mapping_iter(self) -> Iterator[TensorDict]:
        """
        :return: an iterator applying both the segment level and across-segment transformations on the given dataset
        """

        def _validate_tensor_dict_iter(inner: Iterator[TensorDict]) -> Iterator[TensorDict]:
            """Checks every mapped entry against the output template, then passes it on unchanged."""
            for t_dict in inner:
                for data_key, out_t in self._out_tensor_dict_template.data.items():
                    # The checks are kept separate so that a failure names the key and the violated constraint.
                    if data_key not in t_dict.data:
                        raise KeyError(
                            f"{self}: mapped data is missing data key {data_key!r}, got keys {list(t_dict.data.keys())}"
                        )
                    in_t = t_dict.data[data_key]
                    assert (
                        in_t.ndim == out_t.batch_ndim
                    ), f"{self}: data key {data_key!r}: expected {out_t.batch_ndim} dims, but got {in_t.ndim}"
                    assert (
                        in_t.dtype == out_t.dtype
                    ), f"{self}: data key {data_key!r}: expected dtype {out_t.dtype}, but got {in_t.dtype}"
                    assert all(
                        d.dimension in (d_, None) for (d, d_) in zip(in_t.dims, out_t.shape)
                    ), f"{self}: data key {data_key!r}: dims {in_t.dims} do not match expected shape {out_t.shape}"
                yield t_dict

        data_iter = self._iterate_dataset()
        if self._map_seq_stream is not None:
            data_iter = self._map_seq_stream(data_iter)
            assert isinstance(
                data_iter, Iterator
            ), f"map_seq_stream must produce an {Iterator.__name__}, but produced {type(data_iter).__name__}"
        return _validate_tensor_dict_iter(data_iter)

    def _iterate_dataset(self) -> Iterator[TensorDict]:
        """
        :return: generator providing data samples in the form of a TensorDict
        """
        data_keys = self._dataset.get_data_keys()

        seq_index = 0
        while self._dataset.is_less_than_num_seqs(seq_index):
            self._dataset.load_seqs(seq_index, seq_index + 1)
            tensor_dict = self._in_tensor_dict_template.copy_template()
            for data_key in data_keys:
                tensor_dict.data[data_key].raw_tensor = self._dataset.get_data(seq_index, data_key)
            if self._map_seq is not None:
                tensor_dict = self._map_seq(tensor_dict)
                assert isinstance(
                    tensor_dict, TensorDict
                ), f"map_seq must produce a {TensorDict.__name__}, but produced {type(tensor_dict).__name__}"
            yield tensor_dict
            seq_index += 1

    def _make_tensor_template_from_input(self, data_key: str) -> Tensor:
        """
        :param data_key: data key of the wrapped dataset
        :return: tensor template built from the dtype, shape, sparsity and labels
            which the wrapped dataset reports for `data_key`
        """
        dtype = self._dataset.get_data_dtype(data_key)
        if dtype == "string":
            dims = []
        else:
            feature_dims = [
                Dim(dimension=dim, name=f"{data_key}_dim{i + 1}")
                for i, dim in enumerate(self._dataset.get_data_shape(data_key))
            ]
            # get_data_shape() leaves out the frame axis, so a dynamic one is prepended here
            dims = [Dim(dimension=None, name=f"{data_key}_frame"), *feature_dims]
        sparse_dim = None
        if self._dataset.is_data_sparse(data_key):
            sparse_dim = Dim(dimension=self._dataset.get_data_dim(data_key), name=f"{data_key}_sparse")
            if data_key in self._dataset.labels:
                sparse_dim.vocab = Vocabulary.create_vocab_from_labels(self._dataset.labels[data_key])
        return Tensor(data_key, dims=dims, dtype=dtype, sparse_dim=sparse_dim)
@@ -2,7 +2,6 @@
2
2
  Attention
3
3
  """
4
4
 
5
-
6
5
  from __future__ import annotations
7
6
  from typing import Tuple, Union, Optional, Sequence
8
7
  import weakref
@@ -17,6 +16,7 @@ __all__ = [
17
16
  "SelfAttention",
18
17
  "CausalSelfAttention",
19
18
  "CausalSelfAttentionState",
19
+ "RotaryPosCausalSelfAttention",
20
20
  "RelPosSelfAttention",
21
21
  "RelPosCausalSelfAttention",
22
22
  "CrossAttention",
@@ -264,6 +264,69 @@ class CausalSelfAttentionState(rf.State):
264
264
  self.accum_axis = accum_axis
265
265
 
266
266
 
267
class RotaryPosCausalSelfAttention(CausalSelfAttention):
    """
    Causal self attention using rotary positional encoding (RoPE)
    """

    def __call__(
        self,
        source: Tensor,
        axis: Dim,
        *,
        state: Optional[CausalSelfAttentionState] = None,
    ) -> Tuple[Tensor, CausalSelfAttentionState]:
        """
        forward

        :param source: input to the query/key/value projection
        :param axis: axis to attend over, or ``single_step_dim`` for step-wise operation
        :param state: state of the previous call, if any
        :return: attention output, new state
        """
        q, k, v = self.forward_qkv(source)
        k, v, hist_dim, new_state = _causal_self_att_step(k, v, axis=axis, state=state, self=self)

        # RoPE is applied via the sinusoidal positional encoding.
        # Note: rf.sinusoidal_positional_encoding (like the original) uses the base a bit differently
        # than how it is commonly used for RoPE:
        #   log(base) / (dim / 2 - 1) = log(10_000) * 2 / dim
        #   <=> log(base) = log(10_000) * (dim / 2 - 1) * 2 / dim = log(10_000) * (1 - 2 / dim)
        #   <=> base = 10_000 ** (1 - 2 / dim)
        head_dim = self.key_dim_per_head
        rope_base = 10_000 ** (1 - 2 / head_dim.dimension)
        pos_enc = rf.sinusoidal_positional_encoding(spatial_dim=hist_dim, feat_dim=head_dim, base=rope_base)  # [T,D]

        if axis == single_step_dim:
            # single step: the query takes the encoding at index (history length - 1)
            q_pos_enc = rf.gather(pos_enc, axis=hist_dim, indices=hist_dim.dyn_size_ext - 1)
        else:
            q_pos_enc = rf.replace_dim(pos_enc, in_dim=hist_dim, out_dim=axis)[0]
        q = _apply_rope(q, q_pos_enc, head_dim)
        k = _apply_rope(k, pos_enc, head_dim)

        att_out = self.attention(q, k, v, kv_axis=hist_dim)
        return att_out, new_state
307
+
308
+
309
def _apply_rope(x: Tensor, pos_enc: Tensor, feat_dim: Dim) -> Tensor:
    """
    Treats consecutive feature pairs of `x` as (real, imag) and multiplies each pair, as a complex number,
    with (cos + i * sin) taken from `pos_enc`.

    :param x: [...,T,D] or [...,D]
    :param pos_enc: [T,D] or [D]
    :param feat_dim: D
    :return: [...,T,D] or [...,D]
    """
    half_dim = feat_dim.div_left(2)
    # first half of pos_enc = sin, second half = cos
    sin_part, cos_part = rf.split(pos_enc, axis=feat_dim, out_dims=[half_dim, half_dim])  # [T,D/2]
    pair_dim = Dim(2, name="complex")
    x_pairs = rf.split_dims(x, axis=feat_dim, dims=(half_dim, pair_dim))  # [...,T,D/2,2]
    real = rf.gather(x_pairs, indices=0, axis=pair_dim)
    imag = rf.gather(x_pairs, indices=1, axis=pair_dim)
    # complex multiplication: (real + i * imag) * (cos + i * sin)
    rotated_real = real * cos_part - imag * sin_part
    rotated_imag = real * sin_part + imag * cos_part
    rotated, _ = rf.stack((rotated_real, rotated_imag), out_dim=pair_dim)  # [...,T,D/2,2]
    merged, _ = rf.merge_dims(rotated, dims=(half_dim, pair_dim), out_dim=feat_dim)  # [...,T,D]
    return merged
328
+
329
+
267
330
  class RelPosSelfAttention(SelfAttentionBase):
268
331
  """
269
332
  Self-attention with relative positional encoding.
@@ -836,7 +899,7 @@ def relative_positional_encoding(
836
899
  return emb, out_spatial_dim
837
900
 
838
901
 
839
- _positional_encoding_cache = weakref.WeakKeyDictionary() # run ctx -> (spatial_dim, feat_dim) -> enc
902
+ _sinusoidal_positional_encoding_cache = weakref.WeakKeyDictionary() # run ctx -> (spatial_dim, feat_dim) -> enc
840
903
 
841
904
 
842
905
  def sinusoidal_positional_encoding(
@@ -844,6 +907,7 @@ def sinusoidal_positional_encoding(
844
907
  spatial_dim: Dim,
845
908
  feat_dim: Dim,
846
909
  offset: Optional[Union[int, Tensor]] = None,
910
+ base: Union[int, float] = 1e4,
847
911
  dtype: Optional[str] = None,
848
912
  device: Optional[str] = None,
849
913
  ) -> Tensor:
@@ -867,8 +931,8 @@ def sinusoidal_positional_encoding(
867
931
  dtype = rf.get_default_float_dtype()
868
932
  if not device:
869
933
  device = rf.get_default_device()
870
- cache = _positional_encoding_cache.setdefault(rf.get_run_ctx(), {})
871
- cache_key = (spatial_dim, feat_dim, offset, dtype, device)
934
+ cache = _sinusoidal_positional_encoding_cache.setdefault(rf.get_run_ctx(), {})
935
+ cache_key = (spatial_dim, feat_dim, offset, base, dtype, device)
872
936
  if cache_key in cache:
873
937
  return cache[cache_key]
874
938
  import math
@@ -886,7 +950,7 @@ def sinusoidal_positional_encoding(
886
950
 
887
951
  feat2_dim = feat_dim.div_left(2)
888
952
  div_term = rf.exp(
889
- rf.range_over_dim(feat2_dim, dtype=dtype, device=device) * -(math.log(1e4) / (feat2_dim.dimension - 1))
953
+ rf.range_over_dim(feat2_dim, dtype=dtype, device=device) * -(math.log(base) / (feat2_dim.dimension - 1))
890
954
  )
891
955
  arg_sin = rf.combine_bc(rf.cast(indices, dtype), "*", div_term)
892
956
  arg_cos = arg_sin + math.pi / 2.0
@@ -0,0 +1,3 @@
1
+ """
2
+ Model conversion code, to import model parameters from some external source
3
+ """
@@ -0,0 +1,56 @@
1
+ """
2
+ Import the parameters from the HuggingFace Llama model.
3
+ """
4
+
5
+ from __future__ import annotations
6
+ from typing import TYPE_CHECKING
7
+ import returnn.frontend as rf
8
+
9
+ if TYPE_CHECKING:
10
+ from transformers.models.llama.modeling_llama import LlamaAttention
11
+
12
+
13
def import_params_hf_llama_att_to_rf_rotary_att(model_hf: LlamaAttention, model_rf: rf.RotaryPosCausalSelfAttention):
    """
    Import the parameters from the HF attention module.

    Prints both modules together with their parameters,
    checks that they agree in number of heads, model dimension and total parameter count,
    and then overwrites ``model_rf.qkv.weight`` and ``model_rf.proj.weight`` in place.

    :param model_hf: source module. Read here: ``num_heads``, ``hidden_size``, ``named_parameters()``
        and the ``q_proj`` / ``k_proj`` / ``v_proj`` / ``o_proj`` linear layers.
    :param model_rf: target module. Its ``qkv`` and ``proj`` weights are modified in place.
    :raises AssertionError: if heads, dimension or parameter count mismatch,
        or if any of the HF projections has a bias (importing biases is not implemented).
    """
    import torch

    assert model_hf.num_heads == model_rf.num_heads.dimension, "number of heads mismatch"
    assert model_hf.hidden_size == model_rf.in_dim.dimension, "model dimension mismatch"
    dim = model_hf.hidden_size
    nh = model_hf.num_heads
    hdim = dim // nh

    print("HF Model:")
    print(model_hf)
    print("Parameters:")
    num_params_hf = 0
    for name, param in model_hf.named_parameters():
        print(f"{name}: {list(param.shape)} {param.dtype}")
        num_params_hf += param.numel()
    print("Total number of parameters:", num_params_hf)

    print("RF Model:")
    print(model_rf)
    print("Parameters:")
    num_params_rf = 0
    for name, param in model_rf.named_parameters():
        print(f"{name}: {list(param.dims)} {param.dtype}")
        assert isinstance(param.raw_tensor, torch.nn.Parameter)
        num_params_rf += param.num_elements()
    print("Total number of parameters:", num_params_rf)
    assert num_params_rf == num_params_hf, f"parameter count mismatch: RF {num_params_rf} vs HF {num_params_hf}"

    # Only the weights are copied below, so a bias on *any* projection would be silently dropped.
    # The parameter count check above does not catch every such case
    # (e.g. an HF o_proj bias matched by an RF proj bias), so check each projection explicitly,
    # and do so before any RF parameter is modified.
    for proj_name in ("q_proj", "k_proj", "v_proj", "o_proj"):
        assert getattr(model_hf, proj_name).bias is None, f"{proj_name}.bias: not implemented"

    # Torch Linear: (out,in), but RF has (in,out).
    q = model_hf.q_proj.weight.T.reshape(dim, nh, hdim)  # (in,h,out/h)
    k = model_hf.k_proj.weight.T.reshape(dim, nh, hdim)  # (in,h,out/h)
    v = model_hf.v_proj.weight.T.reshape(dim, nh, hdim)  # (in,h,out/h)
    # Reorder complex numbers:
    # per head, the HF layout is split in two halves (index j*hdim/2+i),
    # which is turned into interleaved pairs (index 2*i+j) here. Only q and k are affected, v is kept.
    q = q.reshape(dim, nh, 2, hdim // 2).transpose(-1, -2).flatten(-2)
    k = k.reshape(dim, nh, 2, hdim // 2).transpose(-1, -2).flatten(-2)
    qkv = torch.cat([q, k, v], dim=2)  # (in,h,out/h*3)
    qkv = qkv.reshape(dim, 3 * dim)
    with torch.no_grad():  # in-place update of parameters, must not be tracked by autograd
        model_rf.qkv.weight.raw_tensor.copy_(qkv)
        model_rf.proj.weight.raw_tensor.copy_(model_hf.o_proj.weight.T)