returnn 1.20241205.152736__tar.gz → 1.20241210.111636__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of returnn might be problematic; see the registry's advisory page for more details.

Files changed (469)
  1. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/PKG-INFO +1 -1
  2. returnn-1.20241210.111636/_setup_info_generated.py +2 -0
  3. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/__main__.py +35 -34
  4. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/datasets/meta.py +6 -2
  5. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/attention.py +6 -2
  6. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/decoder/transformer.py +29 -14
  7. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/encoder/transformer.py +26 -10
  8. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/util/basic.py +54 -0
  9. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn.egg-info/PKG-INFO +1 -1
  10. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_rf_attention.py +79 -1
  11. returnn-1.20241205.152736/_setup_info_generated.py +0 -2
  12. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/.editorconfig +0 -0
  13. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/.gitignore +0 -0
  14. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/.gitmodules +0 -0
  15. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/.kateconfig +0 -0
  16. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/CHANGELOG.md +0 -0
  17. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/CODEOWNERS +0 -0
  18. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/CONTRIBUTING.md +0 -0
  19. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/LICENSE +0 -0
  20. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/MANIFEST.in +0 -0
  21. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/README.rst +0 -0
  22. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/__init__.py +0 -0
  23. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/12AX.cluster_map +0 -0
  24. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/_setup_returnn_env.py +0 -0
  25. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-fwd.config +0 -0
  26. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-horovod-mpi.py +0 -0
  27. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-horovod-mpi.py.sh +0 -0
  28. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-horovod-mpi.sh +0 -0
  29. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-hyper-param-tuning.config +0 -0
  30. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-iter-dataset.py +0 -0
  31. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-list-devices.py +0 -0
  32. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-lua-torch-layer.config +0 -0
  33. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-pretrain.config +0 -0
  34. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-record-and-push-to-webserver.py +0 -0
  35. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-returnn-as-framework.py +0 -0
  36. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-rf-pt-benchmark.py +0 -0
  37. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-rf.config +0 -0
  38. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-rhn-enwik8.config +0 -0
  39. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-sprint-interface.py +0 -0
  40. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-tf-att-copy.config +0 -0
  41. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-tf-attention.config +0 -0
  42. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
  43. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
  44. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-tf-enc-dec.config +0 -0
  45. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-tf-hard-att-copy.config +0 -0
  46. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-tf-lstm-benchmark.py +0 -0
  47. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
  48. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
  49. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-tf-native-lstm.12ax.config +0 -0
  50. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-tf-native-lstm2.12ax.config +0 -0
  51. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
  52. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-tf-neural-transducer.12ax.config +0 -0
  53. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-tf-rec-explicit-lstm.config +0 -0
  54. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-tf-rec-explicit-rnn.config +0 -0
  55. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-tf-rec-self-att.config +0 -0
  56. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-tf-search-compiled-graph.py +0 -0
  57. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
  58. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-timit-lstm-ctc.config +0 -0
  59. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-torch.config +0 -0
  60. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
  61. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/demo.sh +0 -0
  62. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
  63. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
  64. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
  65. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/mdlstm/IAM/README.txt +0 -0
  66. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/mdlstm/IAM/chars.txt +0 -0
  67. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/mdlstm/IAM/config_demo +0 -0
  68. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/mdlstm/IAM/config_fwd +0 -0
  69. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/mdlstm/IAM/config_real +0 -0
  70. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
  71. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/mdlstm/IAM/decode.py +0 -0
  72. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
  73. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/mdlstm/IAM/go.sh +0 -0
  74. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/mdlstm/IAM/lines.txt +0 -0
  75. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/mdlstm/IAM/split/eval.txt +0 -0
  76. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/mdlstm/IAM/split/train.txt +0 -0
  77. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/mdlstm/IAM/split/valid.txt +0 -0
  78. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/mdlstm/README.md +0 -0
  79. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/mdlstm/artificial/create_test_h5.py +0 -0
  80. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/mdlstm/artificial/forwardconfig +0 -0
  81. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/mdlstm/artificial/go.sh +0 -0
  82. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/mdlstm/artificial/trainconfig +0 -0
  83. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
  84. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
  85. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/mdlstm/artificial_rgb/go.sh +0 -0
  86. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
  87. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/pyproject.toml +0 -0
  88. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/requirements.txt +0 -0
  89. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/__init__.py +0 -0
  90. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/__old_mod_loader__.py +0 -0
  91. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/__setup__.py +0 -0
  92. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/config.py +0 -0
  93. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/datasets/__init__.py +0 -0
  94. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/datasets/audio.py +0 -0
  95. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/datasets/basic.py +0 -0
  96. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/datasets/bundle_file.py +0 -0
  97. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/datasets/cached.py +0 -0
  98. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/datasets/cached2.py +0 -0
  99. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/datasets/distrib_files.py +0 -0
  100. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/datasets/generating.py +0 -0
  101. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/datasets/hdf.py +0 -0
  102. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/datasets/lm.py +0 -0
  103. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/datasets/map.py +0 -0
  104. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/datasets/multi_proc.py +0 -0
  105. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/datasets/normalization_data.py +0 -0
  106. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/datasets/numpy_dump.py +0 -0
  107. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/datasets/postprocessing.py +0 -0
  108. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/datasets/raw_wav.py +0 -0
  109. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/datasets/sprint.py +0 -0
  110. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/datasets/stereo.py +0 -0
  111. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/datasets/util/__init__.py +0 -0
  112. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/datasets/util/feature_extraction.py +0 -0
  113. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/datasets/util/strings.py +0 -0
  114. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/datasets/util/vocabulary.py +0 -0
  115. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/engine/__init__.py +0 -0
  116. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/engine/base.py +0 -0
  117. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/engine/batch.py +0 -0
  118. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/__init__.py +0 -0
  119. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/__main__.py +0 -0
  120. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/warp-rna/.git +0 -0
  121. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
  122. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
  123. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
  124. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
  125. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
  126. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
  127. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
  128. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
  129. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
  130. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
  131. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
  132. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
  133. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
  134. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
  135. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
  136. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
  137. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
  138. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
  139. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
  140. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
  141. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
  142. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
  143. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
  144. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
  145. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/__init__.py +0 -0
  146. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/graph_editor/README.md +0 -0
  147. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/graph_editor/__init__.py +0 -0
  148. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/graph_editor/edit.py +0 -0
  149. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/graph_editor/reroute.py +0 -0
  150. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/graph_editor/select.py +0 -0
  151. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/graph_editor/subgraph.py +0 -0
  152. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/graph_editor/transform.py +0 -0
  153. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/extern/graph_editor/util.py +0 -0
  154. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/forward_iface.py +0 -0
  155. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/__init__.py +0 -0
  156. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/_backend.py +0 -0
  157. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/_native/__init__.py +0 -0
  158. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/_native/backend.cpp +0 -0
  159. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/_native/backend.hpp +0 -0
  160. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/_native/module.cpp +0 -0
  161. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/_native/module.hpp +0 -0
  162. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/_native/py_utils.hpp +0 -0
  163. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/_native/tensor_ops.cpp +0 -0
  164. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/_native/tensor_ops.hpp +0 -0
  165. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/_numpy_backend.py +0 -0
  166. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/_random_journal.py +0 -0
  167. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/_utils.py +0 -0
  168. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/array_.py +0 -0
  169. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/audio/__init__.py +0 -0
  170. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/audio/mel.py +0 -0
  171. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/audio/specaugment.py +0 -0
  172. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/backend.py +0 -0
  173. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/build_from_dict.py +0 -0
  174. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/cond.py +0 -0
  175. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/const.py +0 -0
  176. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/container.py +0 -0
  177. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/control_flow_ctx.py +0 -0
  178. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/conv.py +0 -0
  179. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/conversions/__init__.py +0 -0
  180. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/conversions/espnet_e_branchformer.py +0 -0
  181. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/conversions/hf_llama.py +0 -0
  182. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/conversions/torch_nn.py +0 -0
  183. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/decoder/__init__.py +0 -0
  184. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/device.py +0 -0
  185. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/dims.py +0 -0
  186. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/dropout.py +0 -0
  187. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/dtype.py +0 -0
  188. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/encoder/__init__.py +0 -0
  189. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/encoder/base.py +0 -0
  190. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/encoder/conformer.py +0 -0
  191. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/encoder/e_branchformer.py +0 -0
  192. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/gradient.py +0 -0
  193. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/graph.py +0 -0
  194. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/hooks.py +0 -0
  195. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/init.py +0 -0
  196. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/label_smoothing.py +0 -0
  197. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/linear.py +0 -0
  198. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/loop.py +0 -0
  199. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/loss.py +0 -0
  200. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/math_.py +0 -0
  201. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/matmul.py +0 -0
  202. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/module.py +0 -0
  203. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/normalization.py +0 -0
  204. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/parameter.py +0 -0
  205. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/parametrizations.py +0 -0
  206. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/parametrize.py +0 -0
  207. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/piecewise_linear.py +0 -0
  208. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/rand.py +0 -0
  209. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/rec.py +0 -0
  210. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/reduce.py +0 -0
  211. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/run_ctx.py +0 -0
  212. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/signal.py +0 -0
  213. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/state.py +0 -0
  214. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/stepwise_scheduler.py +0 -0
  215. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/tensor_array.py +0 -0
  216. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/frontend/types.py +0 -0
  217. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/import_/__init__.py +0 -0
  218. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/import_/common.py +0 -0
  219. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/import_/git.py +0 -0
  220. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/import_/import_.py +0 -0
  221. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/learning_rate_control.py +0 -0
  222. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/log.py +0 -0
  223. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/native_op.cpp +0 -0
  224. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/native_op.py +0 -0
  225. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/pretrain.py +0 -0
  226. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/sprint/__init__.py +0 -0
  227. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/sprint/cache.py +0 -0
  228. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/sprint/control.py +0 -0
  229. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/sprint/error_signals.py +0 -0
  230. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/sprint/extern_interface.py +0 -0
  231. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/sprint/interface.py +0 -0
  232. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tensor/README.md +0 -0
  233. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tensor/__init__.py +0 -0
  234. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tensor/_dim_extra.py +0 -0
  235. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tensor/_tensor_extra.py +0 -0
  236. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tensor/_tensor_mixin_base.py +0 -0
  237. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tensor/_tensor_op_overloads.py +0 -0
  238. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tensor/control_flow_ctx.py +0 -0
  239. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tensor/dim.py +0 -0
  240. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tensor/marked_dim.py +0 -0
  241. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tensor/tensor.py +0 -0
  242. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tensor/tensor_dict.py +0 -0
  243. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tensor/utils.py +0 -0
  244. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/__init__.py +0 -0
  245. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/compat.py +0 -0
  246. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/data_pipeline.py +0 -0
  247. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/distributed.py +0 -0
  248. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/engine.py +0 -0
  249. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/frontend_layers/README.md +0 -0
  250. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/frontend_layers/__init__.py +0 -0
  251. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/frontend_layers/_backend.py +0 -0
  252. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/frontend_layers/_utils.py +0 -0
  253. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/frontend_layers/cond.py +0 -0
  254. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
  255. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
  256. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/frontend_layers/dims.py +0 -0
  257. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/frontend_layers/layer.py +0 -0
  258. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/frontend_layers/loop.py +0 -0
  259. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/frontend_layers/make_layer.py +0 -0
  260. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/frontend_layers/masked_computation.py +0 -0
  261. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
  262. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
  263. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/frontend_low_level/__init__.py +0 -0
  264. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/frontend_low_level/_backend.py +0 -0
  265. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/horovod.py +0 -0
  266. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/hyper_param_tuning.py +0 -0
  267. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/layers/__init__.py +0 -0
  268. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/layers/base.py +0 -0
  269. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/layers/basic.py +0 -0
  270. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/layers/rec.py +0 -0
  271. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/layers/segmental_model.py +0 -0
  272. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/layers/signal_processing.py +0 -0
  273. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/layers/variable.py +0 -0
  274. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/native_op.py +0 -0
  275. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/network.py +0 -0
  276. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/sprint.py +0 -0
  277. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/updater.py +0 -0
  278. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/util/__init__.py +0 -0
  279. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/util/basic.py +0 -0
  280. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/util/data.py +0 -0
  281. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/util/gradient_checkpoint.py +0 -0
  282. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/util/ken_lm.py +0 -0
  283. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/tf/util/open_fst.py +0 -0
  284. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/README.md +0 -0
  285. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/__init__.py +0 -0
  286. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/data/__init__.py +0 -0
  287. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/data/extern_data.py +0 -0
  288. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/data/pipeline.py +0 -0
  289. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/data/queued_data_iter.py +0 -0
  290. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
  291. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/data/tensor_utils.py +0 -0
  292. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/distributed.py +0 -0
  293. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/engine.py +0 -0
  294. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/frontend/__init__.py +0 -0
  295. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/frontend/_backend.py +0 -0
  296. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/frontend/_rand.py +0 -0
  297. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/frontend/bridge.py +0 -0
  298. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/frontend/raw_ops.py +0 -0
  299. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/optim/README.md +0 -0
  300. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/optim/__init__.py +0 -0
  301. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/optim/lion.py +0 -0
  302. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/updater.py +0 -0
  303. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/util/README.md +0 -0
  304. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/util/__init__.py +0 -0
  305. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/util/array_.py +0 -0
  306. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/util/debug_inf_nan.py +0 -0
  307. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/util/diagnose_gpu.py +0 -0
  308. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/util/exception_helper.py +0 -0
  309. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/util/gradient_checkpoint.py +0 -0
  310. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/util/module.py +0 -0
  311. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/torch/util/scaled_gradient.py +0 -0
  312. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/util/__init__.py +0 -0
  313. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/util/better_exchook.py +0 -0
  314. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/util/bpe.py +0 -0
  315. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/util/debug.py +0 -0
  316. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/util/debug_helpers.py +0 -0
  317. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/util/file_cache.py +0 -0
  318. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/util/fsa.py +0 -0
  319. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/util/literal_py_to_pickle.py +0 -0
  320. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/util/math.py +0 -0
  321. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
  322. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/util/native_code_compiler.py +0 -0
  323. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/util/pprint.py +0 -0
  324. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/util/py-to-pickle.cpp +0 -0
  325. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/util/py_compat.py +0 -0
  326. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/util/py_ext_mod_compiler.py +0 -0
  327. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/util/result_with_reason.py +0 -0
  328. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/util/sig_proc.py +0 -0
  329. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/util/task_system.py +0 -0
  330. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/util/train_proc_manager.py +0 -0
  331. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn/util/watch_memory.py +0 -0
  332. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn.egg-info/SOURCES.txt +0 -0
  333. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn.egg-info/dependency_links.txt +0 -0
  334. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/returnn.egg-info/top_level.txt +0 -0
  335. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/rnn.py +0 -0
  336. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/setup.cfg +0 -0
  337. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/setup.py +0 -0
  338. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/DummySprintExec.py +0 -0
  339. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/PyCharm-inspection-profile.xml +0 -0
  340. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/PyCharm.idea/.gitignore +0 -0
  341. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/PyCharm.idea/.name +0 -0
  342. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
  343. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
  344. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
  345. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
  346. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
  347. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/PyCharm.idea/misc.xml +0 -0
  348. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/PyCharm.idea/modules.xml +0 -0
  349. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/PyCharm.idea/returnn.iml +0 -0
  350. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
  351. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/_set_num_threads1.py +0 -0
  352. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/_setup_returnn_env.py +0 -0
  353. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/_setup_test_env.py +0 -0
  354. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/bpe-unicode-demo.codes +0 -0
  355. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/bpe-unicode-demo.vocab +0 -0
  356. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/lexicon_opt.fst +0 -0
  357. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/lexicon_opt.isyms +0 -0
  358. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/lexicon_opt.jpg +0 -0
  359. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/lexicon_opt.osyms +0 -0
  360. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/lint_common.py +0 -0
  361. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/pycharm-inspect.py +0 -0
  362. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/pylint.py +0 -0
  363. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/returnn-as-framework.py +0 -0
  364. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/rf_utils.py +0 -0
  365. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/spelling.dic +0 -0
  366. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_Config.py +0 -0
  367. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_Dataset.py +0 -0
  368. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_Fsa.py +0 -0
  369. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_GeneratingDataset.py +0 -0
  370. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_HDFDataset.py +0 -0
  371. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_LearningRateControl.py +0 -0
  372. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_Log.py +0 -0
  373. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_MultiProcDataset.py +0 -0
  374. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_Pretrain.py +0 -0
  375. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_ResNet.py +0 -0
  376. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_SprintDataset.py +0 -0
  377. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_SprintInterface.py +0 -0
  378. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_TFEngine.py +0 -0
  379. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_TFNativeOp.py +0 -0
  380. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_TFNetworkLayer.py +0 -0
  381. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_TFNetworkRecLayer.py +0 -0
  382. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_TFNetworkSigProcLayer.py +0 -0
  383. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_TFUpdater.py +0 -0
  384. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_TFUtil.py +0 -0
  385. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_TF_determinism.py +0 -0
  386. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_TaskSystem.py +0 -0
  387. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_TaskSystem_SharedMem.py +0 -0
  388. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_TranslationDataset.py +0 -0
  389. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_Util.py +0 -0
  390. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_demos.py +0 -0
  391. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_fork_exec.py +0 -0
  392. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_hdf_dump.py +0 -0
  393. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_rf_array.py +0 -0
  394. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_rf_base.py +0 -0
  395. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_rf_cond.py +0 -0
  396. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_rf_const.py +0 -0
  397. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_rf_container.py +0 -0
  398. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_rf_conv.py +0 -0
  399. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_rf_decoder_transformer.py +0 -0
  400. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_rf_encoder_conformer.py +0 -0
  401. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_rf_gradient.py +0 -0
  402. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_rf_label_smoothing.py +0 -0
  403. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_rf_loop.py +0 -0
  404. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_rf_math.py +0 -0
  405. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_rf_normalization.py +0 -0
  406. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_rf_piecewise_linear.py +0 -0
  407. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_rf_rec.py +0 -0
  408. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_rf_reduce.py +0 -0
  409. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_rf_signal.py +0 -0
  410. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_tensor.py +0 -0
  411. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_tools.py +0 -0
  412. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_torch_dataset.py +0 -0
  413. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_torch_engine.py +0 -0
  414. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_torch_frontend.py +0 -0
  415. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_torch_internal_frontend.py +0 -0
  416. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/test_torch_util.py +0 -0
  417. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tests/torch_utils.py +0 -0
  418. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/_setup_returnn_env.py +0 -0
  419. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/analyze-dataset-batches.py +0 -0
  420. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/bliss-collect-seq-lens.py +0 -0
  421. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/bliss-dump-text.py +0 -0
  422. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/bliss-get-segment-names.py +0 -0
  423. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/bliss-to-ogg-zip.py +0 -0
  424. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/bpe-create-lexicon.py +0 -0
  425. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/calculate-word-error-rate.py +0 -0
  426. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/cleanup-old-models.py +0 -0
  427. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/collect-orth-symbols.py +0 -0
  428. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/collect-words.py +0 -0
  429. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/compile_native_op.py +0 -0
  430. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/compile_tf_graph.py +0 -0
  431. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/debug-dump-search-scores.py +0 -0
  432. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/debug-plot-search-scores.py +0 -0
  433. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/dump-dataset-raw-strings.py +0 -0
  434. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/dump-dataset.py +0 -0
  435. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/dump-forward-stats.py +0 -0
  436. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/dump-forward.py +0 -0
  437. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/dump-network-json.py +0 -0
  438. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/dump-pickle.py +0 -0
  439. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/extract_state_tying_from_dataset.py +0 -0
  440. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/get-attention-weights.py +0 -0
  441. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/get-best-model-epoch.py +0 -0
  442. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/hdf_dump.py +0 -0
  443. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/hdf_dump_translation_dataset.py +0 -0
  444. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/import-blocks-mt-model.py +0 -0
  445. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/import-t2t-mt-model.py +0 -0
  446. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/lattice_rescorer/.gitignore +0 -0
  447. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/lattice_rescorer/Makefile +0 -0
  448. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/lattice_rescorer/README.md +0 -0
  449. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/lattice_rescorer/example/README.md +0 -0
  450. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/lattice_rescorer/example/libs_list +0 -0
  451. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
  452. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
  453. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
  454. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/lattice_rescorer/example/state_vars_list +0 -0
  455. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/lattice_rescorer/example/tensor_names_list +0 -0
  456. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/lattice_rescorer/file.h +0 -0
  457. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
  458. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
  459. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/lattice_rescorer/main.cc +0 -0
  460. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/lattice_rescorer/rescorer.h +0 -0
  461. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/lattice_rescorer/vocabulary.cc +0 -0
  462. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/lattice_rescorer/vocabulary.h +0 -0
  463. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/tf_avg_checkpoints.py +0 -0
  464. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/tf_inspect_checkpoint.py +0 -0
  465. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/tf_inspect_summary_log.py +0 -0
  466. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/torch_avg_checkpoints.py +0 -0
  467. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/torch_export_to_onnx.py +0 -0
  468. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/torch_inspect_checkpoint.py +0 -0
  469. {returnn-1.20241205.152736 → returnn-1.20241210.111636}/tools/torch_inspect_checkpoint_and_opt.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: returnn
3
- Version: 1.20241205.152736
3
+ Version: 1.20241210.111636
4
4
  Summary: The RWTH extensible training framework for universal recurrent neural networks
5
5
  Home-page: https://github.com/rwth-i6/returnn/
6
6
  Author: Albert Zeyer
@@ -0,0 +1,2 @@
1
+ version = '1.20241210.111636'
2
+ long_version = '1.20241210.111636+git.b091bd6'
@@ -452,40 +452,41 @@ def init(config_filename=None, command_line_options=(), config_updates=None, ext
452
452
  :param dict[str]|None config_updates: see :func:`init_config`
453
453
  :param str|None extra_greeting:
454
454
  """
455
- debug_util.init_better_exchook()
456
- util.init_thread_join_hack()
457
- init_config(
458
- config_filename=config_filename, command_line_options=command_line_options, extra_updates=config_updates
459
- )
460
- if config.bool("use_train_proc_manager", False):
461
- from returnn.util.train_proc_manager import maybe_start_train_proc_manager
462
-
463
- maybe_start_train_proc_manager(config=config)
464
- if config.bool("patch_atfork", False):
465
- from returnn.util.basic import maybe_restart_returnn_with_atfork_patch
466
-
467
- maybe_restart_returnn_with_atfork_patch()
468
- init_log()
469
- if extra_greeting:
470
- print(extra_greeting, file=log.v1)
471
- returnn_greeting(config_filename=config_filename, command_line_options=command_line_options)
472
- debug_util.init_faulthandler()
473
- if config.bool("watch_memory", False):
474
- from returnn.util.watch_memory import watch_memory
475
-
476
- watch_memory()
477
- init_backend_engine()
478
- if config.bool("ipython", False):
479
- debug_util.init_ipython_kernel()
480
- if config.typed_value("startup_callback"):
481
- startup_callback = config.typed_value("startup_callback")
482
- startup_callback(config=config)
483
- if need_data():
484
- if config.bool("use_dummy_datasets", False):
485
- setup_dummy_datasets()
486
- init_data()
487
- print_task_properties()
488
- init_engine()
455
+ with util.ReportImportedDevModules(description="RETURNN init"):
456
+ debug_util.init_better_exchook()
457
+ util.init_thread_join_hack()
458
+ init_config(
459
+ config_filename=config_filename, command_line_options=command_line_options, extra_updates=config_updates
460
+ )
461
+ if config.bool("use_train_proc_manager", False):
462
+ from returnn.util.train_proc_manager import maybe_start_train_proc_manager
463
+
464
+ maybe_start_train_proc_manager(config=config)
465
+ if config.bool("patch_atfork", False):
466
+ from returnn.util.basic import maybe_restart_returnn_with_atfork_patch
467
+
468
+ maybe_restart_returnn_with_atfork_patch()
469
+ init_log()
470
+ if extra_greeting:
471
+ print(extra_greeting, file=log.v1)
472
+ returnn_greeting(config_filename=config_filename, command_line_options=command_line_options)
473
+ debug_util.init_faulthandler()
474
+ if config.bool("watch_memory", False):
475
+ from returnn.util.watch_memory import watch_memory
476
+
477
+ watch_memory()
478
+ init_backend_engine()
479
+ if config.bool("ipython", False):
480
+ debug_util.init_ipython_kernel()
481
+ if config.typed_value("startup_callback"):
482
+ startup_callback = config.typed_value("startup_callback")
483
+ startup_callback(config=config)
484
+ if need_data():
485
+ if config.bool("use_dummy_datasets", False):
486
+ setup_dummy_datasets()
487
+ init_data()
488
+ print_task_properties()
489
+ init_engine()
489
490
 
490
491
 
491
492
  def finalize(error_occurred=False):
@@ -333,20 +333,24 @@ class MetaDataset(CachedDataset2):
333
333
  file=log.v1,
334
334
  )
335
335
  other_tags = self.datasets[key].get_all_tags()
336
+ other_tags_set = set(other_tags)
336
337
  for tag in seq_list:
337
- if tag not in other_tags:
338
+ if tag not in other_tags_set:
338
339
  print(
339
340
  "Seq tag %r in dataset %r but not in dataset %r." % (tag, self.default_dataset_key, key),
340
341
  file=log.v1,
341
342
  )
342
343
  break # only print one
344
+ del other_tags_set
345
+ seq_list_set = set(seq_list)
343
346
  for tag in other_tags:
344
- if tag not in seq_list:
347
+ if tag not in seq_list_set:
345
348
  print(
346
349
  "Seq tag %r in dataset %r but not in dataset %r." % (tag, key, self.default_dataset_key),
347
350
  file=log.v1,
348
351
  )
349
352
  break # only print one
353
+ del seq_list_set
350
354
  raise Exception("Dataset %r is missing seqs." % key)
351
355
  elif isinstance(seq_list_file, str):
352
356
  seq_list = Dataset._load_seq_list_file(seq_list_file, expect_list=False)
@@ -325,7 +325,7 @@ class RotaryPosCausalSelfAttention(CausalSelfAttention):
325
325
  q = _apply_rope(
326
326
  q,
327
327
  (
328
- rf.gather(pos_enc, axis=hist_dim, indices=hist_dim.dyn_size_ext - 1)
328
+ rf.gather(pos_enc, axis=hist_dim, indices=hist_dim.get_size_tensor() - 1)
329
329
  if axis == single_step_dim
330
330
  else rf.replace_dim(pos_enc, in_dim=hist_dim, out_dim=axis)[0]
331
331
  ),
@@ -503,6 +503,8 @@ def _rel_pos_enc_shift(x: Tensor, axis: Dim, pos_emb_spatial_dim: Dim, hist_dim:
503
503
  :param hist_dim: T' (equal to T but separate dim)
504
504
  :return: [B,H,T,T']
505
505
  """
506
+ if pos_emb_spatial_dim == hist_dim: # happens for single_step_dim
507
+ return x # no shift needed
506
508
  batch_dims = x.remaining_dims((axis, pos_emb_spatial_dim))
507
509
  x_padded, (pos_emb_spatial_dim_,) = rf.pad(
508
510
  x, axes=[pos_emb_spatial_dim], padding=[(1, 0)], value=0.0
@@ -604,6 +606,7 @@ class RelPosCausalSelfAttention(CausalSelfAttention):
604
606
  pos_emb, pos_emb_spatial_dim = relative_positional_encoding(
605
607
  query_spatial_dim=axis, key_value_spatial_dim=hist_dim, feat_dim=self.pos_emb_feat_dim
606
608
  )
609
+ # pos_emb_spatial_dim is 2*time1-1 if axis!=single_step_dim, else time1
607
610
  if self.pos_emb_dropout:
608
611
  pos_emb = rf.dropout(pos_emb, self.pos_emb_dropout)
609
612
  if self.linear_pos is not None:
@@ -850,7 +853,8 @@ def _make_indices(
850
853
  if query_spatial_dim == single_step_dim:
851
854
  indices = kv_pos_vec
852
855
  out_spatial_dim = key_value_spatial_dim
853
- assert query_offset is None # not sure if any custom query offset makes sense?
856
+ # not sure if any custom query offset makes sense?
857
+ assert query_offset is None or (isinstance(query_offset, int) and query_offset == 0)
854
858
  # Assume the kv are the accumulated history, and query is cur frame of it,
855
859
  # corresponding to the last frame of the kv.
856
860
  query_offset = key_value_spatial_dim.get_size_tensor() - 1
@@ -45,14 +45,15 @@ class TransformerDecoder(rf.Module):
45
45
  num_heads: int = 8,
46
46
  att_dropout: float = 0.1,
47
47
  norm: Union[type, Dict[str, Any], rf.Module, Callable] = rf.LayerNorm,
48
- decoder_layer: Optional[Union[TransformerDecoderLayer, rf.Module, type, Any]] = None,
49
- decoder_layer_opts: Optional[Dict[str, Any]] = None,
48
+ layer: Optional[Union[TransformerDecoderLayer, rf.Module, type, Dict[str, Any], Any]] = None,
49
+ layer_opts: Optional[Dict[str, Any]] = None,
50
50
  embed_dim: Optional[Dim] = None,
51
51
  share_embedding: bool = None,
52
52
  input_embedding_scale: float = None,
53
53
  input_dropout: float = None,
54
54
  logits_with_bias: bool = False,
55
55
  sequential=rf.Sequential,
56
+ **compat_kwargs,
56
57
  ):
57
58
  """
58
59
  :param encoder_dim: for cross-attention. None if no cross-attention.
@@ -67,8 +68,8 @@ class TransformerDecoder(rf.Module):
67
68
  :param num_heads: the number of attention heads
68
69
  :param att_dropout: attention dropout value
69
70
  :param norm: pre-normalization for FF and attention blocks
70
- :param decoder_layer: an instance of :class:`TransformerDecoderLayer` or similar
71
- :param decoder_layer_opts: options for the encoder layer
71
+ :param layer: an instance of :class:`TransformerDecoderLayer` or similar
72
+ :param layer_opts: options for the decoder layer
72
73
  :param embed_dim: if given, will first have an embedding [vocab,embed] and then a linear [embed,model].
73
74
  :param share_embedding:
74
75
  :param input_embedding_scale:
@@ -78,6 +79,16 @@ class TransformerDecoder(rf.Module):
78
79
  """
79
80
  super().__init__()
80
81
 
82
+ if compat_kwargs:
83
+ if "decoder_layer" in compat_kwargs: # compatibility, we used to have this before
84
+ assert layer is None
85
+ layer = compat_kwargs.pop("decoder_layer")
86
+ if "decoder_layer_opts" in compat_kwargs: # compatibility, we used to have this before
87
+ assert layer_opts is None
88
+ layer_opts = compat_kwargs.pop("decoder_layer_opts")
89
+ if compat_kwargs:
90
+ raise TypeError(f"unexpected kwargs {compat_kwargs!r}")
91
+
81
92
  if not isinstance(vocab_dim, Dim):
82
93
  raise TypeError(f"TransformerDecoder: unexpected vocab_dim {vocab_dim!r} type {type(vocab_dim)}")
83
94
  if isinstance(model_dim, int):
@@ -136,8 +147,8 @@ class TransformerDecoder(rf.Module):
136
147
  input_dropout = dropout if BehaviorVersion.get() >= 20 else 0.0
137
148
  self.input_dropout = input_dropout
138
149
 
139
- if not decoder_layer or isinstance(decoder_layer, type):
140
- decoder_layer_opts_ = dict(
150
+ if not layer or isinstance(layer, (dict, type)):
151
+ layer_opts_ = dict(
141
152
  encoder_dim=encoder_dim,
142
153
  out_dim=model_dim,
143
154
  ff=ff,
@@ -148,16 +159,20 @@ class TransformerDecoder(rf.Module):
148
159
  att_dropout=att_dropout,
149
160
  norm=norm,
150
161
  )
151
- if decoder_layer_opts:
152
- decoder_layer_opts_.update(decoder_layer_opts)
153
- if not decoder_layer:
154
- decoder_layer = TransformerDecoderLayer(**decoder_layer_opts_)
155
- elif isinstance(decoder_layer, type):
156
- decoder_layer = decoder_layer(**decoder_layer_opts_)
162
+ layer_opts_ = {k: v for (k, v) in layer_opts_.items() if v is not NotSpecified}
163
+ if layer_opts:
164
+ layer_opts_.update(layer_opts)
165
+ if not layer:
166
+ layer = TransformerDecoderLayer(**layer_opts_)
167
+ elif isinstance(layer, type):
168
+ layer = layer(**layer_opts_)
169
+ elif isinstance(layer, dict):
170
+ layer_opts_ = {k: v for (k, v) in layer_opts_.items() if k not in layer}
171
+ layer = rf.build_from_dict(layer, **layer_opts_)
157
172
  else:
158
- raise TypeError(f"unexpected decoder_layer {decoder_layer!r}")
173
+ raise TypeError(f"unexpected layer {layer!r}")
159
174
 
160
- self.layers = sequential(_copy.deepcopy(decoder_layer) for _ in range(num_layers))
175
+ self.layers = sequential(_copy.deepcopy(layer) for _ in range(num_layers))
161
176
 
162
177
  self.final_layer_norm = make_norm(norm, model_dim)
163
178
 
@@ -32,11 +32,13 @@ class TransformerEncoder(rf.Module):
32
32
  num_heads: int = 8,
33
33
  att_dropout: float = 0.1,
34
34
  norm: Union[type, Dict[str, Any], rf.Module, Callable] = rf.LayerNorm,
35
- decoder_layer: Optional[Union[TransformerEncoderLayer, rf.Module, type, Any]] = None,
35
+ layer: Optional[Union[TransformerEncoderLayer, rf.Module, type, Dict[str, Any], Any]] = None,
36
+ layer_opts: Optional[Dict[str, Any]] = None,
36
37
  embed_dim: Optional[Dim] = None,
37
38
  input_embedding_scale: float = None,
38
39
  input_dropout: float = None,
39
40
  sequential=rf.Sequential,
41
+ **compat_kwargs,
40
42
  ):
41
43
  """
42
44
  :param vocab_dim:
@@ -48,7 +50,8 @@ class TransformerEncoder(rf.Module):
48
50
  :param num_heads: the number of attention heads
49
51
  :param att_dropout: attention dropout value
50
52
  :param norm: pre-normalization for FF and attention blocks
51
- :param decoder_layer: an instance of :class:`TransformerDecoderLayer` or similar
53
+ :param layer: an instance of :class:`TransformerEncoderLayer` or similar
54
+ :param layer_opts: options for the encoder layer
52
55
  :param embed_dim: if given, will first have an embedding [vocab,embed] and then a linear [embed,model].
53
56
  :param input_embedding_scale:
54
57
  :param input_dropout:
@@ -56,6 +59,13 @@ class TransformerEncoder(rf.Module):
56
59
  """
57
60
  super().__init__()
58
61
 
62
+ if compat_kwargs:
63
+ if "decoder_layer" in compat_kwargs: # compatibility, we (weirdly) used to have this before
64
+ assert layer is None
65
+ layer = compat_kwargs.pop("decoder_layer")
66
+ if compat_kwargs:
67
+ raise TypeError(f"unexpected kwargs {compat_kwargs!r}")
68
+
59
69
  if not isinstance(vocab_dim, Dim):
60
70
  raise TypeError(f"TransformerDecoder: unexpected vocab_dim {vocab_dim!r} type {type(vocab_dim)}")
61
71
  if isinstance(model_dim, int):
@@ -97,8 +107,8 @@ class TransformerEncoder(rf.Module):
97
107
  input_dropout = dropout
98
108
  self.input_dropout = input_dropout
99
109
 
100
- if not decoder_layer or isinstance(decoder_layer, type):
101
- decoder_layer_opts_ = dict(
110
+ if not layer or isinstance(layer, (dict, type)):
111
+ layer_opts_ = dict(
102
112
  out_dim=model_dim,
103
113
  ff=ff,
104
114
  dropout=dropout,
@@ -106,14 +116,20 @@ class TransformerEncoder(rf.Module):
106
116
  att_dropout=att_dropout,
107
117
  norm=norm,
108
118
  )
109
- if not decoder_layer:
110
- decoder_layer = TransformerEncoderLayer(**decoder_layer_opts_)
111
- elif isinstance(decoder_layer, type):
112
- decoder_layer = decoder_layer(**decoder_layer_opts_)
119
+ layer_opts_ = {k: v for (k, v) in layer_opts_.items() if v is not NotSpecified}
120
+ if layer_opts:
121
+ layer_opts_.update(layer_opts)
122
+ if not layer:
123
+ layer = TransformerEncoderLayer(**layer_opts_)
124
+ elif isinstance(layer, type):
125
+ layer = layer(**layer_opts_)
126
+ elif isinstance(layer, dict):
127
+ layer_opts_ = {k: v for (k, v) in layer_opts_.items() if k not in layer}
128
+ layer = rf.build_from_dict(layer, **layer_opts_)
113
129
  else:
114
- raise TypeError(f"unexpected decoder_layer {decoder_layer!r}")
130
+ raise TypeError(f"unexpected layer {layer!r}")
115
131
 
116
- self.layers = sequential(_copy.deepcopy(decoder_layer) for _ in range(num_layers))
132
+ self.layers = sequential(_copy.deepcopy(layer) for _ in range(num_layers))
117
133
 
118
134
  self.final_layer_norm = make_norm(norm, model_dim)
119
135
 
@@ -564,6 +564,60 @@ def get_tensorflow_version_tuple() -> Tuple[int, ...]:
564
564
  return tuple([int(re.sub("(-rc[0-9]|-dev[0-9]*)", "", s)) for s in tf.__version__.split(".")])
565
565
 
566
566
 
567
+ class ReportImportedDevModules:
568
+ """
569
+ This is supposed to be used as a context manager.
570
+ We track all additionally loaded modules during this context, and also extensions to sys.path.
571
+ We try to detect if such loaded module is inside a Git repository, and if so, report the Git commit.
572
+ """
573
+
574
+ def __init__(self, *, description: str):
575
+ self.description = description
576
+ self.ignore_sys_path: Optional[Set[str]] = None
577
+ self.ignore_sys_modules: Optional[Set[str]] = None
578
+
579
+ def __enter__(self):
580
+ self.ignore_sys_path = set(sys.path)
581
+ self.ignore_sys_modules = set(sys.modules)
582
+ self.ignore_sys_modules.add("__mp_main__")
583
+
584
+ def __exit__(self, exc_type, exc_val, exc_tb):
585
+ if not log.verbose: # it might have never been initialized due to some error, or forked proc
586
+ return
587
+ if not log.verbose[3]:
588
+ return
589
+ if exc_type:
590
+ return
591
+ print(f"Tracked changes to sys.path and sys.modules during {self.description}:", file=log.v4)
592
+ has_changes = False
593
+ for path in sys.path:
594
+ if path not in self.ignore_sys_path:
595
+ print("New sys.path entry:", path, file=log.v3)
596
+ has_changes = True
597
+ for mod_name, mod in sys.modules.items():
598
+ if "." not in mod_name and mod_name not in self.ignore_sys_modules:
599
+ if hasattr(mod, "__file__") and mod.__file__:
600
+ # __file__ is e.g. ".../recipe/i6_experiments/__init__.py"
601
+ mod_dir = os.path.dirname(mod.__file__) # e.g. ".../recipe/i6_experiments"
602
+ if os.path.exists(mod_dir + "/.git"):
603
+ git_dir = mod_dir
604
+ elif os.path.exists(mod_dir + "/../.git"):
605
+ # Use realpath because the mod dir might be a symlink.
606
+ git_dir = os.path.dirname(os.path.realpath(mod_dir))
607
+ else:
608
+ git_dir = None
609
+ if git_dir:
610
+ try:
611
+ git_info = git_describe_head_version(git_dir=git_dir)
612
+ except Exception as e:
613
+ git_info = f"<git-error: {e}>"
614
+ mod_info = "(%s in %s)" % (git_info, mod_dir)
615
+ print("New module:", mod_name, mod_info, file=log.v3)
616
+ has_changes = True
617
+ if not has_changes:
618
+ print("(No changes to sys.modules or sys.path.)", file=log.v4)
619
+
620
+
567
621
  def eval_shell_env(token):
568
622
  """
569
623
  :param str token:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: returnn
3
- Version: 1.20241205.152736
3
+ Version: 1.20241210.111636
4
4
  Summary: The RWTH extensible training framework for universal recurrent neural networks
5
5
  Home-page: https://github.com/rwth-i6/returnn/
6
6
  Author: Albert Zeyer
@@ -3,7 +3,7 @@ RETURNN frontend (returnn.frontend) tests
3
3
  """
4
4
 
5
5
  from __future__ import annotations
6
- from typing import Tuple
6
+ from typing import Union, Tuple
7
7
  import numpy as np
8
8
  import numpy.testing
9
9
  import _setup_test_env # noqa
@@ -440,6 +440,84 @@ def test_rope_causal_self_att():
440
440
  print(" all matched!")
441
441
 
442
442
 
443
+ def test_causal_self_att_variants_single_step_vs_full_seq():
444
+ from returnn.tensor import single_step_dim
445
+
446
+ time_dim = Dim(Tensor("time", [batch_dim], dtype="int32"))
447
+ in_dim = Dim(7 * 2, name="in")
448
+ extern_data = TensorDict(
449
+ {
450
+ "data": Tensor("data", [batch_dim, time_dim, in_dim], dtype="float32"),
451
+ }
452
+ )
453
+
454
+ # noinspection PyShadowingNames
455
+ def _forward_step(*, model: Union[rf.CausalSelfAttention], extern_data: TensorDict):
456
+ x = extern_data["data"]
457
+
458
+ out_seq_level, _ = model(x, axis=time_dim)
459
+ out_seq_level.mark_as_output("out_seq_level", shape=[batch_dim, time_dim, model.out_dim])
460
+
461
+ out_seq_level_explicit_initial_state, _ = model(
462
+ x, axis=time_dim, state=model.default_initial_state(batch_dims=[batch_dim])
463
+ )
464
+ out_seq_level_explicit_initial_state.mark_as_output(
465
+ "out_seq_level_explicit_initial_state", shape=[batch_dim, time_dim, model.out_dim]
466
+ )
467
+
468
+ def _body(
469
+ _x: Tensor, _state: Union[rf.CausalSelfAttentionState]
470
+ ) -> Tuple[Tensor, Union[rf.CausalSelfAttentionState]]:
471
+ return model(_x, axis=single_step_dim, state=_state)
472
+
473
+ out_single_steps, _, _ = rf.scan(
474
+ spatial_dim=time_dim,
475
+ xs=x,
476
+ body=_body,
477
+ ys=Tensor("y", dims=[batch_dim, model.out_dim], dtype="float32"),
478
+ initial=model.default_initial_state(batch_dims=[batch_dim]),
479
+ )
480
+ out_single_steps.mark_as_output("out_single_steps", shape=[batch_dim, time_dim, model.out_dim])
481
+
482
+ common_opts = dict(
483
+ in_dim=in_dim,
484
+ proj_dim=Dim(5, name="out"),
485
+ key_dim_total=Dim(21 * 2, name="key-dim-total"),
486
+ value_dim_total=Dim(33, name="value-dim-total"),
487
+ num_heads=3,
488
+ )
489
+
490
+ def _make_causal_self_att(**_kwargs):
491
+ return rf.CausalSelfAttention(**common_opts)
492
+
493
+ def _make_rope_causal_self_att(**_kwargs):
494
+ return rf.RotaryPosCausalSelfAttention(**common_opts)
495
+
496
+ def _make_rel_pos_causal_self_att(**_kwargs):
497
+ return rf.RelPosCausalSelfAttention(**common_opts)
498
+
499
+ models = [_make_causal_self_att, _make_rope_causal_self_att, _make_rel_pos_causal_self_att]
500
+
501
+ for get_model in models:
502
+ print("> Testing model:", get_model.__name__)
503
+ res = run_model(
504
+ extern_data,
505
+ get_model,
506
+ _forward_step,
507
+ # TF needs TensorArray unstack, not implemented yet
508
+ test_tensorflow=False,
509
+ )
510
+
511
+ # Check that the single-step and the seq-level output are the same.
512
+ res_seq_level = res.data["out_seq_level"].raw_tensor
513
+ for key in ["out_seq_level_explicit_initial_state", "out_single_steps"]:
514
+ res_other = res.data[key].raw_tensor
515
+ assert res_seq_level.shape == res_other.shape
516
+ numpy.testing.assert_allclose(
517
+ res_other, res_seq_level, atol=1e-5, rtol=1e-5, err_msg=f"output {key} differs"
518
+ )
519
+
520
+
443
521
  def test_relative_positional_encoding():
444
522
  time_dim = Dim(Tensor("time", [batch_dim], dtype="int32"))
445
523
  in_dim = Dim(8, name="in")
@@ -1,2 +0,0 @@
1
- version = '1.20241205.152736'
2
- long_version = '1.20241205.152736+git.4a762b5'