returnn 1.20250826.155029.tar.gz → 1.20250828.142552.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of returnn might be problematic.

Files changed (476)
  1. {returnn-1.20250826.155029/returnn.egg-info → returnn-1.20250828.142552}/PKG-INFO +1 -1
  2. returnn-1.20250828.142552/_setup_info_generated.py +2 -0
  3. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_cache.py +4 -2
  4. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/array_.py +72 -18
  5. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/conv.py +2 -1
  6. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/encoder/conformer.py +32 -8
  7. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tensor/_dim_extra.py +34 -6
  8. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/basic.py +8 -6
  9. {returnn-1.20250826.155029 → returnn-1.20250828.142552/returnn.egg-info}/PKG-INFO +1 -1
  10. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/rf_utils.py +26 -5
  11. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_array.py +40 -0
  12. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_attention.py +2 -2
  13. returnn-1.20250826.155029/_setup_info_generated.py +0 -2
  14. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/.editorconfig +0 -0
  15. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/.gitignore +0 -0
  16. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/.gitmodules +0 -0
  17. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/.kateconfig +0 -0
  18. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/CHANGELOG.md +0 -0
  19. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/CODEOWNERS +0 -0
  20. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/CONTRIBUTING.md +0 -0
  21. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/LICENSE +0 -0
  22. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/MANIFEST.in +0 -0
  23. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/README.rst +0 -0
  24. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/__init__.py +0 -0
  25. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/12AX.cluster_map +0 -0
  26. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/_setup_returnn_env.py +0 -0
  27. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-fwd.config +0 -0
  28. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-horovod-mpi.py +0 -0
  29. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-horovod-mpi.py.sh +0 -0
  30. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-horovod-mpi.sh +0 -0
  31. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-hyper-param-tuning.config +0 -0
  32. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-iter-dataset.py +0 -0
  33. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-list-devices.py +0 -0
  34. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-lua-torch-layer.config +0 -0
  35. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-pretrain.config +0 -0
  36. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-record-and-push-to-webserver.py +0 -0
  37. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-returnn-as-framework.py +0 -0
  38. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-rf-pt-benchmark.py +0 -0
  39. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-rf.config +0 -0
  40. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-rhn-enwik8.config +0 -0
  41. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-sprint-interface.py +0 -0
  42. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-att-copy.config +0 -0
  43. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-attention.config +0 -0
  44. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
  45. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
  46. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-enc-dec.config +0 -0
  47. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-hard-att-copy.config +0 -0
  48. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-lstm-benchmark.py +0 -0
  49. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
  50. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
  51. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-native-lstm.12ax.config +0 -0
  52. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-native-lstm2.12ax.config +0 -0
  53. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
  54. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-neural-transducer.12ax.config +0 -0
  55. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-rec-explicit-lstm.config +0 -0
  56. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-rec-explicit-rnn.config +0 -0
  57. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-rec-self-att.config +0 -0
  58. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-search-compiled-graph.py +0 -0
  59. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
  60. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-timit-lstm-ctc.config +0 -0
  61. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-torch.config +0 -0
  62. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
  63. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/demo.sh +0 -0
  64. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
  65. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
  66. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
  67. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/README.txt +0 -0
  68. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/chars.txt +0 -0
  69. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/config_demo +0 -0
  70. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/config_fwd +0 -0
  71. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/config_real +0 -0
  72. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
  73. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/decode.py +0 -0
  74. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
  75. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/go.sh +0 -0
  76. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/lines.txt +0 -0
  77. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/split/eval.txt +0 -0
  78. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/split/train.txt +0 -0
  79. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/IAM/split/valid.txt +0 -0
  80. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/README.md +0 -0
  81. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/artificial/create_test_h5.py +0 -0
  82. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/artificial/forwardconfig +0 -0
  83. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/artificial/go.sh +0 -0
  84. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/artificial/trainconfig +0 -0
  85. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
  86. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
  87. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/artificial_rgb/go.sh +0 -0
  88. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
  89. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/pyproject.toml +0 -0
  90. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/requirements.txt +0 -0
  91. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/__init__.py +0 -0
  92. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/__main__.py +0 -0
  93. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/__old_mod_loader__.py +0 -0
  94. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/__setup__.py +0 -0
  95. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/config.py +0 -0
  96. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/__init__.py +0 -0
  97. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/audio.py +0 -0
  98. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/basic.py +0 -0
  99. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/bundle_file.py +0 -0
  100. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/cached.py +0 -0
  101. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/cached2.py +0 -0
  102. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/distrib_files.py +0 -0
  103. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/generating.py +0 -0
  104. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/hdf.py +0 -0
  105. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/lm.py +0 -0
  106. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/map.py +0 -0
  107. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/meta.py +0 -0
  108. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/multi_proc.py +0 -0
  109. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/normalization_data.py +0 -0
  110. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/numpy_dump.py +0 -0
  111. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/postprocessing.py +0 -0
  112. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/raw_wav.py +0 -0
  113. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/sprint.py +0 -0
  114. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/stereo.py +0 -0
  115. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/text_dict.py +0 -0
  116. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/util/__init__.py +0 -0
  117. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/util/feature_extraction.py +0 -0
  118. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/util/strings.py +0 -0
  119. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/datasets/util/vocabulary.py +0 -0
  120. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/engine/__init__.py +0 -0
  121. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/engine/base.py +0 -0
  122. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/engine/batch.py +0 -0
  123. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/__init__.py +0 -0
  124. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/__main__.py +0 -0
  125. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/.git +0 -0
  126. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
  127. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
  128. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
  129. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
  130. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
  131. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
  132. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
  133. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
  134. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
  135. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
  136. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
  137. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
  138. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
  139. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
  140. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
  141. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
  142. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
  143. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
  144. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
  145. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
  146. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
  147. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
  148. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
  149. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
  150. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/__init__.py +0 -0
  151. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/graph_editor/README.md +0 -0
  152. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/graph_editor/__init__.py +0 -0
  153. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/graph_editor/edit.py +0 -0
  154. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/graph_editor/reroute.py +0 -0
  155. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/graph_editor/select.py +0 -0
  156. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/graph_editor/subgraph.py +0 -0
  157. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/graph_editor/transform.py +0 -0
  158. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/extern/graph_editor/util.py +0 -0
  159. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/forward_iface.py +0 -0
  160. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/__init__.py +0 -0
  161. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_backend.py +0 -0
  162. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_native/__init__.py +0 -0
  163. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_native/backend.cpp +0 -0
  164. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_native/backend.hpp +0 -0
  165. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_native/module.cpp +0 -0
  166. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_native/module.hpp +0 -0
  167. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_native/py_utils.hpp +0 -0
  168. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_native/tensor_ops.cpp +0 -0
  169. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_native/tensor_ops.hpp +0 -0
  170. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_numpy_backend.py +0 -0
  171. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_random_journal.py +0 -0
  172. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/_utils.py +0 -0
  173. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/attention.py +0 -0
  174. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/audio/__init__.py +0 -0
  175. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/audio/mel.py +0 -0
  176. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/audio/specaugment.py +0 -0
  177. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/backend.py +0 -0
  178. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/build_from_dict.py +0 -0
  179. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/cond.py +0 -0
  180. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/const.py +0 -0
  181. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/container.py +0 -0
  182. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/control_flow_ctx.py +0 -0
  183. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/conversions/__init__.py +0 -0
  184. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/conversions/espnet_e_branchformer.py +0 -0
  185. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/conversions/hf_llama.py +0 -0
  186. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/conversions/torch_nn.py +0 -0
  187. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/decoder/__init__.py +0 -0
  188. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/decoder/transformer.py +0 -0
  189. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/device.py +0 -0
  190. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/dims.py +0 -0
  191. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/dropout.py +0 -0
  192. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/dtype.py +0 -0
  193. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/encoder/__init__.py +0 -0
  194. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/encoder/base.py +0 -0
  195. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/encoder/conformer_v2.py +0 -0
  196. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/encoder/e_branchformer.py +0 -0
  197. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/encoder/transformer.py +0 -0
  198. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/gradient.py +0 -0
  199. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/graph.py +0 -0
  200. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/hooks.py +0 -0
  201. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/init.py +0 -0
  202. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/label_smoothing.py +0 -0
  203. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/linear.py +0 -0
  204. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/loop.py +0 -0
  205. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/loss.py +0 -0
  206. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/math_.py +0 -0
  207. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/matmul.py +0 -0
  208. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/module.py +0 -0
  209. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/nested.py +0 -0
  210. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/normalization.py +0 -0
  211. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/parameter.py +0 -0
  212. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/parametrizations.py +0 -0
  213. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/parametrize.py +0 -0
  214. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/piecewise_linear.py +0 -0
  215. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/rand.py +0 -0
  216. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/rec.py +0 -0
  217. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/reduce.py +0 -0
  218. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/run_ctx.py +0 -0
  219. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/signal.py +0 -0
  220. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/state.py +0 -0
  221. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/stepwise_scheduler.py +0 -0
  222. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/tensor_array.py +0 -0
  223. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/frontend/types.py +0 -0
  224. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/import_/__init__.py +0 -0
  225. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/import_/common.py +0 -0
  226. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/import_/git.py +0 -0
  227. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/import_/import_.py +0 -0
  228. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/learning_rate_control.py +0 -0
  229. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/log.py +0 -0
  230. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/native_op.cpp +0 -0
  231. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/native_op.py +0 -0
  232. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/pretrain.py +0 -0
  233. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/sprint/__init__.py +0 -0
  234. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/sprint/cache.py +0 -0
  235. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/sprint/control.py +0 -0
  236. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/sprint/error_signals.py +0 -0
  237. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/sprint/extern_interface.py +0 -0
  238. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/sprint/interface.py +0 -0
  239. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tensor/README.md +0 -0
  240. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tensor/__init__.py +0 -0
  241. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tensor/_tensor_extra.py +0 -0
  242. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tensor/_tensor_mixin_base.py +0 -0
  243. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tensor/_tensor_op_overloads.py +0 -0
  244. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tensor/control_flow_ctx.py +0 -0
  245. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tensor/dim.py +0 -0
  246. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tensor/marked_dim.py +0 -0
  247. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tensor/tensor.py +0 -0
  248. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tensor/tensor_dict.py +0 -0
  249. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tensor/utils.py +0 -0
  250. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/__init__.py +0 -0
  251. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/compat.py +0 -0
  252. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/data_pipeline.py +0 -0
  253. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/distributed.py +0 -0
  254. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/engine.py +0 -0
  255. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/README.md +0 -0
  256. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/__init__.py +0 -0
  257. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/_backend.py +0 -0
  258. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/_utils.py +0 -0
  259. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/cond.py +0 -0
  260. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
  261. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
  262. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/dims.py +0 -0
  263. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/layer.py +0 -0
  264. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/loop.py +0 -0
  265. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/make_layer.py +0 -0
  266. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/masked_computation.py +0 -0
  267. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
  268. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
  269. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_low_level/__init__.py +0 -0
  270. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/frontend_low_level/_backend.py +0 -0
  271. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/horovod.py +0 -0
  272. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/hyper_param_tuning.py +0 -0
  273. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/layers/__init__.py +0 -0
  274. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/layers/base.py +0 -0
  275. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/layers/basic.py +0 -0
  276. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/layers/rec.py +0 -0
  277. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/layers/segmental_model.py +0 -0
  278. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/layers/signal_processing.py +0 -0
  279. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/layers/variable.py +0 -0
  280. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/native_op.py +0 -0
  281. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/network.py +0 -0
  282. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/sprint.py +0 -0
  283. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/updater.py +0 -0
  284. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/util/__init__.py +0 -0
  285. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/util/basic.py +0 -0
  286. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/util/data.py +0 -0
  287. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/util/gradient_checkpoint.py +0 -0
  288. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/util/ken_lm.py +0 -0
  289. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/tf/util/open_fst.py +0 -0
  290. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/README.md +0 -0
  291. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/__init__.py +0 -0
  292. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/data/__init__.py +0 -0
  293. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/data/extern_data.py +0 -0
  294. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/data/pipeline.py +0 -0
  295. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/data/queued_data_iter.py +0 -0
  296. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
  297. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/data/tensor_utils.py +0 -0
  298. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/distributed.py +0 -0
  299. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/engine.py +0 -0
  300. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/frontend/__init__.py +0 -0
  301. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/frontend/_backend.py +0 -0
  302. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/frontend/_rand.py +0 -0
  303. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/frontend/bridge.py +0 -0
  304. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/frontend/raw_ops.py +0 -0
  305. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/optim/README.md +0 -0
  306. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/optim/__init__.py +0 -0
  307. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/optim/lion.py +0 -0
  308. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/updater.py +0 -0
  309. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/util/README.md +0 -0
  310. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/util/__init__.py +0 -0
  311. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/util/array_.py +0 -0
  312. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/util/debug_inf_nan.py +0 -0
  313. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/util/diagnose_gpu.py +0 -0
  314. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/util/exception_helper.py +0 -0
  315. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/util/gradient_checkpoint.py +0 -0
  316. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/util/module.py +0 -0
  317. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/torch/util/scaled_gradient.py +0 -0
  318. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/__init__.py +0 -0
  319. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/better_exchook.py +0 -0
  320. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/bpe.py +0 -0
  321. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/debug.py +0 -0
  322. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/debug_helpers.py +0 -0
  323. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/file_cache.py +0 -0
  324. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/fsa.py +0 -0
  325. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/literal_py_to_pickle.py +0 -0
  326. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/lru_cache.py +0 -0
  327. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/math.py +0 -0
  328. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
  329. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/native_code_compiler.py +0 -0
  330. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/pprint.py +0 -0
  331. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/py-to-pickle.cpp +0 -0
  332. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/py_ext_mod_compiler.py +0 -0
  333. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/result_with_reason.py +0 -0
  334. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/sig_proc.py +0 -0
  335. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/task_system.py +0 -0
  336. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/train_proc_manager.py +0 -0
  337. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn/util/watch_memory.py +0 -0
  338. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn.egg-info/SOURCES.txt +0 -0
  339. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn.egg-info/dependency_links.txt +0 -0
  340. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn.egg-info/requires.txt +0 -0
  341. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/returnn.egg-info/top_level.txt +0 -0
  342. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/rnn.py +0 -0
  343. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/setup.cfg +0 -0
  344. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/setup.py +0 -0
  345. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/DummySprintExec.py +0 -0
  346. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/PyCharm-inspection-profile.xml +0 -0
  347. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/PyCharm.idea/.gitignore +0 -0
  348. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/PyCharm.idea/.name +0 -0
  349. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
  350. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
  351. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
  352. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
  353. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
  354. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/PyCharm.idea/misc.xml +0 -0
  355. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/PyCharm.idea/modules.xml +0 -0
  356. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/PyCharm.idea/returnn.iml +0 -0
  357. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
  358. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/_set_num_threads1.py +0 -0
  359. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/_setup_returnn_env.py +0 -0
  360. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/_setup_test_env.py +0 -0
  361. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/bpe-unicode-demo.codes +0 -0
  362. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/bpe-unicode-demo.vocab +0 -0
  363. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/lexicon_opt.fst +0 -0
  364. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/lexicon_opt.isyms +0 -0
  365. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/lexicon_opt.jpg +0 -0
  366. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/lexicon_opt.osyms +0 -0
  367. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/lint_common.py +0 -0
  368. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/pycharm-inspect.py +0 -0
  369. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/pylint.py +0 -0
  370. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/returnn-as-framework.py +0 -0
  371. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/spelling.dic +0 -0
  372. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_Config.py +0 -0
  373. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_Dataset.py +0 -0
  374. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_Fsa.py +0 -0
  375. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_GeneratingDataset.py +0 -0
  376. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_HDFDataset.py +0 -0
  377. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_LearningRateControl.py +0 -0
  378. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_Log.py +0 -0
  379. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_MultiProcDataset.py +0 -0
  380. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_Pretrain.py +0 -0
  381. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_ResNet.py +0 -0
  382. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_SprintDataset.py +0 -0
  383. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_SprintInterface.py +0 -0
  384. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_TFEngine.py +0 -0
  385. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_TFNativeOp.py +0 -0
  386. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_TFNetworkLayer.py +0 -0
  387. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_TFNetworkRecLayer.py +0 -0
  388. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_TFNetworkSigProcLayer.py +0 -0
  389. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_TFUpdater.py +0 -0
  390. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_TFUtil.py +0 -0
  391. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_TF_determinism.py +0 -0
  392. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_TaskSystem.py +0 -0
  393. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_TaskSystem_SharedMem.py +0 -0
  394. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_TranslationDataset.py +0 -0
  395. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_Util.py +0 -0
  396. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_demos.py +0 -0
  397. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_fork_exec.py +0 -0
  398. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_hdf_dump.py +0 -0
  399. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_base.py +0 -0
  400. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_cond.py +0 -0
  401. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_const.py +0 -0
  402. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_container.py +0 -0
  403. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_conv.py +0 -0
  404. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_decoder_transformer.py +0 -0
  405. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_encoder_conformer.py +0 -0
  406. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_gradient.py +0 -0
  407. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_label_smoothing.py +0 -0
  408. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_loop.py +0 -0
  409. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_math.py +0 -0
  410. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_normalization.py +0 -0
  411. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_piecewise_linear.py +0 -0
  412. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_rec.py +0 -0
  413. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_reduce.py +0 -0
  414. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_rf_signal.py +0 -0
  415. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_tensor.py +0 -0
  416. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_threading.py +0 -0
  417. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_tools.py +0 -0
  418. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_torch_dataset.py +0 -0
  419. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_torch_engine.py +0 -0
  420. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_torch_frontend.py +0 -0
  421. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_torch_internal_frontend.py +0 -0
  422. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/test_torch_util.py +0 -0
  423. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tests/torch_utils.py +0 -0
  424. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/_setup_returnn_env.py +0 -0
  425. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/analyze-dataset-batches.py +0 -0
  426. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/bliss-collect-seq-lens.py +0 -0
  427. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/bliss-dump-text.py +0 -0
  428. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/bliss-get-segment-names.py +0 -0
  429. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/bliss-to-ogg-zip.py +0 -0
  430. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/bpe-create-lexicon.py +0 -0
  431. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/calculate-word-error-rate.py +0 -0
  432. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/cleanup-old-models.py +0 -0
  433. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/collect-orth-symbols.py +0 -0
  434. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/collect-words.py +0 -0
  435. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/compile_native_op.py +0 -0
  436. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/compile_tf_graph.py +0 -0
  437. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/debug-dump-search-scores.py +0 -0
  438. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/debug-plot-search-scores.py +0 -0
  439. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/dump-dataset-raw-strings.py +0 -0
  440. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/dump-dataset.py +0 -0
  441. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/dump-forward-stats.py +0 -0
  442. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/dump-forward.py +0 -0
  443. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/dump-network-json.py +0 -0
  444. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/dump-pickle.py +0 -0
  445. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/extract_state_tying_from_dataset.py +0 -0
  446. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/get-attention-weights.py +0 -0
  447. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/get-best-model-epoch.py +0 -0
  448. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/hdf_dump.py +0 -0
  449. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/hdf_dump_translation_dataset.py +0 -0
  450. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/import-blocks-mt-model.py +0 -0
  451. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/import-t2t-mt-model.py +0 -0
  452. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/.gitignore +0 -0
  453. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/Makefile +0 -0
  454. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/README.md +0 -0
  455. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/example/README.md +0 -0
  456. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/example/libs_list +0 -0
  457. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
  458. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
  459. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
  460. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/example/state_vars_list +0 -0
  461. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/example/tensor_names_list +0 -0
  462. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/file.h +0 -0
  463. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
  464. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
  465. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/main.cc +0 -0
  466. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/rescorer.h +0 -0
  467. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/vocabulary.cc +0 -0
  468. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/lattice_rescorer/vocabulary.h +0 -0
  469. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/tf_avg_checkpoints.py +0 -0
  470. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/tf_inspect_checkpoint.py +0 -0
  471. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/tf_inspect_summary_log.py +0 -0
  472. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/torch_avg_checkpoints.py +0 -0
  473. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/torch_export_to_onnx.py +0 -0
  474. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/torch_inspect_checkpoint.py +0 -0
  475. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/torch_inspect_checkpoint_and_opt.py +0 -0
  476. {returnn-1.20250826.155029 → returnn-1.20250828.142552}/tools/torch_scale_tuning.py +0 -0
{returnn-1.20250826.155029/returnn.egg-info → returnn-1.20250828.142552}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250826.155029
+Version: 1.20250828.142552
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
returnn-1.20250828.142552/_setup_info_generated.py (new file)
@@ -0,0 +1,2 @@
+version = '1.20250828.142552'
+long_version = '1.20250828.142552+git.f81cb9a'
returnn/frontend/_cache.py
@@ -6,7 +6,7 @@ One use case example is :func:`sinusoidal_positional_encoding` and :func:`relati
 """
 
 from __future__ import annotations
-from typing import Optional, Union, Any, Type, Callable, Tuple, Dict
+from typing import Optional, Union, Any, Type, Callable, Tuple, Dict, List
 from weakref import ref
 import tree
 from returnn.util.lru_cache import lru_cache
@@ -59,6 +59,8 @@ class Cache:
             if isinstance(key_item_orig, DimWrapper):
                 assert isinstance(key_item, DimWrapper)
                 dim_orig = key_item_orig.dim_ref()
+                if dim_orig is None:  # orig dim could be dead. but then it would not be used anyway
+                    continue
                 dim = key_item.dim_ref()
                 assert isinstance(dim_orig, Dim) and isinstance(dim, Dim)
                 dim_map[dim_orig] = dim
@@ -103,7 +105,7 @@ def _transform_key(
     key: Any, *, finalize_callback: Optional[Callable] = None, collected_dim_map: Optional[Dict[Dim, DimWrapper]] = None
 ) -> Tuple[Union[Type[Backend], ref[rf.RunCtx], _KeyItemType], ...]:
     backend = _get_backend(key)
-    keys_flat = [backend]
+    keys_flat: List[Any] = [backend]
     if not backend.executing_eagerly():
         # See comment above: If graph-mode, the cached value becomes invalid
         # when the current run ctx goes out of scope.
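
The new dim_ref() guard covers the case where the cache holds only a weak reference to a Dim that has already been garbage-collected: calling a dead weakref.ref returns None. A minimal plain-Python sketch of that behavior (the Obj class is just a stand-in, not part of RETURNN):

    import gc
    import weakref

    class Obj:  # stands in for a Dim that the cache references only weakly
        pass

    o = Obj()
    r = weakref.ref(o)
    assert r() is o      # referent alive: calling the ref yields the object
    del o
    gc.collect()         # make collection deterministic across implementations
    assert r() is None   # referent dead: calling the ref yields None, hence the guard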
returnn/frontend/array_.py
@@ -188,22 +188,18 @@ def merge_dims(
             return source, dims[0]
         return rf.replace_dim(source, in_dim=dims[0], out_dim=out_dim)
     if out_dim is None:
-        out_dim = dims[0]
-        reset_dyn_size = False
-        for d in dims[1:]:
-            reset_dyn_size |= d.need_masking() and out_dim.capacity != 1
-            out_dim = out_dim * d
-        if reset_dyn_size:
+        from returnn.util.basic import prod
+
+        if any(d.need_masking() for d in dims[1:]):
             # The dynamic sizes as calculated via dim math would not correctly describe how the tensor looks like.
             # This would then potentially discard some of the data in the tensor in subsequent operations,
             # when masking is applied.
             # Thus, discard the dynamic sizes, and just treat it as a flat dim with scalar dynamic size.
             # https://github.com/rwth-i6/returnn/issues/1694
-            out_dim_size = dims[0].get_dim_value_tensor()
-            for d in dims[1:]:
-                out_dim_size *= d.get_dim_value_tensor()
-            assert isinstance(out_dim_size, Tensor) and out_dim_size.dims == ()  # scalar
-            out_dim.dyn_size_ext = out_dim_size
+            # See also similar logic in :func:`concat`.
+            out_dim = Dim(prod(d.get_dim_value_tensor() for d in dims), name="merged")
+        else:
+            out_dim = prod(dims)
     # noinspection PyProtectedMember
     return source._raw_backend.merge_dims(source, dims=dims, out_dim=out_dim), out_dim
 
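The comment in this hunk references rwth-i6/returnn#1694. A small self-contained numpy sketch of the underlying problem (illustrative only, not the RETURNN API): flattening a padded tensor interleaves padding with valid data, so per-sequence sizes from dim math no longer describe the flat layout.

    import numpy as np

    # Batch of 2 sequences with lengths [2, 3], padded to max_time = 3 (0 = pad).
    x = np.array([[1, 2, 0],
                  [3, 4, 5]])
    flat = x.reshape(-1)  # merge (batch, time) -> [1 2 0 3 4 5]
    # Per-sequence sizes say only 5 entries are valid in total; masking "the
    # first 5" of the flat axis would keep the pad 0 at index 2 and drop the
    # valid 5 at index 5. Hence merge_dims now builds a fresh flat Dim with
    # scalar size 6 (the product of the dim values) whenever a merged dim
    # needs masking.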
@@ -427,13 +423,40 @@ def concat(
         dims = sources[0][0].dims_set - {sources[0][1]}
         for src, dim in sources:
             assert src.dims_set - {dim} == dims, f"concat {sources}, need allow_broadcast=True"
+    need_handle_dynamic_dims = False
+    for src, dim in sources[:-1]:
+        if dim.need_masking():
+            need_handle_dynamic_dims = True
+    if handle_dynamic_dims is None:
+        handle_dynamic_dims = need_handle_dynamic_dims
     if not out_dim:
-        out_dim = sum(d for _, d in sources)
-    if handle_dynamic_dims is None or handle_dynamic_dims:
-        for src, dim in sources[:-1]:
-            assert dim.is_static(), f"concat {sources}, dim {dim} is not static, not yet implemented..."
-    # noinspection PyProtectedMember
-    return sources[0][0]._raw_backend.concat(*sources, allow_broadcast=allow_broadcast, out_dim=out_dim), out_dim
+        if handle_dynamic_dims or not need_handle_dynamic_dims:
+            out_dim = sum(d for _, d in sources)
+        else:  # not handle_dynamic_dims but need_handle_dynamic_dims
+            # There are dynamic dims, but we don't want to handle them.
+            # So, summing the dims would be incorrect.
+            # Just add the dim values.
+            out_dim = Dim(sum(d.get_dim_value_tensor() for _, d in sources if d.dimension is not None), name="concat")
+    if handle_dynamic_dims:
+        out_non_masked_dim = Dim(sum(d.get_dim_value_tensor() for _, d in sources))
+        # noinspection PyProtectedMember
+        out = sources[0][0]._raw_backend.concat(*sources, allow_broadcast=allow_broadcast, out_dim=out_non_masked_dim)
+        masks = []
+        for _, dim in sources:
+            masks.append(
+                dim.get_mask(dim_order=(dim,) + dim.dyn_size_ext.dims, device=out.device)
+                if dim.need_masking()
+                else rf.constant(True, dims=[dim], device=out.device)
+            )
+        # noinspection PyProtectedMember
+        mask_concat = sources[0][0]._raw_backend.concat(
+            *[(mask, dim) for (_, dim), mask in zip(sources, masks)], allow_broadcast=True, out_dim=out_non_masked_dim
+        )
+        out, _ = rf.masked_select(out, mask=mask_concat, dims=[out_non_masked_dim], out_dim=out_dim)
+    else:
+        # noinspection PyProtectedMember
+        out = sources[0][0]._raw_backend.concat(*sources, allow_broadcast=allow_broadcast, out_dim=out_dim)
+    return out, out_dim
 
 
 def concat_features(*sources: Tensor, allow_broadcast=False) -> Tensor:
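
The new handle_dynamic_dims path can be pictured in plain numpy (illustrative only; the real code works on Tensor/Dim and compacts via rf.masked_select): concatenate along the padded axis first, then select with the concatenated validity mask.

    import numpy as np

    # Two padded batches, per-sequence lengths a_lens = [2, 1], b_lens = [1, 2].
    a = np.array([[1, 2], [3, 0]])          # 0 = padding
    b = np.array([[7, 0], [8, 9]])
    naive = np.concatenate([a, b], axis=1)  # [[1 2 7 0] [3 0 8 9]]: row 1 has
                                            # padding *between* its two parts
    a_lens, b_lens = np.array([2, 1]), np.array([1, 2])
    mask = np.concatenate(                  # validity mask, same layout as naive
        [np.arange(2) < a_lens[:, None], np.arange(2) < b_lens[:, None]], axis=1
    )
    out_lens = a_lens + b_lens              # true concatenated lengths: [3, 3]
    compact = [row[m] for row, m in zip(naive, mask)]  # [1 2 7] and [3 8 9]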
@@ -478,7 +501,12 @@ def pad(
     if handle_dynamic_dims is None:
         handle_dynamic_dims = _pad_handle_dynamic_dims_default(axes, padding, mode=mode)
     if not out_dims:
-        out_dims = [left + middle + right for middle, (left, right) in zip(axes, padding)]
+        out_dims = [
+            (left + middle + right)
+            if handle_dynamic_dims or not _pad_need_dyn_dim_handling(middle, left, right, mode=mode)
+            else _pad_sum_dims_no_dyn_dim_handling(middle, left, right)
+            for middle, (left, right) in zip(axes, padding)
+        ]
     # noinspection PyProtectedMember
     return (
         source._raw_backend.pad(
@@ -544,6 +572,32 @@ def _pad_need_dyn_dim_handling(
     return True
 
 
+def _pad_sum_dims_no_dyn_dim_handling(
+    middle: Dim, left: Union[Dim, int, Tensor], right: Union[Dim, int, Tensor]
+) -> Dim:
+    """
+    This gets called when we need to handle dyn dims, but handle_dynamic_dims=False.
+    See also the same logic in :func:`concat`.
+    """
+    if isinstance(left, Dim):
+        left = left.get_dim_value_tensor()
+    elif isinstance(left, int):
+        pass
+    elif isinstance(left, Tensor):
+        assert left.dims == ()  # scalar
+    else:
+        raise TypeError(f"invalid left pad {left}")
+    if isinstance(right, Dim):
+        right = right.get_dim_value_tensor()
+    elif isinstance(right, int):
+        pass
+    elif isinstance(right, Tensor):
+        assert right.dims == ()  # scalar
+    else:
+        raise TypeError(f"invalid right pad {right}")
+    return Dim(left + middle.get_dim_value_tensor() + right, name="pad")
+
+
 def cum_concat_step(
     source: Tensor, *, prev_accum: Tensor, axis: Dim, out_spatial_dim: Optional[Dim] = None
 ) -> Tuple[Tensor, Dim]:
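
A numpy illustration of why the scalar-size fallback is needed for pad as well (illustrative only, not the RETURNN API): padding is appended at the end of the axis, not after each sequence, so dim-math sizes would mis-describe the result when masking is skipped.

    import numpy as np

    # Per-sequence lengths [2, 3], padded to max_time = 3 (0 = pad).
    x = np.array([[1, 2, 0],
                  [3, 4, 5]])
    padded = np.pad(x, ((0, 0), (0, 1)), constant_values=9)
    # [[1 2 0 9] [3 4 5 9]]: the appended 9s sit at the end of the axis.
    # Dim-math sizes lengths + 1 == [3, 4] would call [1 2 0] the valid part
    # of row 0, keeping old padding and dropping its 9. So with
    # handle_dynamic_dims=False, _pad_sum_dims_no_dyn_dim_handling returns a
    # Dim with the scalar size 3 + 1 instead.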
returnn/frontend/conv.py
@@ -862,8 +862,9 @@ def _consistent_same_padding(
         pad_right = (s - 1) * d - pad_left
         paddings.append((pad_left, pad_right))
     # We expect that masking was already done before (or we don't care about it), thus handle_dynamic_dims=False.
+    out_dims = [(left + middle + right) for middle, (left, right) in zip(in_spatial_dims, paddings)]
     source, in_spatial_dims = rf.pad(
-        source, axes=in_spatial_dims, padding=paddings, value=pad_value, handle_dynamic_dims=False
+        source, axes=in_spatial_dims, padding=paddings, value=pad_value, handle_dynamic_dims=False, out_dims=out_dims
     )
     return source, in_spatial_dims, 0
 
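Here the caller opts out of that fallback on purpose: for "same" conv padding, the output spatial dim must stay tied to the per-sequence input lengths, so out_dims is precomputed via dim math and passed explicitly. A minimal sketch of that dim arithmetic (a static dim stands in for a dynamic spatial dim; int + Dim is the same dim math used in the list comprehension above):

    from returnn.tensor import Dim

    time_dim = Dim(10, name="time")    # stands in for a dynamic spatial dim
    left, right = 1, 1                 # "same" padding for kernel 3, dilation 1
    out_dim = left + time_dim + right  # dim math: a proper Dim of size 12,
                                       # keeping the link to the input lengths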
returnn/frontend/encoder/conformer.py
@@ -8,6 +8,8 @@ https://github.com/rwth-i6/returnn_common/issues/233
 
 from __future__ import annotations
 from typing import Optional, Union, Any, Tuple, List, Dict, Callable
+from types import FunctionType
+import functools
 import copy as _copy
 from returnn.tensor import Tensor, Dim
 import returnn.frontend as rf
@@ -298,7 +300,8 @@ class ConformerEncoder(ISeqDownsamplingEncoder):
         *,
         num_layers: int,
         input_layer: Optional[Union[ConformerConvSubsample, ISeqDownsamplingEncoder, rf.Module, Any]],
-        input_embedding_scale: float = 1.0,
+        input_embedding_scale: Optional[float] = None,
+        pos_enc: Union[None, Callable, Dict[str, Any], rf.Module] = None,
         input_dropout: float = 0.1,
         ff_dim: Dim = NotSpecified,
         ff_activation: Union[Callable[[Tensor], Tensor], Dict[str, Any], rf.Module] = NotSpecified,
@@ -317,8 +320,17 @@ class ConformerEncoder(ISeqDownsamplingEncoder):
         :param num_layers: the number of encoder layers
         :param input_layer: input/frontend/prenet with potential subsampling.
             (x, in_spatial_dim) -> (y, out_spatial_dim)
-        :param input_embedding_scale: applied after input_layer. 1.0 by default for historic reasons.
-            In the standard Transformer, and also ESPnet's E-Branchformer and Conformer, this is sqrt(out_dim).
+        :param input_embedding_scale: applied after input_layer.
+            1.0 by default for historic reasons if pos_enc is None,
+            else sqrt(out_dim) by default.
+            In the standard Transformer, and also ESPnet's E-Branchformer and Conformer, this is sqrt(out_dim),
+            which is relevant when you add positional encoding.
+        :param pos_enc: positional encoding, applied after input_embedding_scale.
+            None (no positional encoding) by default, unlike the standard Transformer.
+            E.g. :func:`rf.sinusoidal_positional_encoding` for absolute pos enc.
+            Note that relative positional encoding is usually part of the attention layer,
+            e.g. :class:`rf.RelPosSelfAttention`,
+            and then nothing needs to be set here.
         :param input_dropout: applied after input_projection(input_layer(x))
         :param ff_dim: the dimension of feed-forward layers. 2048 originally, or 4 times out_dim
         :param ff_activation: activation function for feed-forward network
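A hedged construction sketch for the new pos_enc argument (the dims and hyperparameters below are made up; the default path with pos_enc=None behaves exactly as before):

import returnn.frontend as rf
from returnn.tensor import Dim
from returnn.frontend.encoder.conformer import ConformerEncoder

in_dim = Dim(80, name="feature")
out_dim = Dim(512, name="model")
# With absolute positional encoding enabled, input_embedding_scale
# defaults to sqrt(out_dim), matching the standard Transformer setup.
encoder = ConformerEncoder(
    in_dim,
    out_dim,
    num_layers=12,
    input_layer=None,
    pos_enc=rf.sinusoidal_positional_encoding,
)

As the __call__ change further down shows, the encoding is added after the embedding scale and before input dropout; for relative positional encoding inside self-attention (e.g. rf.RelPosSelfAttention), pos_enc stays None.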
@@ -352,12 +364,22 @@ class ConformerEncoder(ISeqDownsamplingEncoder):
         else:
             raise TypeError(f"unexpected input_layer {input_layer!r}")
         self.input_layer = input_layer
-        self.input_projection = (
-            rf.Linear(self.input_layer.out_dim if self.input_layer else self.in_dim, self.out_dim, with_bias=False)
-            if input_layer
-            else None
-        )
+        in_dim = self.input_layer.out_dim if self.input_layer else self.in_dim
+        self.input_projection = rf.Linear(in_dim, self.out_dim, with_bias=False) if in_dim != self.out_dim else None
+        if input_embedding_scale is None:
+            input_embedding_scale = (self.out_dim.dimension**0.5) if pos_enc is not None else 1.0
         self.input_embedding_scale = input_embedding_scale
+        if pos_enc is None:
+            pass
+        elif isinstance(pos_enc, dict):
+            pos_enc = rf.build_from_dict(pos_enc, feat_dim=self.out_dim)
+        elif isinstance(pos_enc, rf.Module):
+            pass
+        elif isinstance(pos_enc, FunctionType):
+            pos_enc = functools.partial(pos_enc, feat_dim=self.out_dim)
+        else:
+            raise TypeError(f"unexpected pos_enc type {pos_enc!r}")
+        self.pos_enc = pos_enc
         self.input_dropout = input_dropout

         if not encoder_layer or isinstance(encoder_layer, (dict, type)):
@@ -411,6 +433,8 @@ class ConformerEncoder(ISeqDownsamplingEncoder):
         x = self.input_projection(x_subsample) if self.input_projection else x_subsample
         if self.input_embedding_scale != 1.0:
             x = x * self.input_embedding_scale
+        if self.pos_enc is not None:
+            x = x + self.pos_enc(spatial_dim=out_spatial_dim)
         x = rf.dropout(x, self.input_dropout, axis=self.dropout_broadcast and self.out_dim)
         x = self.layers(x, spatial_dim=out_spatial_dim, collected_outputs=collected_outputs)
         return x, out_spatial_dim
@@ -1264,7 +1264,6 @@ class _DimMixin:
             raise TypeError(f"complete_dyn_size: _relu: unexpected type {type(a)}")

         y: Optional[_t.Tensor] = None  # resulting dyn size
-        y_max_value: Optional[_t.Tensor] = None  # resulting dyn size max value
         inputs = list(op.inputs)
         assert inputs
         for x_dim in inputs:
@@ -1275,8 +1274,6 @@ class _DimMixin:
             if x_dim.dyn_size_ext is None and x_dim.dimension is None:
                 return
             y = _bin_op(y, x_dim.dimension if x_dim.dimension is not None else x_dim.dyn_size_ext)
-            if not template_only and y.raw_tensor is not None:
-                y_max_value = _bin_op(y_max_value, x_dim.get_dim_value_tensor())
         assert y is not None, f"op {op}?"
         if self.dyn_size_ext is not None:
             assert self.dyn_size_ext.dim_tags == y.dim_tags
@@ -1286,9 +1283,14 @@ class _DimMixin:
         else:
             self.batch = y.batch
         self.dyn_size_ext = y
-        if not template_only and y_max_value is not None:
-            assert y_max_value is not None and y_max_value.raw_tensor is not None
-            self._dyn_size_max_value = y_max_value
+        if not template_only and y.raw_tensor is not None:
+            # Note: Earlier, we had this wrong.
+            # It is not correct to replicate the same math (bin ops)
+            # on the dim values (_dyn_size_max_value of each dim).
+            # Consider sizes1=[2,3], sizes2=[5,4], and the op is "add".
+            # Then the result sizes would be [7,7], thus its max is 7,
+            # but max(sizes1)+max(sizes2)=3+5=8.
+            self._dyn_size_max_value = rf.reduce_max(y, axis=y.dims) if y.dims else y
         if tf and y.placeholder is not None:
             self.set_tag_on_size_tensor(y.placeholder)
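The comment's counterexample, checked in plain numpy: the max of an elementwise sum is not the sum of the per-dim maxima, which is why the max value must now be reduced from the actual resulting sizes:

import numpy as np

sizes1 = np.array([2, 3])
sizes2 = np.array([5, 4])
result = sizes1 + sizes2                  # per-sequence sizes [7, 7]
assert result.max() == 7                  # correct _dyn_size_max_value
assert sizes1.max() + sizes2.max() == 8   # the old, incorrect computation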
@@ -2080,6 +2082,8 @@ class _DimMixin:
         :return: self + other. note that this is not commutative, i.e. different from other + self.
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 0:
+            return self
         cache_key = ("add", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2098,6 +2102,8 @@ class _DimMixin:
         :return: other + self
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 0:
+            return self
         cache_key = ("add_left", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2115,6 +2121,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 0:
+            return self
         return self.sub_right(other)

     def sub_right(self: Dim, other):
@@ -2123,6 +2131,8 @@ class _DimMixin:
         :return: self - other
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 0:
+            return self
         cache_key = ("sub", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2141,6 +2151,8 @@ class _DimMixin:
         :return: (-other) + self
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 0:
+            return self
         cache_key = ("sub_left", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2158,6 +2170,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("mul", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2175,6 +2189,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("mul_left", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2192,6 +2208,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("floordiv", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2209,6 +2227,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         return self.div_right(other)

     def div_left(self: Dim, other):
@@ -2216,6 +2236,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("truediv_left", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2233,6 +2255,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("truediv", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2250,6 +2274,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("ceildiv_left", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -2267,6 +2293,8 @@ class _DimMixin:
         :param Dim|int other:
         :rtype: Dim
         """
+        if isinstance(other, int) and other == 1:
+            return self
         cache_key = ("ceildiv", other)
         cache = self.get_same_base()._make_extra().cache_dim_math
         cache_entry = cache.get(cache_key, None)
@@ -1693,15 +1693,17 @@ def inplace_increment(x: numpy.ndarray, idx: numpy.ndarray, y: Union[numpy.ndarr
     raise NotImplementedError("This feature was removed with dropped Theano support")


-def prod(ls):
+def prod(ls: Union[Iterable[T], numpy.ndarray]) -> Union[int, T, float]:
     """
-    :param list[T]|tuple[T]|numpy.ndarray ls:
-    :rtype: T|int|float
+    :param ls:
+    :return: ls[0] * ls[1] * ...
     """
-    if len(ls) == 0:
+    it = iter(ls)
+    try:
+        x = next(it)
+    except StopIteration:
         return 1
-    x = ls[0]
-    for y in ls[1:]:
+    for y in it:
         x = x * y  # *= doesn't work because x might be a tensor, and for e.g. torch.Tensor this op is in-place
     return x
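The rewritten prod consumes any iterable instead of indexing into a sequence, so generators now work, and an empty input still yields 1:

from returnn.util.basic import prod

assert prod([2, 3, 4]) == 24
assert prod(x for x in range(1, 5)) == 24  # generators are accepted now
assert prod([]) == 1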
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250826.155029
+Version: 1.20250828.142552
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
@@ -48,6 +48,26 @@ def tf_scope():
     yield session


+class RunModelException(Exception):
+    """run model exception"""
+
+
+class NonFiniteValuesException(RunModelException):
+    """non-finite values exception"""
+
+
+class CompareResultsMismatchException(RunModelException):
+    """compare results exception"""
+
+
+class CompareResultsMismatchTfVsPtException(CompareResultsMismatchException):
+    """compare results TF vs PT exception"""
+
+
+class CompareResultsMismatchSingleVsMultiBatchException(CompareResultsMismatchException):
+    """compare results single vs multi batch exception"""
+
+
 def run_model(
     extern_data: TensorDict,
     get_model: rf.GetModelFunc,
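A hedged sketch of what the new hierarchy buys callers: comparison mismatches can be handled separately from non-finite failures (extern_data, get_model and forward_step below are placeholders for a caller's own objects):

try:
    out = run_model(extern_data, get_model, forward_step)
except CompareResultsMismatchException as exc:
    # covers both the TF-vs-PT and the single-vs-multi-batch mismatch
    print(f"backends/batching disagree: {exc}")
except NonFiniteValuesException:
    raise  # inf/nan in outputs is always fatal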
@@ -85,7 +105,7 @@ def run_model(
             lambda: (_run_model_torch(extern_data, get_model, forward_step), None)[-1],
             stop_reporting_after_first_inf_nan=False,
         )
-        raise Exception(f"Non-finite values in output: {non_finite_outputs}. See log above.")
+        raise NonFiniteValuesException(f"Non-finite values in output: {non_finite_outputs}. See log above.")

     if test_single_batch_entry and batch_dim in extern_data_dims:
         dyn_dims = [
@@ -146,7 +166,7 @@ def run_model(
         if not numpy.allclose(v_pt, v_tf, atol=1e-5, rtol=1e-5):
             print(f"  PT:\n{v_pt}")
             print(f"  TF:\n{v_tf}")
-            raise Exception(f"output {k!r} differs")
+            raise CompareResultsMismatchTfVsPtException(f"output {k!r} differs")
     return out_pt

@@ -300,9 +320,10 @@ def _run_model_torch_single_batch(
     # Slice the raw ref output to be able to match it to the raw single output.
     ref_output_raw = ref_output_.raw_tensor[_get_slices(output_)]
     single_output_raw = output_.raw_tensor
-    numpy.testing.assert_allclose(
-        ref_output_raw, single_output_raw, atol=1e-5, rtol=1e-5, err_msg=f"output {key!r} differs"
-    )
+    if not numpy.allclose(ref_output_raw, single_output_raw, atol=1e-5, rtol=1e-5):
+        print(f"  Batched:\n{ref_output_raw}")
+        print(f"  Single:\n{single_output_raw}")
+        raise CompareResultsMismatchSingleVsMultiBatchException(f"output {key!r} differs")

     # Recover original data.
     extern_data.reset_content()
@@ -411,6 +411,46 @@ def test_concat():
     run_model(extern_data, lambda *, epoch, step: _Net(), _forward_step)


+def test_concat_partly_dyn_dim():
+    time_static_dim = Dim(5, name="time_static")
+    time_dim = Dim(Tensor("time", [batch_dim], dtype="int32"))
+    in_dim = Dim(7, name="in")
+    extern_data = TensorDict(
+        {
+            "left": Tensor("left", [batch_dim, time_static_dim, in_dim], dtype="float32"),
+            "right": Tensor("right", [batch_dim, time_dim, in_dim], dtype="float32"),
+        }
+    )
+
+    # noinspection PyShadowingNames
+    def _forward_step(*, extern_data: TensorDict, **_kwargs):
+        left, right = extern_data["left"], extern_data["right"]
+        out, out_time_dim = rf.concat((left, time_static_dim), (right, time_dim))
+        out.mark_as_default_output(shape=(batch_dim, out_time_dim, in_dim))
+
+    run_model(extern_data, lambda **_: rf.Module(), _forward_step)
+
+
+def test_concat_dyn_time():
+    time1_dim = Dim(Tensor("time1", [batch_dim], dtype="int32"))
+    time2_dim = Dim(Tensor("time2", [batch_dim], dtype="int32"))
+    extern_data = TensorDict(
+        {
+            "left": Tensor("left", [batch_dim, time1_dim], dtype="float32"),
+            "right": Tensor("right", [batch_dim, time2_dim], dtype="float32"),
+        }
+    )
+
+    # noinspection PyShadowingNames
+    def _forward_step(*, extern_data: TensorDict, **_kwargs):
+        left, right = extern_data["left"], extern_data["right"]
+        out, out_time_dim = rf.concat((left, time1_dim), (right, time2_dim))
+        out.mark_as_default_output(shape=(batch_dim, out_time_dim))
+
+    # test_single_batch_entry should test the interesting case.
+    run_model(extern_data, lambda **_: rf.Module(), _forward_step, test_tensorflow=False)
+
+
 def test_pad():
     time_dim = Dim(Tensor("time", [batch_dim], dtype="int32"))
     in_dim = Dim(7, name="in")
@@ -38,7 +38,7 @@ def test_dot_attention():

     class _Net(rf.Module):
         def __call__(self, q: Tensor, k: Tensor, v: Tensor) -> Tensor:
-            kv_axis = Dim(None, name=f"kv-axis")
+            kv_axis = Dim(None, name="kv-axis")
             k, _ = rf.replace_dim(k, in_dim=time_dim, out_dim=kv_axis)
             v, _ = rf.replace_dim(v, in_dim=time_dim, out_dim=kv_axis)
             return rf.dot_attention(q, k, v, axis=kv_axis, key_dim=key_dim)
@@ -604,7 +604,7 @@ def test_rel_pos_self_attention():
         x_b = rf.gather(x, axis=batch_dim, indices=b)
         assert batch_dim in axis.dyn_size_ext.dims  # current assumption...
         seq_len = rf.gather(axis.dyn_size_ext, axis=batch_dim, indices=b)
-        axis_b = Dim(seq_len)
+        axis_b = Dim(seq_len, name=f"time_b{b}")
         # Note: The current order (replace_dim and then slice) is somewhat dependent
         # on the current internal behavior of gather and replace_dim,
         # which might change at some point...
@@ -1,2 +0,0 @@
-version = '1.20250826.155029'
-long_version = '1.20250826.155029+git.cca4212'