returnn 1.20250125.618__tar.gz → 1.20250131.151606__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of returnn might be problematic.

Files changed (475)
  1. {returnn-1.20250125.618/returnn.egg-info → returnn-1.20250131.151606}/PKG-INFO +1 -1
  2. returnn-1.20250131.151606/_setup_info_generated.py +2 -0
  3. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/datasets/postprocessing.py +1 -1
  4. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/frontend/_backend.py +2 -2
  5. {returnn-1.20250125.618 → returnn-1.20250131.151606/returnn.egg-info}/PKG-INFO +1 -1
  6. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn.egg-info/SOURCES.txt +1 -0
  7. returnn-1.20250131.151606/tools/torch_scale_tuning.py +594 -0
  8. returnn-1.20250125.618/_setup_info_generated.py +0 -2
  9. {returnn-1.20250125.618 → returnn-1.20250131.151606}/.editorconfig +0 -0
  10. {returnn-1.20250125.618 → returnn-1.20250131.151606}/.gitignore +0 -0
  11. {returnn-1.20250125.618 → returnn-1.20250131.151606}/.gitmodules +0 -0
  12. {returnn-1.20250125.618 → returnn-1.20250131.151606}/.kateconfig +0 -0
  13. {returnn-1.20250125.618 → returnn-1.20250131.151606}/CHANGELOG.md +0 -0
  14. {returnn-1.20250125.618 → returnn-1.20250131.151606}/CODEOWNERS +0 -0
  15. {returnn-1.20250125.618 → returnn-1.20250131.151606}/CONTRIBUTING.md +0 -0
  16. {returnn-1.20250125.618 → returnn-1.20250131.151606}/LICENSE +0 -0
  17. {returnn-1.20250125.618 → returnn-1.20250131.151606}/MANIFEST.in +0 -0
  18. {returnn-1.20250125.618 → returnn-1.20250131.151606}/README.rst +0 -0
  19. {returnn-1.20250125.618 → returnn-1.20250131.151606}/__init__.py +0 -0
  20. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/12AX.cluster_map +0 -0
  21. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/_setup_returnn_env.py +0 -0
  22. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-fwd.config +0 -0
  23. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-horovod-mpi.py +0 -0
  24. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-horovod-mpi.py.sh +0 -0
  25. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-horovod-mpi.sh +0 -0
  26. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-hyper-param-tuning.config +0 -0
  27. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-iter-dataset.py +0 -0
  28. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-list-devices.py +0 -0
  29. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-lua-torch-layer.config +0 -0
  30. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-pretrain.config +0 -0
  31. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-record-and-push-to-webserver.py +0 -0
  32. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-returnn-as-framework.py +0 -0
  33. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-rf-pt-benchmark.py +0 -0
  34. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-rf.config +0 -0
  35. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-rhn-enwik8.config +0 -0
  36. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-sprint-interface.py +0 -0
  37. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-tf-att-copy.config +0 -0
  38. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-tf-attention.config +0 -0
  39. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
  40. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
  41. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-tf-enc-dec.config +0 -0
  42. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-tf-hard-att-copy.config +0 -0
  43. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-tf-lstm-benchmark.py +0 -0
  44. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
  45. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
  46. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-tf-native-lstm.12ax.config +0 -0
  47. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-tf-native-lstm2.12ax.config +0 -0
  48. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
  49. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-tf-neural-transducer.12ax.config +0 -0
  50. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-tf-rec-explicit-lstm.config +0 -0
  51. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-tf-rec-explicit-rnn.config +0 -0
  52. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-tf-rec-self-att.config +0 -0
  53. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-tf-search-compiled-graph.py +0 -0
  54. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
  55. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-timit-lstm-ctc.config +0 -0
  56. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-torch.config +0 -0
  57. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
  58. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/demo.sh +0 -0
  59. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
  60. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
  61. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
  62. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/mdlstm/IAM/README.txt +0 -0
  63. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/mdlstm/IAM/chars.txt +0 -0
  64. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/mdlstm/IAM/config_demo +0 -0
  65. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/mdlstm/IAM/config_fwd +0 -0
  66. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/mdlstm/IAM/config_real +0 -0
  67. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
  68. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/mdlstm/IAM/decode.py +0 -0
  69. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
  70. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/mdlstm/IAM/go.sh +0 -0
  71. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/mdlstm/IAM/lines.txt +0 -0
  72. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/mdlstm/IAM/split/eval.txt +0 -0
  73. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/mdlstm/IAM/split/train.txt +0 -0
  74. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/mdlstm/IAM/split/valid.txt +0 -0
  75. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/mdlstm/README.md +0 -0
  76. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/mdlstm/artificial/create_test_h5.py +0 -0
  77. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/mdlstm/artificial/forwardconfig +0 -0
  78. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/mdlstm/artificial/go.sh +0 -0
  79. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/mdlstm/artificial/trainconfig +0 -0
  80. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
  81. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
  82. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/mdlstm/artificial_rgb/go.sh +0 -0
  83. {returnn-1.20250125.618 → returnn-1.20250131.151606}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
  84. {returnn-1.20250125.618 → returnn-1.20250131.151606}/pyproject.toml +0 -0
  85. {returnn-1.20250125.618 → returnn-1.20250131.151606}/requirements.txt +0 -0
  86. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/__init__.py +0 -0
  87. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/__main__.py +0 -0
  88. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/__old_mod_loader__.py +0 -0
  89. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/__setup__.py +0 -0
  90. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/config.py +0 -0
  91. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/datasets/__init__.py +0 -0
  92. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/datasets/audio.py +0 -0
  93. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/datasets/basic.py +0 -0
  94. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/datasets/bundle_file.py +0 -0
  95. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/datasets/cached.py +0 -0
  96. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/datasets/cached2.py +0 -0
  97. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/datasets/distrib_files.py +0 -0
  98. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/datasets/generating.py +0 -0
  99. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/datasets/hdf.py +0 -0
  100. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/datasets/lm.py +0 -0
  101. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/datasets/map.py +0 -0
  102. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/datasets/meta.py +0 -0
  103. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/datasets/multi_proc.py +0 -0
  104. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/datasets/normalization_data.py +0 -0
  105. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/datasets/numpy_dump.py +0 -0
  106. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/datasets/raw_wav.py +0 -0
  107. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/datasets/sprint.py +0 -0
  108. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/datasets/stereo.py +0 -0
  109. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/datasets/text_dict.py +0 -0
  110. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/datasets/util/__init__.py +0 -0
  111. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/datasets/util/feature_extraction.py +0 -0
  112. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/datasets/util/strings.py +0 -0
  113. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/datasets/util/vocabulary.py +0 -0
  114. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/engine/__init__.py +0 -0
  115. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/engine/base.py +0 -0
  116. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/engine/batch.py +0 -0
  117. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/__init__.py +0 -0
  118. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/__main__.py +0 -0
  119. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/warp-rna/.git +0 -0
  120. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
  121. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
  122. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
  123. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
  124. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
  125. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
  126. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
  127. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
  128. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
  129. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
  130. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
  131. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
  132. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
  133. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
  134. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
  135. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
  136. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
  137. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
  138. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
  139. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
  140. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
  141. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
  142. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
  143. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
  144. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/__init__.py +0 -0
  145. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/graph_editor/README.md +0 -0
  146. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/graph_editor/__init__.py +0 -0
  147. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/graph_editor/edit.py +0 -0
  148. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/graph_editor/reroute.py +0 -0
  149. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/graph_editor/select.py +0 -0
  150. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/graph_editor/subgraph.py +0 -0
  151. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/graph_editor/transform.py +0 -0
  152. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/extern/graph_editor/util.py +0 -0
  153. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/forward_iface.py +0 -0
  154. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/__init__.py +0 -0
  155. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/_backend.py +0 -0
  156. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/_cache.py +0 -0
  157. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/_native/__init__.py +0 -0
  158. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/_native/backend.cpp +0 -0
  159. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/_native/backend.hpp +0 -0
  160. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/_native/module.cpp +0 -0
  161. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/_native/module.hpp +0 -0
  162. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/_native/py_utils.hpp +0 -0
  163. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/_native/tensor_ops.cpp +0 -0
  164. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/_native/tensor_ops.hpp +0 -0
  165. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/_numpy_backend.py +0 -0
  166. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/_random_journal.py +0 -0
  167. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/_utils.py +0 -0
  168. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/array_.py +0 -0
  169. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/attention.py +0 -0
  170. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/audio/__init__.py +0 -0
  171. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/audio/mel.py +0 -0
  172. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/audio/specaugment.py +0 -0
  173. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/backend.py +0 -0
  174. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/build_from_dict.py +0 -0
  175. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/cond.py +0 -0
  176. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/const.py +0 -0
  177. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/container.py +0 -0
  178. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/control_flow_ctx.py +0 -0
  179. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/conv.py +0 -0
  180. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/conversions/__init__.py +0 -0
  181. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/conversions/espnet_e_branchformer.py +0 -0
  182. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/conversions/hf_llama.py +0 -0
  183. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/conversions/torch_nn.py +0 -0
  184. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/decoder/__init__.py +0 -0
  185. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/decoder/transformer.py +0 -0
  186. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/device.py +0 -0
  187. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/dims.py +0 -0
  188. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/dropout.py +0 -0
  189. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/dtype.py +0 -0
  190. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/encoder/__init__.py +0 -0
  191. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/encoder/base.py +0 -0
  192. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/encoder/conformer.py +0 -0
  193. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/encoder/conformer_v2.py +0 -0
  194. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/encoder/e_branchformer.py +0 -0
  195. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/encoder/transformer.py +0 -0
  196. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/gradient.py +0 -0
  197. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/graph.py +0 -0
  198. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/hooks.py +0 -0
  199. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/init.py +0 -0
  200. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/label_smoothing.py +0 -0
  201. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/linear.py +0 -0
  202. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/loop.py +0 -0
  203. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/loss.py +0 -0
  204. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/math_.py +0 -0
  205. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/matmul.py +0 -0
  206. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/module.py +0 -0
  207. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/nested.py +0 -0
  208. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/normalization.py +0 -0
  209. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/parameter.py +0 -0
  210. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/parametrizations.py +0 -0
  211. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/parametrize.py +0 -0
  212. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/piecewise_linear.py +0 -0
  213. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/rand.py +0 -0
  214. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/rec.py +0 -0
  215. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/reduce.py +0 -0
  216. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/run_ctx.py +0 -0
  217. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/signal.py +0 -0
  218. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/state.py +0 -0
  219. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/stepwise_scheduler.py +0 -0
  220. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/tensor_array.py +0 -0
  221. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/frontend/types.py +0 -0
  222. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/import_/__init__.py +0 -0
  223. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/import_/common.py +0 -0
  224. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/import_/git.py +0 -0
  225. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/import_/import_.py +0 -0
  226. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/learning_rate_control.py +0 -0
  227. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/log.py +0 -0
  228. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/native_op.cpp +0 -0
  229. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/native_op.py +0 -0
  230. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/pretrain.py +0 -0
  231. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/sprint/__init__.py +0 -0
  232. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/sprint/cache.py +0 -0
  233. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/sprint/control.py +0 -0
  234. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/sprint/error_signals.py +0 -0
  235. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/sprint/extern_interface.py +0 -0
  236. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/sprint/interface.py +0 -0
  237. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tensor/README.md +0 -0
  238. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tensor/__init__.py +0 -0
  239. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tensor/_dim_extra.py +0 -0
  240. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tensor/_tensor_extra.py +0 -0
  241. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tensor/_tensor_mixin_base.py +0 -0
  242. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tensor/_tensor_op_overloads.py +0 -0
  243. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tensor/control_flow_ctx.py +0 -0
  244. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tensor/dim.py +0 -0
  245. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tensor/marked_dim.py +0 -0
  246. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tensor/tensor.py +0 -0
  247. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tensor/tensor_dict.py +0 -0
  248. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tensor/utils.py +0 -0
  249. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/__init__.py +0 -0
  250. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/compat.py +0 -0
  251. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/data_pipeline.py +0 -0
  252. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/distributed.py +0 -0
  253. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/engine.py +0 -0
  254. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/frontend_layers/README.md +0 -0
  255. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/frontend_layers/__init__.py +0 -0
  256. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/frontend_layers/_backend.py +0 -0
  257. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/frontend_layers/_utils.py +0 -0
  258. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/frontend_layers/cond.py +0 -0
  259. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
  260. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
  261. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/frontend_layers/dims.py +0 -0
  262. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/frontend_layers/layer.py +0 -0
  263. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/frontend_layers/loop.py +0 -0
  264. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/frontend_layers/make_layer.py +0 -0
  265. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/frontend_layers/masked_computation.py +0 -0
  266. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
  267. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
  268. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/frontend_low_level/__init__.py +0 -0
  269. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/frontend_low_level/_backend.py +0 -0
  270. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/horovod.py +0 -0
  271. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/hyper_param_tuning.py +0 -0
  272. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/layers/__init__.py +0 -0
  273. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/layers/base.py +0 -0
  274. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/layers/basic.py +0 -0
  275. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/layers/rec.py +0 -0
  276. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/layers/segmental_model.py +0 -0
  277. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/layers/signal_processing.py +0 -0
  278. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/layers/variable.py +0 -0
  279. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/native_op.py +0 -0
  280. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/network.py +0 -0
  281. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/sprint.py +0 -0
  282. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/updater.py +0 -0
  283. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/util/__init__.py +0 -0
  284. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/util/basic.py +0 -0
  285. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/util/data.py +0 -0
  286. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/util/gradient_checkpoint.py +0 -0
  287. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/util/ken_lm.py +0 -0
  288. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/tf/util/open_fst.py +0 -0
  289. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/README.md +0 -0
  290. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/__init__.py +0 -0
  291. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/data/__init__.py +0 -0
  292. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/data/extern_data.py +0 -0
  293. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/data/pipeline.py +0 -0
  294. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/data/queued_data_iter.py +0 -0
  295. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
  296. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/data/tensor_utils.py +0 -0
  297. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/distributed.py +0 -0
  298. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/engine.py +0 -0
  299. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/frontend/__init__.py +0 -0
  300. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/frontend/_rand.py +0 -0
  301. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/frontend/bridge.py +0 -0
  302. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/frontend/raw_ops.py +0 -0
  303. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/optim/README.md +0 -0
  304. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/optim/__init__.py +0 -0
  305. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/optim/lion.py +0 -0
  306. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/updater.py +0 -0
  307. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/util/README.md +0 -0
  308. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/util/__init__.py +0 -0
  309. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/util/array_.py +0 -0
  310. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/util/debug_inf_nan.py +0 -0
  311. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/util/diagnose_gpu.py +0 -0
  312. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/util/exception_helper.py +0 -0
  313. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/util/gradient_checkpoint.py +0 -0
  314. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/util/module.py +0 -0
  315. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/util/scaled_gradient.py +0 -0
  316. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/util/__init__.py +0 -0
  317. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/util/basic.py +0 -0
  318. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/util/better_exchook.py +0 -0
  319. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/util/bpe.py +0 -0
  320. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/util/debug.py +0 -0
  321. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/util/debug_helpers.py +0 -0
  322. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/util/file_cache.py +0 -0
  323. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/util/fsa.py +0 -0
  324. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/util/literal_py_to_pickle.py +0 -0
  325. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/util/lru_cache.py +0 -0
  326. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/util/math.py +0 -0
  327. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
  328. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/util/native_code_compiler.py +0 -0
  329. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/util/pprint.py +0 -0
  330. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/util/py-to-pickle.cpp +0 -0
  331. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/util/py_ext_mod_compiler.py +0 -0
  332. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/util/result_with_reason.py +0 -0
  333. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/util/sig_proc.py +0 -0
  334. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/util/task_system.py +0 -0
  335. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/util/train_proc_manager.py +0 -0
  336. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/util/watch_memory.py +0 -0
  337. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn.egg-info/dependency_links.txt +0 -0
  338. {returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn.egg-info/top_level.txt +0 -0
  339. {returnn-1.20250125.618 → returnn-1.20250131.151606}/rnn.py +0 -0
  340. {returnn-1.20250125.618 → returnn-1.20250131.151606}/setup.cfg +0 -0
  341. {returnn-1.20250125.618 → returnn-1.20250131.151606}/setup.py +0 -0
  342. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/DummySprintExec.py +0 -0
  343. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/PyCharm-inspection-profile.xml +0 -0
  344. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/PyCharm.idea/.gitignore +0 -0
  345. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/PyCharm.idea/.name +0 -0
  346. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
  347. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
  348. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
  349. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
  350. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
  351. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/PyCharm.idea/misc.xml +0 -0
  352. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/PyCharm.idea/modules.xml +0 -0
  353. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/PyCharm.idea/returnn.iml +0 -0
  354. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
  355. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/_set_num_threads1.py +0 -0
  356. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/_setup_returnn_env.py +0 -0
  357. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/_setup_test_env.py +0 -0
  358. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/bpe-unicode-demo.codes +0 -0
  359. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/bpe-unicode-demo.vocab +0 -0
  360. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/lexicon_opt.fst +0 -0
  361. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/lexicon_opt.isyms +0 -0
  362. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/lexicon_opt.jpg +0 -0
  363. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/lexicon_opt.osyms +0 -0
  364. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/lint_common.py +0 -0
  365. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/pycharm-inspect.py +0 -0
  366. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/pylint.py +0 -0
  367. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/returnn-as-framework.py +0 -0
  368. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/rf_utils.py +0 -0
  369. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/spelling.dic +0 -0
  370. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_Config.py +0 -0
  371. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_Dataset.py +0 -0
  372. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_Fsa.py +0 -0
  373. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_GeneratingDataset.py +0 -0
  374. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_HDFDataset.py +0 -0
  375. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_LearningRateControl.py +0 -0
  376. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_Log.py +0 -0
  377. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_MultiProcDataset.py +0 -0
  378. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_Pretrain.py +0 -0
  379. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_ResNet.py +0 -0
  380. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_SprintDataset.py +0 -0
  381. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_SprintInterface.py +0 -0
  382. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_TFEngine.py +0 -0
  383. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_TFNativeOp.py +0 -0
  384. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_TFNetworkLayer.py +0 -0
  385. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_TFNetworkRecLayer.py +0 -0
  386. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_TFNetworkSigProcLayer.py +0 -0
  387. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_TFUpdater.py +0 -0
  388. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_TFUtil.py +0 -0
  389. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_TF_determinism.py +0 -0
  390. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_TaskSystem.py +0 -0
  391. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_TaskSystem_SharedMem.py +0 -0
  392. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_TranslationDataset.py +0 -0
  393. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_Util.py +0 -0
  394. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_demos.py +0 -0
  395. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_fork_exec.py +0 -0
  396. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_hdf_dump.py +0 -0
  397. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_rf_array.py +0 -0
  398. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_rf_attention.py +0 -0
  399. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_rf_base.py +0 -0
  400. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_rf_cond.py +0 -0
  401. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_rf_const.py +0 -0
  402. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_rf_container.py +0 -0
  403. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_rf_conv.py +0 -0
  404. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_rf_decoder_transformer.py +0 -0
  405. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_rf_encoder_conformer.py +0 -0
  406. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_rf_gradient.py +0 -0
  407. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_rf_label_smoothing.py +0 -0
  408. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_rf_loop.py +0 -0
  409. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_rf_math.py +0 -0
  410. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_rf_normalization.py +0 -0
  411. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_rf_piecewise_linear.py +0 -0
  412. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_rf_rec.py +0 -0
  413. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_rf_reduce.py +0 -0
  414. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_rf_signal.py +0 -0
  415. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_tensor.py +0 -0
  416. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_threading.py +0 -0
  417. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_tools.py +0 -0
  418. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_torch_dataset.py +0 -0
  419. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_torch_engine.py +0 -0
  420. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_torch_frontend.py +0 -0
  421. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_torch_internal_frontend.py +0 -0
  422. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/test_torch_util.py +0 -0
  423. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tests/torch_utils.py +0 -0
  424. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/_setup_returnn_env.py +0 -0
  425. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/analyze-dataset-batches.py +0 -0
  426. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/bliss-collect-seq-lens.py +0 -0
  427. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/bliss-dump-text.py +0 -0
  428. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/bliss-get-segment-names.py +0 -0
  429. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/bliss-to-ogg-zip.py +0 -0
  430. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/bpe-create-lexicon.py +0 -0
  431. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/calculate-word-error-rate.py +0 -0
  432. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/cleanup-old-models.py +0 -0
  433. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/collect-orth-symbols.py +0 -0
  434. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/collect-words.py +0 -0
  435. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/compile_native_op.py +0 -0
  436. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/compile_tf_graph.py +0 -0
  437. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/debug-dump-search-scores.py +0 -0
  438. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/debug-plot-search-scores.py +0 -0
  439. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/dump-dataset-raw-strings.py +0 -0
  440. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/dump-dataset.py +0 -0
  441. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/dump-forward-stats.py +0 -0
  442. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/dump-forward.py +0 -0
  443. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/dump-network-json.py +0 -0
  444. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/dump-pickle.py +0 -0
  445. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/extract_state_tying_from_dataset.py +0 -0
  446. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/get-attention-weights.py +0 -0
  447. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/get-best-model-epoch.py +0 -0
  448. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/hdf_dump.py +0 -0
  449. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/hdf_dump_translation_dataset.py +0 -0
  450. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/import-blocks-mt-model.py +0 -0
  451. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/import-t2t-mt-model.py +0 -0
  452. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/lattice_rescorer/.gitignore +0 -0
  453. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/lattice_rescorer/Makefile +0 -0
  454. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/lattice_rescorer/README.md +0 -0
  455. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/lattice_rescorer/example/README.md +0 -0
  456. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/lattice_rescorer/example/libs_list +0 -0
  457. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
  458. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
  459. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
  460. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/lattice_rescorer/example/state_vars_list +0 -0
  461. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/lattice_rescorer/example/tensor_names_list +0 -0
  462. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/lattice_rescorer/file.h +0 -0
  463. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
  464. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
  465. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/lattice_rescorer/main.cc +0 -0
  466. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/lattice_rescorer/rescorer.h +0 -0
  467. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/lattice_rescorer/vocabulary.cc +0 -0
  468. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/lattice_rescorer/vocabulary.h +0 -0
  469. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/tf_avg_checkpoints.py +0 -0
  470. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/tf_inspect_checkpoint.py +0 -0
  471. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/tf_inspect_summary_log.py +0 -0
  472. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/torch_avg_checkpoints.py +0 -0
  473. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/torch_export_to_onnx.py +0 -0
  474. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/torch_inspect_checkpoint.py +0 -0
  475. {returnn-1.20250125.618 → returnn-1.20250131.151606}/tools/torch_inspect_checkpoint_and_opt.py +0 -0
{returnn-1.20250125.618/returnn.egg-info → returnn-1.20250131.151606}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250125.618
+Version: 1.20250131.151606
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
returnn-1.20250131.151606/_setup_info_generated.py
@@ -0,0 +1,2 @@
+version = '1.20250131.151606'
+long_version = '1.20250131.151606+git.d5465da'
{returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/datasets/postprocessing.py
@@ -364,11 +364,11 @@ class LaplaceOrdering(Callable[[Iterator[TensorDict]], Iterator[TensorDict]]):
         is_down_phase = False
 
         seq_buffer = list(islice(iterator, self.num_seqs_per_bin))
+        has_ended = False
         while True:
             seq_buffer.sort(key=self._get_seq_len, reverse=is_down_phase)
 
             next_seq_buffer = []
-            has_ended = False
 
             # Yield items to trainer while gradually pulling more data from PP function.
             # This optimizes CPU load when multiple workers are used.
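The postprocessing change only moves the initialization of has_ended out of the while loop. The point of the move: if the flag is reset on every outer iteration, the loop forgets between iterations that the upstream iterator is already exhausted. A minimal sketch of the fixed pattern (hypothetical names and simplified control flow, not the actual LaplaceOrdering code):

from itertools import islice

def _buffered_bins(iterator, bin_size):
    # Hypothetical sketch: yield items bin by bin while refilling from `iterator`.
    seq_buffer = list(islice(iterator, bin_size))
    has_ended = False  # initialized once, before the loop (as in the fix)
    while seq_buffer:
        next_seq_buffer = []
        for item in seq_buffer:
            yield item
            if not has_ended:
                nxt = next(iterator, None)
                if nxt is None:
                    has_ended = True  # must persist across outer iterations
                else:
                    next_seq_buffer.append(nxt)
        seq_buffer = next_seq_buffer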
{returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn/torch/frontend/_backend.py
@@ -1468,9 +1468,9 @@ class TorchBackend(Backend[torch.Tensor]):
         if use_mask and any(dim.need_masking() for dim in axis):
             source = source.copy()
             dtype = source.raw_tensor.dtype
-            if mode in ("max", "logsumexp"):
+            if mode in ("max", "logsumexp", "argmax"):
                 mask_value = torch.finfo(dtype).min if dtype.is_floating_point else torch.iinfo(dtype).min
-            elif mode == "min":
+            elif mode in ("min", "argmin"):
                 mask_value = torch.finfo(dtype).max if dtype.is_floating_point else torch.iinfo(dtype).max
             elif mode == "sum":
                 mask_value = 0
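The _backend.py change extends the reduce masking so that argmax and argmin reuse the padding values of max and min: padded positions are filled with the dtype's minimum before max/argmax and with its maximum before min/argmin, so padding can never win the reduction. A rough standalone illustration of the idea (hypothetical helper, not the actual TorchBackend code):

import torch

def masked_argmax(x: torch.Tensor, lengths: torch.Tensor) -> torch.Tensor:
    # Argmax over the last (time) axis of x [batch, time], ignoring padded frames.
    mask = torch.arange(x.shape[-1], device=x.device) < lengths.unsqueeze(-1)  # [batch, time]
    fill = torch.finfo(x.dtype).min if x.dtype.is_floating_point else torch.iinfo(x.dtype).min
    return x.masked_fill(~mask, fill).argmax(dim=-1)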
{returnn-1.20250125.618 → returnn-1.20250131.151606/returnn.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250125.618
+Version: 1.20250131.151606
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
{returnn-1.20250125.618 → returnn-1.20250131.151606}/returnn.egg-info/SOURCES.txt
@@ -452,6 +452,7 @@ tools/torch_avg_checkpoints.py
 tools/torch_export_to_onnx.py
 tools/torch_inspect_checkpoint.py
 tools/torch_inspect_checkpoint_and_opt.py
+tools/torch_scale_tuning.py
 tools/lattice_rescorer/.gitignore
 tools/lattice_rescorer/Makefile
 tools/lattice_rescorer/README.md
@@ -0,0 +1,594 @@
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ Scale tuning
5
+ """
6
+
7
+ from __future__ import annotations
8
+ import argparse
9
+ import os
10
+ import numpy as np
11
+ from dataclasses import dataclass
12
+ from typing import Optional, Union, Dict, List, Tuple, Set
13
+
14
+ import _setup_returnn_env # noqa
15
+
16
+ from returnn.config import set_global_config, Config
17
+ from returnn.log import log
18
+ from returnn.util.basic import describe_returnn_version, BehaviorVersion
19
+ from returnn.util import better_exchook
20
+ from returnn.tensor import Tensor, Dim
21
+ import returnn.frontend as rf
22
+ import torch
23
+ import json
24
+
25
+
26
+ def main():
27
+ """main"""
28
+ arg_parser = argparse.ArgumentParser()
29
+ arg_parser.add_argument("--names", required=True, nargs="+", help="names for scores")
30
+ arg_parser.add_argument("--scores", required=True, nargs="+", help="score-paths")
31
+ arg_parser.add_argument("--evaluation", required=True, help="'edit_distance'")
32
+ arg_parser.add_argument("--ref", help="ref-path")
33
+ arg_parser.add_argument("--fixed-scales", nargs="*", help="(name, scale) pairs, fixed scales")
34
+ arg_parser.add_argument("--negative-scales", nargs="*", help="list of names, negative scales")
35
+ arg_parser.add_argument("--scale-relative-to", nargs="*", help="(name, other) pairs, scale relative to")
36
+ arg_parser.add_argument("--max-scale", type=float, default=2.0)
37
+ arg_parser.add_argument("--min-scales", help="(name, scale) pairs, min scales")
38
+ arg_parser.add_argument("--max-scales", help="(name, scale) pairs, max scales")
39
+ arg_parser.add_argument("--num-iterations", type=int, default=10)
40
+ arg_parser.add_argument("--num-steps", type=int, default=21)
41
+ arg_parser.add_argument("--search-mode", default="auto", help="'auto', 'grid', 'single'")
42
+ arg_parser.add_argument("--device", default="cpu", help="auto, cpu, cuda, ...")
43
+ arg_parser.add_argument("--batch-size", type=int, default=1024)
44
+ arg_parser.add_argument("--output-scales", help="file to write (relative) scales into")
45
+ arg_parser.add_argument("--output-real-scales", help="file to write real scales into")
46
+ arg_parser.add_argument("--output-grid-plot", help="file to write grid plot into")
47
+ args = arg_parser.parse_args()
48
+
49
+ print(f"{os.path.basename(__file__)}, RETURNN {describe_returnn_version()}")
50
+
51
+ config = Config()
52
+ set_global_config(config)
53
+
54
+ log.initialize(verbosity=[5])
55
+ BehaviorVersion.set_min_behavior_version(22)
56
+ better_exchook.install()
57
+ rf.select_backend_torch()
58
+
59
+ try:
60
+ # noinspection PyUnresolvedReferences,PyPackageRequirements
61
+ import lovely_tensors
62
+
63
+ lovely_tensors.monkey_patch()
64
+ except ImportError:
65
+ pass # ignore
66
+
67
+ names = list(args.names)
68
+ names_dim = Dim(len(names), name="names")
69
+
70
+ # Resort scale names, such that we first have the fixed scales, then the scale_relative_to.
71
+ fixed_scales_names = set()
72
+ if args.fixed_scales:
73
+ assert len(args.fixed_scales) % 2 == 0
74
+ for name, scale in zip(args.fixed_scales[::2], args.fixed_scales[1::2]):
75
+ fixed_scales_names.add(name)
76
+ scale_relative_to_target_names = set()
77
+ if args.scale_relative_to:
78
+ assert len(args.scale_relative_to) % 2 == 0
79
+ for name, other in zip(args.scale_relative_to[::2], args.scale_relative_to[1::2]):
80
+ scale_relative_to_target_names.add(other)
81
+ names.sort(key=lambda key: (key not in fixed_scales_names, key not in scale_relative_to_target_names))
82
+ print("Names:", names)
83
+
84
+ # Prepare scale relation info, based on scale/name idx.
85
+ fixed_scales: Dict[int, float] = {}
86
+ if args.fixed_scales:
87
+ assert len(args.fixed_scales) % 2 == 0
88
+ for name, scale in zip(args.fixed_scales[::2], args.fixed_scales[1::2]):
89
+ name_idx = names.index(name)
90
+ assert name_idx not in fixed_scales
91
+ fixed_scales[name_idx] = float(scale)
92
+ scale_relative_to: Dict[int, int] = {} # name idx -> other name idx (before)
93
+ if args.scale_relative_to:
94
+ assert len(args.scale_relative_to) % 2 == 0
95
+ for name, other in zip(args.scale_relative_to[::2], args.scale_relative_to[1::2]):
96
+ name_idx = names.index(name)
97
+ other_idx = names.index(other)
98
+ assert name_idx not in scale_relative_to
99
+ assert name_idx not in fixed_scales
100
+ assert other_idx not in fixed_scales
101
+ assert other_idx < name_idx, "make sure, when --scale-relative-to, that the other name is before"
102
+ scale_relative_to[name_idx] = other_idx
103
+ neg_scales: Set[int] = set()
104
+ if args.negative_scales:
105
+ for name in args.negative_scales:
106
+ name_idx = names.index(name)
107
+ assert name_idx not in fixed_scales
108
+ neg_scales.add(name_idx)
109
+
110
+ print("Num scales to search over:", len(names) - len(fixed_scales))
111
+ search_mode = args.search_mode
112
+ if search_mode == "auto":
113
+ if len(names) - len(fixed_scales) == 2:
114
+ search_mode = "grid"
115
+ else:
116
+ search_mode = "single"
117
+ print("Search mode:", search_mode)
118
+
119
+ # Load data
120
+ vocab: Dict[str, int] = {}
121
+ hyps: Dict[str, List[List[int]]] = {} # seq_tag -> beam of seqs
122
+ scores: Dict[str, Dict[str, List[float]]] = {} # name -> seq_tag -> beam of score
123
+ assert args.scores and len(args.scores) == len(names)
124
+ for name, path in zip(args.names, args.scores):
125
+ if not os.path.exists(path):
126
+ raise FileNotFoundError(f"score file not found: {path}, for name: {name}")
127
+ scores[name], hyps = _load_text_dict_hyps_file(
128
+ path, name=name, vocab=vocab, expect_same_hyps=hyps if hyps else None
129
+ )
130
+ assert scores[name] and hyps
131
+ print("num seqs:", len(hyps))
132
+
133
+ assert scores and hyps
134
+ print("len vocab after reading hyps:", len(vocab))
135
+ assert vocab, "no labels found?"
136
+ ref: Optional[Dict[str, List[int]]] = (
137
+ _load_text_dict_file(args.ref, name="ref", vocab=vocab) if args.ref is not None else None
138
+ )
139
+ print("len vocab after reading ref:", len(vocab))
140
+ if len(vocab) < 2**15:
141
+ dtype = torch.int16
142
+ elif len(vocab) < 2**31:
143
+ dtype = torch.int32
144
+ elif len(vocab) < 2**63:
145
+ dtype = torch.int64
146
+ else:
147
+ raise ValueError(f"vocab too large: {len(vocab)}")
148
+ print("dtype:", dtype)
149
+
150
+ if ref:
151
+ total_ref_seq_len = sum(len(seq) for seq in ref.values())
152
+ print("total ref seq len:", total_ref_seq_len)
153
+ avg_ref_seq_len = total_ref_seq_len / len(ref)
154
+ print("avg ref seq len:", avg_ref_seq_len)
155
+ else:
156
+ avg_ref_seq_len = None
157
+
158
+ # Sort by lengths (reversed) to be able to prepare batches without too much padding.
159
+ if ref:
160
+ seq_list_ordered_by_len = sorted(ref.keys(), key=lambda tag: len(ref[tag]), reverse=True)
161
+ else:
162
+ seq_list_ordered_by_len = sorted(hyps.keys(), key=lambda tag: len(hyps[tag][0]), reverse=True)
163
+
164
+ if args.device == "auto":
165
+ dev_s = "cuda" if torch.cuda.is_available() else "cpu"
166
+ else:
167
+ dev_s = args.device
168
+ dev = torch.device(dev_s)
169
+ print("Device:", dev)
170
+ _report_dev_memory(dev)
171
+
172
+ # Prepare batches, load all into device memory
173
+ # Note: If we get GPU OOM here, we could instead keep this on CPU... Currently, not yet implemented...
174
+ print("Preparing batches, calculating evaluation on all hyps...")
175
+ batches = []
176
+ for i in range(0, len(seq_list_ordered_by_len), args.batch_size):
177
+ print(
178
+ f"Batch {len(batches)}, seqs {i} - {min(i + args.batch_size, len(seq_list_ordered_by_len))}"
179
+ f" / {len(seq_list_ordered_by_len)}, {i/len(seq_list_ordered_by_len)*100:.1f}%"
180
+ )
181
+ batch_seq_tag_list: List[str] = seq_list_ordered_by_len[i : i + args.batch_size]
182
+
183
+ beam_sizes_ = [len(hyps[tag]) for tag in batch_seq_tag_list]
184
+ beam_sizes_t = torch.tensor(beam_sizes_, dtype=torch.int32) # [Batch], int32
185
+ hyps_seq_lens_: List[List[int]] = [
186
+ [len(hyps[tag][beam]) for beam in range(len(hyps[tag]))] for tag in batch_seq_tag_list
187
+ ]
188
+ hyps_seq_lens_t = _make_padded_tensor_2d(hyps_seq_lens_, dtype=torch.int32) # [Batch,Beam], int32
189
+ hyps__: List[List[List[int]]] = [hyps[tag] for tag in batch_seq_tag_list]
190
+ hyps_t = _make_padded_tensor_3d(hyps__, dtype=dtype, device=dev) # [Batch,Beam,HypSeq], int16|int32|int64
191
+
192
+ batch_scores: List[List[List[float]]] = [[scores[name][tag] for tag in batch_seq_tag_list] for name in names]
193
+ batch_scores_t = _make_padded_tensor_3d(
194
+ batch_scores, dtype=torch.float32, device=dev
195
+ ) # [Names,Batch,Beam], float32
196
+
197
+ if ref is not None:
198
+ ref_: List[List[int]] = [ref[tag] for tag in batch_seq_tag_list]
199
+ ref_t = _make_padded_tensor_2d(ref_, dtype=dtype, device=dev) # [Batch,RefSeq], int16|int32|int64
200
+ ref_seq_lens = [len(ref[tag]) for tag in batch_seq_tag_list]
201
+ ref_seq_lens_t = torch.tensor(ref_seq_lens, dtype=torch.int32) # [Batch], int32
202
+ else:
203
+ ref_t = None
204
+ ref_seq_lens_t = None
205
+
206
+ batch_dim = Dim(len(batch_seq_tag_list), name="batch")
207
+ beam_sizes_rf = rf.convert_to_tensor(beam_sizes_t, dims=[batch_dim])
208
+ beam_dim = Dim(beam_sizes_rf, name="beam")
209
+ hyps_seq_lens_rf = rf.convert_to_tensor(hyps_seq_lens_t, dims=[batch_dim, beam_dim])
210
+ hyps_seq_dim = Dim(hyps_seq_lens_rf, name="hyps_seq")
211
+ hyps_rf = rf.convert_to_tensor(hyps_t, dims=[batch_dim, beam_dim, hyps_seq_dim])
212
+ if ref_t is not None:
213
+ ref_seq_lens_rf = rf.convert_to_tensor(ref_seq_lens_t, dims=[batch_dim])
214
+ ref_seq_dim = Dim(ref_seq_lens_rf, name="ref_seq")
215
+ ref_rf = rf.convert_to_tensor(ref_t, dims=[batch_dim, ref_seq_dim])
216
+ else:
217
+ ref_seq_dim = None
218
+ ref_rf = None
219
+
220
+ if args.evaluation == "edit_distance":
221
+ assert ref_rf is not None, "need --ref for edit_distance"
222
+ res = rf.edit_distance(ref_rf, ref_seq_dim, hyps_rf, hyps_seq_dim)
223
+ assert res.dims_set == {batch_dim, beam_dim}
224
+ res = res.copy_transpose((batch_dim, beam_dim))
225
+ res = rf.cast(res, "float64")
226
+ # Dividing by avg_ref_seq_len keeps the per-sequence values in a reasonable range (numerically more stable)
227
+ # and lets us use a simple reduce_mean over sequences below.
228
+ res /= avg_ref_seq_len # per-seq errors / avg_len, averaged over all seqs, equals total errors / total ref len (i.e. WER)
229
+ else:
230
+ raise ValueError(f"unknown evaluation {args.evaluation!r}")
231
+
232
+ batch_scores_rf = rf.convert_to_tensor(batch_scores_t, dims=[names_dim, batch_dim, beam_dim])
233
+
234
+ batches.append(
235
+ Batch(
236
+ seq_tags=batch_seq_tag_list,
237
+ batch_dim=batch_dim,
238
+ beam_dim=beam_dim,
239
+ hyps_eval=res,
240
+ names_dim=names_dim,
241
+ scores=batch_scores_rf,
242
+ )
243
+ )
244
+ print("num batches:", len(batches))
245
+ _report_dev_memory(dev)
246
+
247
+ def _eval_max_min(*, use_max: bool) -> float:
248
+ hyps_eval_ts = []
249
+ for batch in batches:
250
+ hyps_eval = (rf.reduce_max if use_max else rf.reduce_min)(
251
+ batch.hyps_eval, axis=batch.beam_dim
252
+ ) # [Batch], float64
253
+ assert hyps_eval.dims_set == {batch.batch_dim}
254
+ hyps_eval_ts.append(hyps_eval.raw_tensor)
255
+ hyps_eval_t = torch.concatenate(hyps_eval_ts) # [NumSeqs], float64
256
+ hyps_eval_t = torch.mean(hyps_eval_t) # scalar, float64
257
+ return hyps_eval_t.cpu().item()
258
+
259
+ best_eval, worst_eval = _eval_max_min(use_max=False), _eval_max_min(use_max=True)
260
+ print(f"Best {args.evaluation}: {best_eval}, worst {args.evaluation}: {worst_eval}")
261
+
262
+ scales_min = [0.0] * len(names)
263
+ scales_max = [args.max_scale] * len(names)
264
+ if args.min_scales:
265
+ assert len(args.min_scales) % 2 == 0
266
+ for name, scale in zip(args.min_scales[::2], args.min_scales[1::2]):
267
+ scales_min[names.index(name)] = float(scale)
268
+ if args.max_scales:
269
+ assert len(args.max_scales) % 2 == 0
270
+ for name, scale in zip(args.max_scales[::2], args.max_scales[1::2]):
271
+ scales_max[names.index(name)] = float(scale)
272
+
273
+ scales = [(scales_min[i] + scales_max[i]) / 2 for i in range(len(names))]
274
+ for name_idx, scale in fixed_scales.items():
275
+ scales[name_idx] = scale
276
+
277
+ def _real_scales(scales_: List[float]):
278
+ scales_ = list(scales_)
279
+ for i_ in range(len(scales_)):
280
+ if i_ in scale_relative_to:
281
+ scales_[i_] *= scales_[scale_relative_to[i_]]
282
+ if i_ in neg_scales:
283
+ scales_[i_] *= -1
284
+ return scales_
285
+
286
+ def _eval_for_scales(scales_: List[float]) -> float:
287
+ real_scales_t = torch.tensor(_real_scales(scales_), dtype=torch.float32).to(dev) # [Names], float32
288
+ real_scales = rf.convert_to_tensor(real_scales_t, dims=[names_dim])
289
+ hyps_eval_ts = []
290
+ for batch in batches:
291
+ scores_scaled = rf.matmul(batch.scores, real_scales, reduce=names_dim) # [Batch,Beam], float32
292
+ beam_idx = rf.reduce_argmax(scores_scaled, axis=batch.beam_dim) # [Batch] -> Beam, int32
293
+ hyps_eval = rf.gather(batch.hyps_eval, indices=beam_idx) # [Batch], float64
294
+ assert hyps_eval.dims_set == {batch.batch_dim}
295
+ hyps_eval_ts.append(hyps_eval.raw_tensor)
296
+ hyps_eval_t = torch.concatenate(hyps_eval_ts) # [NumSeqs], float64
297
+ hyps_eval_t = torch.mean(hyps_eval_t) # scalar, float64
298
+ return hyps_eval_t.cpu().item()
299
+
300
+ if search_mode == "single":
301
+ print("Search mode single, each dimension/scale optimized separately")
302
+ for iter_idx in range(args.num_iterations):
303
+ print("*** Iter", iter_idx)
304
+ has_change = False
305
+ for scale_idx in range(len(scales) - 1, -1, -1):
306
+ if scale_idx in fixed_scales:
307
+ continue
308
+ print(f"** Scale {names[scale_idx]} in range {scales_min[scale_idx]} - {scales_max[scale_idx]}")
309
+ print(
310
+ " Other scales:",
311
+ ", ".join(f"{names[i]}: {scales[i]}" for i in range(len(scales)) if i != scale_idx),
312
+ )
313
+ evals: List[Tuple[float, float]] = [] # (eval, scale)
314
+ for scale in np.linspace(scales_min[scale_idx], scales_max[scale_idx], num=args.num_steps):
315
+ scales[scale_idx] = scale
316
+ eval_ = _eval_for_scales(scales)
317
+ print(f"Scale {names[scale_idx]}: {scale}, {args.evaluation}: {eval_}")
318
+ evals.append((eval_, scale))
319
+ eval_p = np.percentile([eval_ for eval_, _ in evals], 5)
320
+ print("Eval p5:", eval_p, "best:", min(eval_ for eval_, _ in evals))
321
+ prev_min, prev_max = scales_min[scale_idx], scales_max[scale_idx]
322
+ scales_min[scale_idx] = min(scale for eval_, scale in evals if eval_ <= eval_p * 1.0001)
323
+ scales_max[scale_idx] = max(scale for eval_, scale in evals if eval_ <= eval_p * 1.0001)
324
+ print(f"New {names[scale_idx]} scales min/max:", scales_min[scale_idx], scales_max[scale_idx])
325
+ scales[scale_idx] = (scales_min[scale_idx] + scales_max[scale_idx]) / 2
326
+ if prev_min != scales_min[scale_idx] or prev_max != scales_max[scale_idx]:
327
+ has_change = True
328
+ else:
329
+ print(f"No change for scale {names[scale_idx]}")
330
+ if not has_change:
331
+ print(f"No change in this iteration {iter_idx}, stop")
332
+ break
333
+ elif search_mode == "grid":
334
+ print("Search mode grid, all dimensions/scales optimized together")
335
+ for iter_idx in range(args.num_iterations):
336
+ print("*** Iter", iter_idx)
337
+ has_change = False
338
+ scale_indices = []
339
+ spaces = []
340
+ for scale_idx in range(len(names)):
341
+ if scale_idx in fixed_scales:
342
+ continue
343
+ scale_indices.append(scale_idx)
344
+ spaces.append(np.linspace(scales_min[scale_idx], scales_max[scale_idx], num=args.num_steps))
345
+ evals: List[Tuple[float, List[float]]] = [] # (eval, scales)
346
+ best_eval_so_far = np.inf
347
+ spaces = np.meshgrid(*spaces)
348
+ it = np.nditer(spaces)
349
+ with it:
350
+ for scale_values in it:
351
+ assert len(scale_values) == len(scale_indices)
352
+ scales = [0.0] * len(names)
353
+ for scale_idx, scale in fixed_scales.items():
354
+ scales[scale_idx] = scale
355
+ for scale_idx, scale in zip(scale_indices, scale_values):
356
+ scales[scale_idx] = float(scale)
357
+ eval_ = _eval_for_scales(scales)
358
+ if eval_ < best_eval_so_far:
359
+ best_eval_so_far = eval_
360
+ print(f"New best {args.evaluation}: {eval_}, scales: {scales}")
361
+ evals.append((eval_, scales))
362
+ if args.output_grid_plot:
363
+ assert len(scale_indices) == 2, "only implemented for 2 scales"
364
+ _plot_grid(
365
+ evals,
366
+ scale_indices=scale_indices,
367
+ title="",
368
+ cbar_label=args.evaluation,
369
+ x_axis_name=names[scale_indices[0]],
370
+ y_axis_name=names[scale_indices[1]],
371
+ out_plot_filename=f"{args.output_grid_plot}.{iter_idx}.pdf",
372
+ )
373
+ eval_p = np.percentile([eval_ for eval_, _ in evals], 5)
374
+ print(
375
+ f"Evaluated grid size {len(evals)}, eval p5: {eval_p},",
376
+ f"best: {min(eval_ for eval_, _ in evals)},",
377
+ f"worst: {max(eval_ for eval_, _ in evals)}",
378
+ )
379
+ for scale_idx in scale_indices:
380
+ prev_min, prev_max = scales_min[scale_idx], scales_max[scale_idx]
381
+ scales_min[scale_idx] = min(scale[scale_idx] for eval_, scale in evals if eval_ <= eval_p * 1.0001)
382
+ scales_max[scale_idx] = max(scale[scale_idx] for eval_, scale in evals if eval_ <= eval_p * 1.0001)
383
+ print(f"New {names[scale_idx]} scales min/max:", scales_min[scale_idx], scales_max[scale_idx])
384
+ if prev_min != scales_min[scale_idx] or prev_max != scales_max[scale_idx]:
385
+ has_change = True
386
+ else:
387
+ print(f"No change for scale {names[scale_idx]}")
388
+ # Select current best.
389
+ scales = min(evals)[1]
390
+ if not has_change:
391
+ print(f"No change in this iteration {iter_idx}, stop")
392
+ break
393
+ else:
394
+ raise ValueError(f"unknown search mode {search_mode!r}")
395
+
396
+ print("Final scales:")
397
+ for name, scale, real_scale in zip(names, scales, _real_scales(scales)):
398
+ print(f"{name}: {scale} (real: {real_scale})")
399
+ eval_ = _eval_for_scales(scales)
400
+ print(f"Final {args.evaluation}: {eval_}")
401
+
402
+ if args.output_scales:
403
+ print("Writing scales to", args.output_scales)
404
+ with open(args.output_scales, "w") as f:
405
+ f.write(json.dumps(dict(zip(names, scales))) + "\n")
406
+ if args.output_real_scales:
407
+ print("Writing real scales to", args.output_real_scales)
408
+ with open(args.output_real_scales, "w") as f:
409
+ f.write(json.dumps(dict(zip(names, _real_scales(scales)))) + "\n")
410
+
411
+
412
+ @dataclass
413
+ class Batch:
414
+ """batch"""
415
+
416
+ seq_tags: List[str]
417
+ batch_dim: Dim # scalar, int32
418
+ beam_dim: Dim # [Batch], int32
419
+ hyps_eval: Tensor # [Batch,Beam], float64
420
+ names_dim: Dim # scalar, int32
421
+ scores: Tensor # [Names,Batch,Beam], float32
422
+
423
+
424
+ def _make_padded_tensor_2d(
425
+ lst: List[List[Union[int, float]]], *, dtype: torch.dtype, device: Optional[torch.device] = None
426
+ ) -> torch.Tensor:
427
+ max_len = max(len(l_) for l_ in lst)
428
+ res = torch.zeros((len(lst), max_len), dtype=dtype)
429
+ for i, l in enumerate(lst):
430
+ for j, v in enumerate(l):
431
+ res[i, j] = v
432
+ return res.to(device)
433
+
434
+
435
+ def _make_padded_tensor_3d(
436
+ lst: List[List[List[Union[int, float]]]], *, dtype: torch.dtype, device: torch.device
437
+ ) -> torch.Tensor:
438
+ max_len = max(len(l_) for l_ in lst)
439
+ max_len2 = max(len(l__) for l_ in lst for l__ in l_)
440
+ res = torch.zeros((len(lst), max_len, max_len2), dtype=dtype)
441
+ for i, l in enumerate(lst):
442
+ for j, l2 in enumerate(l):
443
+ for k, v in enumerate(l2):
444
+ res[i, j, k] = v
445
+ return res.to(device)
446
+
447
+
448
+ def _load_text_dict_hyps_file(
449
+ filename: str,
450
+ *,
451
+ name: str,
452
+ vocab: Dict[str, int],
453
+ expect_same_hyps: Optional[Dict[str, List[List[int]]]] = None,
454
+ ) -> Tuple[Dict[str, List[float]], Dict[str, List[List[int]]]]:
455
+ # See also code in RETURNN TextDictDataset.
456
+ print(f"Loading text dict file {name} from {filename} ...")
457
+
458
+ if filename.endswith(".gz"):
459
+ import gzip
460
+
461
+ txt = gzip.GzipFile(filename, "rb").read()
462
+ else:
463
+ txt = open(filename, "rb").read()
464
+ data: Dict[str, List[Tuple[float, str]]] = eval(txt)
465
+ assert isinstance(data, dict)
466
+ res_scores = {}
467
+ res_hyps = {}
468
+ if expect_same_hyps is not None:
469
+ assert set(data.keys()) == set(expect_same_hyps.keys())
470
+ for tag, hyps in data.items():
471
+ res_scores[tag], res_hyps[tag] = _hyps_to_indices(
472
+ hyps,
473
+ vocab=vocab,
474
+ expect_same_hyps=expect_same_hyps[tag] if expect_same_hyps else None,
475
+ )
476
+ return res_scores, res_hyps
477
+
478
+
479
+ def _load_text_dict_file(filename: str, *, name: str, vocab: Dict[str, int]) -> Dict[str, List[int]]:
480
+ # See also code in RETURNN TextDictDataset.
481
+ print(f"Loading text dict file {name} from {filename} ...")
482
+
483
+ if filename.endswith(".gz"):
484
+ import gzip
485
+
486
+ txt = gzip.GzipFile(filename, "rb").read()
487
+ else:
488
+ txt = open(filename, "rb").read()
489
+ data: Dict[str, str] = eval(txt)
490
+ assert isinstance(data, dict)
491
+ res = {}
492
+ for tag, hyps in data.items():
493
+ res[tag] = _hyp_to_indices(hyps, vocab=vocab)
494
+ return res
495
+
496
+
497
+ def _hyps_to_indices(
498
+ hyp: List[Tuple[float, str]],
499
+ *,
500
+ vocab: Dict[str, int],
501
+ expect_same_hyps: Optional[List[List[int]]] = None,
502
+ ) -> Tuple[List[float], List[List[int]]]:
503
+ assert isinstance(hyp, list)
504
+ if expect_same_hyps is not None:
505
+ assert len(expect_same_hyps) == len(hyp)
506
+ res_scores = []
507
+ res_hyps = []
508
+ for i, (score, hyp_) in enumerate(hyp):
509
+ assert isinstance(score, float) and isinstance(hyp_, str)
510
+ res_scores.append(score)
511
+ seq = _hyp_to_indices(hyp_, vocab=vocab, assert_in_vocab=expect_same_hyps is not None)
512
+ if expect_same_hyps is not None:
513
+ assert seq == expect_same_hyps[i]
514
+ res_hyps.append(seq)
515
+ return res_scores, res_hyps
516
+
517
+
518
+ def _hyp_to_indices(hyp: str, *, vocab: Dict[str, int], assert_in_vocab: bool = False) -> List[int]:
519
+ assert isinstance(hyp, str)
520
+ res = []
521
+ for label in hyp.split():
522
+ if label not in vocab:
523
+ assert not assert_in_vocab, f"unknown label {label}"
524
+ vocab[label] = len(vocab)
525
+ res.append(vocab[label])
526
+ return res
527
+
528
+
529
+ def _report_dev_memory(dev: torch.device):
530
+ import torch
531
+ from returnn.util import basic as rutil
532
+
533
+ if dev.type == "cuda":
534
+ stats = [
535
+ f"alloc cur {rutil.human_bytes_size(torch.cuda.memory_allocated(dev))}",
536
+ f"alloc peak {rutil.human_bytes_size(torch.cuda.max_memory_allocated(dev))}",
537
+ f"reserved cur {rutil.human_bytes_size(torch.cuda.memory_reserved(dev))}",
538
+ f"reserved peak {rutil.human_bytes_size(torch.cuda.max_memory_reserved(dev))}",
539
+ ]
540
+ print(f"Memory usage ({dev}):", " ".join(stats))
541
+
542
+
543
+ def _plot_grid(
544
+ evals: List[Tuple[float, List[float]]],
545
+ *,
546
+ scale_indices: List[int],
547
+ title: str,
548
+ cbar_label: str,
549
+ y_axis_name: str,
550
+ x_axis_name: str,
551
+ out_plot_filename: str,
552
+ ):
553
+ # noinspection PyPackageRequirements
554
+ import matplotlib.pyplot as plt
555
+
556
+ # noinspection PyPackageRequirements
557
+ import matplotlib.ticker as ticker
558
+
559
+ results = {} # (x,y) -> z
560
+ for eval_, scales in evals:
561
+ results[tuple([scales[i] for i in scale_indices])] = eval_
562
+ xs = sorted(set(scales[scale_indices[0]] for _, scales in evals))
563
+ ys = sorted(set(scales[scale_indices[1]] for _, scales in evals))
564
+
565
+ plt.figure(figsize=(8, 8))
566
+
567
+ zs = np.zeros((len(ys), len(xs)))
568
+ for y_idx, y in enumerate(ys):
569
+ for x_idx, x in enumerate(xs):
570
+ zs[y_idx, x_idx] = results[(x, y)]
571
+
572
+ best = np.min(zs.flatten())
573
+ worst_limit = best * 1.3
574
+
575
+ ax = plt.subplot(1, 1, 1)
576
+ plt.contourf(xs, ys, zs, levels=np.geomspace(best, worst_limit, 30))
577
+
578
+ ax.set_title(title)
579
+ ax.set_ylabel(y_axis_name)
580
+ ax.set_xlabel(x_axis_name)
581
+ ax.xaxis.set_major_locator(ticker.AutoLocator())
582
+ ax.xaxis.set_minor_locator(ticker.AutoMinorLocator())
583
+ ax.yaxis.set_major_locator(ticker.AutoLocator())
584
+ ax.yaxis.set_minor_locator(ticker.AutoMinorLocator())
585
+
586
+ cbar = plt.colorbar()
587
+ cbar.set_label(cbar_label)
588
+
589
+ print("Saving plot to", out_plot_filename)
590
+ plt.savefig(out_plot_filename)
591
+
592
+
593
+ if __name__ == "__main__":
594
+ main()
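
The new tools/torch_scale_tuning.py above tunes the scales of a log-linear score combination: for every candidate vector of scales it combines the per-model hypothesis scores, selects the best-scoring hypothesis per sequence, and averages a precomputed per-hypothesis error (here the edit distance, normalized to WER) over the corpus. Below is a minimal plain-PyTorch sketch of that evaluation step, without the RETURNN frontend and without the padded beam/sequence handling of the real tool; all names and shapes are illustrative, not part of the released code.

import torch

def eval_scales(
    scales: torch.Tensor,      # [num_models], candidate combination weights (illustrative sketch)
    scores: torch.Tensor,      # [num_models, batch, beam], per-model hypothesis scores
    hyp_errors: torch.Tensor,  # [batch, beam], precomputed error of each hypothesis vs. the reference
) -> float:
    # Log-linear combination: weighted sum of the individual model scores.
    combined = torch.einsum("n,nbk->bk", scales, scores)  # [batch, beam]
    # Select the highest-scoring hypothesis per sequence.
    best = combined.argmax(dim=1)  # [batch]
    # Average the error of the selected hypotheses over the corpus.
    picked = hyp_errors.gather(1, best[:, None]).squeeze(1)  # [batch]
    return picked.mean().item()

# Toy usage: 2 models, 3 sequences, beam size 4.
print(eval_scales(torch.tensor([1.0, 0.3]), torch.randn(2, 3, 4), torch.rand(3, 4)))

The input score files read by _load_text_dict_hyps_file above are Python-literal dicts mapping each seq_tag to a list of (score, hypothesis_text) pairs, and the optional reference file read by _load_text_dict_file maps each seq_tag to its reference text.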
@@ -1,2 +0,0 @@
1
- version = '1.20250125.000618'
2
- long_version = '1.20250125.000618+git.b01684e'