returnn 1.20250204.160236__tar.gz → 1.20250206.151011__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of returnn might be problematic.

Files changed (476)
  1. {returnn-1.20250204.160236/returnn.egg-info → returnn-1.20250206.151011}/PKG-INFO +1 -1
  2. returnn-1.20250206.151011/_setup_info_generated.py +2 -0
  3. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/encoder/transformer.py +19 -11
  4. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tensor/tensor_dict.py +1 -1
  5. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/engine.py +77 -18
  6. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/util/debug_inf_nan.py +37 -5
  7. {returnn-1.20250204.160236 → returnn-1.20250206.151011/returnn.egg-info}/PKG-INFO +1 -1
  8. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/rf_utils.py +43 -2
  9. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_rf_array.py +28 -1
  10. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_torch_util.py +2 -2
  11. returnn-1.20250204.160236/_setup_info_generated.py +0 -2
  12. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/.editorconfig +0 -0
  13. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/.gitignore +0 -0
  14. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/.gitmodules +0 -0
  15. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/.kateconfig +0 -0
  16. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/CHANGELOG.md +0 -0
  17. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/CODEOWNERS +0 -0
  18. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/CONTRIBUTING.md +0 -0
  19. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/LICENSE +0 -0
  20. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/MANIFEST.in +0 -0
  21. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/README.rst +0 -0
  22. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/__init__.py +0 -0
  23. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/12AX.cluster_map +0 -0
  24. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/_setup_returnn_env.py +0 -0
  25. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-fwd.config +0 -0
  26. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-horovod-mpi.py +0 -0
  27. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-horovod-mpi.py.sh +0 -0
  28. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-horovod-mpi.sh +0 -0
  29. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-hyper-param-tuning.config +0 -0
  30. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-iter-dataset.py +0 -0
  31. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-list-devices.py +0 -0
  32. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-lua-torch-layer.config +0 -0
  33. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-pretrain.config +0 -0
  34. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-record-and-push-to-webserver.py +0 -0
  35. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-returnn-as-framework.py +0 -0
  36. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-rf-pt-benchmark.py +0 -0
  37. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-rf.config +0 -0
  38. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-rhn-enwik8.config +0 -0
  39. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-sprint-interface.py +0 -0
  40. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-tf-att-copy.config +0 -0
  41. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-tf-attention.config +0 -0
  42. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
  43. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
  44. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-tf-enc-dec.config +0 -0
  45. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-tf-hard-att-copy.config +0 -0
  46. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-tf-lstm-benchmark.py +0 -0
  47. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
  48. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
  49. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-tf-native-lstm.12ax.config +0 -0
  50. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-tf-native-lstm2.12ax.config +0 -0
  51. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
  52. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-tf-neural-transducer.12ax.config +0 -0
  53. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-tf-rec-explicit-lstm.config +0 -0
  54. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-tf-rec-explicit-rnn.config +0 -0
  55. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-tf-rec-self-att.config +0 -0
  56. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-tf-search-compiled-graph.py +0 -0
  57. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
  58. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-timit-lstm-ctc.config +0 -0
  59. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-torch.config +0 -0
  60. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
  61. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/demo.sh +0 -0
  62. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
  63. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
  64. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
  65. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/mdlstm/IAM/README.txt +0 -0
  66. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/mdlstm/IAM/chars.txt +0 -0
  67. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/mdlstm/IAM/config_demo +0 -0
  68. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/mdlstm/IAM/config_fwd +0 -0
  69. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/mdlstm/IAM/config_real +0 -0
  70. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
  71. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/mdlstm/IAM/decode.py +0 -0
  72. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
  73. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/mdlstm/IAM/go.sh +0 -0
  74. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/mdlstm/IAM/lines.txt +0 -0
  75. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/mdlstm/IAM/split/eval.txt +0 -0
  76. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/mdlstm/IAM/split/train.txt +0 -0
  77. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/mdlstm/IAM/split/valid.txt +0 -0
  78. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/mdlstm/README.md +0 -0
  79. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/mdlstm/artificial/create_test_h5.py +0 -0
  80. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/mdlstm/artificial/forwardconfig +0 -0
  81. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/mdlstm/artificial/go.sh +0 -0
  82. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/mdlstm/artificial/trainconfig +0 -0
  83. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
  84. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
  85. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/mdlstm/artificial_rgb/go.sh +0 -0
  86. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
  87. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/pyproject.toml +0 -0
  88. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/requirements.txt +0 -0
  89. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/__init__.py +0 -0
  90. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/__main__.py +0 -0
  91. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/__old_mod_loader__.py +0 -0
  92. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/__setup__.py +0 -0
  93. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/config.py +0 -0
  94. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/datasets/__init__.py +0 -0
  95. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/datasets/audio.py +0 -0
  96. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/datasets/basic.py +0 -0
  97. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/datasets/bundle_file.py +0 -0
  98. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/datasets/cached.py +0 -0
  99. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/datasets/cached2.py +0 -0
  100. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/datasets/distrib_files.py +0 -0
  101. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/datasets/generating.py +0 -0
  102. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/datasets/hdf.py +0 -0
  103. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/datasets/lm.py +0 -0
  104. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/datasets/map.py +0 -0
  105. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/datasets/meta.py +0 -0
  106. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/datasets/multi_proc.py +0 -0
  107. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/datasets/normalization_data.py +0 -0
  108. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/datasets/numpy_dump.py +0 -0
  109. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/datasets/postprocessing.py +0 -0
  110. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/datasets/raw_wav.py +0 -0
  111. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/datasets/sprint.py +0 -0
  112. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/datasets/stereo.py +0 -0
  113. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/datasets/text_dict.py +0 -0
  114. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/datasets/util/__init__.py +0 -0
  115. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/datasets/util/feature_extraction.py +0 -0
  116. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/datasets/util/strings.py +0 -0
  117. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/datasets/util/vocabulary.py +0 -0
  118. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/engine/__init__.py +0 -0
  119. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/engine/base.py +0 -0
  120. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/engine/batch.py +0 -0
  121. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/__init__.py +0 -0
  122. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/__main__.py +0 -0
  123. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/warp-rna/.git +0 -0
  124. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
  125. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
  126. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
  127. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
  128. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
  129. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
  130. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
  131. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
  132. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
  133. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
  134. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
  135. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
  136. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
  137. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
  138. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
  139. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
  140. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
  141. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
  142. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
  143. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
  144. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
  145. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
  146. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
  147. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
  148. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/__init__.py +0 -0
  149. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/graph_editor/README.md +0 -0
  150. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/graph_editor/__init__.py +0 -0
  151. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/graph_editor/edit.py +0 -0
  152. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/graph_editor/reroute.py +0 -0
  153. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/graph_editor/select.py +0 -0
  154. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/graph_editor/subgraph.py +0 -0
  155. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/graph_editor/transform.py +0 -0
  156. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/extern/graph_editor/util.py +0 -0
  157. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/forward_iface.py +0 -0
  158. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/__init__.py +0 -0
  159. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/_backend.py +0 -0
  160. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/_cache.py +0 -0
  161. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/_native/__init__.py +0 -0
  162. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/_native/backend.cpp +0 -0
  163. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/_native/backend.hpp +0 -0
  164. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/_native/module.cpp +0 -0
  165. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/_native/module.hpp +0 -0
  166. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/_native/py_utils.hpp +0 -0
  167. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/_native/tensor_ops.cpp +0 -0
  168. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/_native/tensor_ops.hpp +0 -0
  169. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/_numpy_backend.py +0 -0
  170. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/_random_journal.py +0 -0
  171. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/_utils.py +0 -0
  172. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/array_.py +0 -0
  173. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/attention.py +0 -0
  174. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/audio/__init__.py +0 -0
  175. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/audio/mel.py +0 -0
  176. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/audio/specaugment.py +0 -0
  177. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/backend.py +0 -0
  178. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/build_from_dict.py +0 -0
  179. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/cond.py +0 -0
  180. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/const.py +0 -0
  181. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/container.py +0 -0
  182. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/control_flow_ctx.py +0 -0
  183. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/conv.py +0 -0
  184. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/conversions/__init__.py +0 -0
  185. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/conversions/espnet_e_branchformer.py +0 -0
  186. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/conversions/hf_llama.py +0 -0
  187. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/conversions/torch_nn.py +0 -0
  188. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/decoder/__init__.py +0 -0
  189. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/decoder/transformer.py +0 -0
  190. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/device.py +0 -0
  191. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/dims.py +0 -0
  192. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/dropout.py +0 -0
  193. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/dtype.py +0 -0
  194. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/encoder/__init__.py +0 -0
  195. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/encoder/base.py +0 -0
  196. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/encoder/conformer.py +0 -0
  197. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/encoder/conformer_v2.py +0 -0
  198. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/encoder/e_branchformer.py +0 -0
  199. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/gradient.py +0 -0
  200. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/graph.py +0 -0
  201. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/hooks.py +0 -0
  202. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/init.py +0 -0
  203. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/label_smoothing.py +0 -0
  204. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/linear.py +0 -0
  205. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/loop.py +0 -0
  206. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/loss.py +0 -0
  207. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/math_.py +0 -0
  208. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/matmul.py +0 -0
  209. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/module.py +0 -0
  210. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/nested.py +0 -0
  211. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/normalization.py +0 -0
  212. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/parameter.py +0 -0
  213. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/parametrizations.py +0 -0
  214. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/parametrize.py +0 -0
  215. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/piecewise_linear.py +0 -0
  216. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/rand.py +0 -0
  217. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/rec.py +0 -0
  218. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/reduce.py +0 -0
  219. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/run_ctx.py +0 -0
  220. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/signal.py +0 -0
  221. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/state.py +0 -0
  222. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/stepwise_scheduler.py +0 -0
  223. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/tensor_array.py +0 -0
  224. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/frontend/types.py +0 -0
  225. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/import_/__init__.py +0 -0
  226. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/import_/common.py +0 -0
  227. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/import_/git.py +0 -0
  228. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/import_/import_.py +0 -0
  229. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/learning_rate_control.py +0 -0
  230. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/log.py +0 -0
  231. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/native_op.cpp +0 -0
  232. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/native_op.py +0 -0
  233. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/pretrain.py +0 -0
  234. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/sprint/__init__.py +0 -0
  235. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/sprint/cache.py +0 -0
  236. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/sprint/control.py +0 -0
  237. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/sprint/error_signals.py +0 -0
  238. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/sprint/extern_interface.py +0 -0
  239. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/sprint/interface.py +0 -0
  240. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tensor/README.md +0 -0
  241. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tensor/__init__.py +0 -0
  242. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tensor/_dim_extra.py +0 -0
  243. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tensor/_tensor_extra.py +0 -0
  244. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tensor/_tensor_mixin_base.py +0 -0
  245. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tensor/_tensor_op_overloads.py +0 -0
  246. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tensor/control_flow_ctx.py +0 -0
  247. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tensor/dim.py +0 -0
  248. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tensor/marked_dim.py +0 -0
  249. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tensor/tensor.py +0 -0
  250. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tensor/utils.py +0 -0
  251. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/__init__.py +0 -0
  252. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/compat.py +0 -0
  253. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/data_pipeline.py +0 -0
  254. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/distributed.py +0 -0
  255. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/engine.py +0 -0
  256. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/frontend_layers/README.md +0 -0
  257. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/frontend_layers/__init__.py +0 -0
  258. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/frontend_layers/_backend.py +0 -0
  259. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/frontend_layers/_utils.py +0 -0
  260. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/frontend_layers/cond.py +0 -0
  261. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
  262. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
  263. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/frontend_layers/dims.py +0 -0
  264. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/frontend_layers/layer.py +0 -0
  265. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/frontend_layers/loop.py +0 -0
  266. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/frontend_layers/make_layer.py +0 -0
  267. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/frontend_layers/masked_computation.py +0 -0
  268. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
  269. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
  270. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/frontend_low_level/__init__.py +0 -0
  271. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/frontend_low_level/_backend.py +0 -0
  272. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/horovod.py +0 -0
  273. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/hyper_param_tuning.py +0 -0
  274. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/layers/__init__.py +0 -0
  275. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/layers/base.py +0 -0
  276. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/layers/basic.py +0 -0
  277. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/layers/rec.py +0 -0
  278. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/layers/segmental_model.py +0 -0
  279. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/layers/signal_processing.py +0 -0
  280. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/layers/variable.py +0 -0
  281. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/native_op.py +0 -0
  282. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/network.py +0 -0
  283. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/sprint.py +0 -0
  284. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/updater.py +0 -0
  285. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/util/__init__.py +0 -0
  286. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/util/basic.py +0 -0
  287. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/util/data.py +0 -0
  288. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/util/gradient_checkpoint.py +0 -0
  289. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/util/ken_lm.py +0 -0
  290. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/tf/util/open_fst.py +0 -0
  291. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/README.md +0 -0
  292. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/__init__.py +0 -0
  293. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/data/__init__.py +0 -0
  294. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/data/extern_data.py +0 -0
  295. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/data/pipeline.py +0 -0
  296. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/data/queued_data_iter.py +0 -0
  297. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
  298. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/data/tensor_utils.py +0 -0
  299. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/distributed.py +0 -0
  300. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/frontend/__init__.py +0 -0
  301. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/frontend/_backend.py +0 -0
  302. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/frontend/_rand.py +0 -0
  303. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/frontend/bridge.py +0 -0
  304. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/frontend/raw_ops.py +0 -0
  305. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/optim/README.md +0 -0
  306. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/optim/__init__.py +0 -0
  307. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/optim/lion.py +0 -0
  308. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/updater.py +0 -0
  309. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/util/README.md +0 -0
  310. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/util/__init__.py +0 -0
  311. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/util/array_.py +0 -0
  312. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/util/diagnose_gpu.py +0 -0
  313. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/util/exception_helper.py +0 -0
  314. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/util/gradient_checkpoint.py +0 -0
  315. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/util/module.py +0 -0
  316. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/torch/util/scaled_gradient.py +0 -0
  317. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/util/__init__.py +0 -0
  318. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/util/basic.py +0 -0
  319. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/util/better_exchook.py +0 -0
  320. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/util/bpe.py +0 -0
  321. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/util/debug.py +0 -0
  322. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/util/debug_helpers.py +0 -0
  323. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/util/file_cache.py +0 -0
  324. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/util/fsa.py +0 -0
  325. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/util/literal_py_to_pickle.py +0 -0
  326. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/util/lru_cache.py +0 -0
  327. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/util/math.py +0 -0
  328. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
  329. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/util/native_code_compiler.py +0 -0
  330. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/util/pprint.py +0 -0
  331. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/util/py-to-pickle.cpp +0 -0
  332. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/util/py_ext_mod_compiler.py +0 -0
  333. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/util/result_with_reason.py +0 -0
  334. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/util/sig_proc.py +0 -0
  335. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/util/task_system.py +0 -0
  336. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/util/train_proc_manager.py +0 -0
  337. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn/util/watch_memory.py +0 -0
  338. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn.egg-info/SOURCES.txt +0 -0
  339. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn.egg-info/dependency_links.txt +0 -0
  340. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn.egg-info/requires.txt +0 -0
  341. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/returnn.egg-info/top_level.txt +0 -0
  342. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/rnn.py +0 -0
  343. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/setup.cfg +0 -0
  344. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/setup.py +0 -0
  345. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/DummySprintExec.py +0 -0
  346. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/PyCharm-inspection-profile.xml +0 -0
  347. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/PyCharm.idea/.gitignore +0 -0
  348. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/PyCharm.idea/.name +0 -0
  349. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
  350. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
  351. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
  352. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
  353. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
  354. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/PyCharm.idea/misc.xml +0 -0
  355. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/PyCharm.idea/modules.xml +0 -0
  356. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/PyCharm.idea/returnn.iml +0 -0
  357. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
  358. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/_set_num_threads1.py +0 -0
  359. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/_setup_returnn_env.py +0 -0
  360. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/_setup_test_env.py +0 -0
  361. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/bpe-unicode-demo.codes +0 -0
  362. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/bpe-unicode-demo.vocab +0 -0
  363. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/lexicon_opt.fst +0 -0
  364. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/lexicon_opt.isyms +0 -0
  365. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/lexicon_opt.jpg +0 -0
  366. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/lexicon_opt.osyms +0 -0
  367. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/lint_common.py +0 -0
  368. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/pycharm-inspect.py +0 -0
  369. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/pylint.py +0 -0
  370. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/returnn-as-framework.py +0 -0
  371. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/spelling.dic +0 -0
  372. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_Config.py +0 -0
  373. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_Dataset.py +0 -0
  374. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_Fsa.py +0 -0
  375. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_GeneratingDataset.py +0 -0
  376. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_HDFDataset.py +0 -0
  377. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_LearningRateControl.py +0 -0
  378. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_Log.py +0 -0
  379. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_MultiProcDataset.py +0 -0
  380. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_Pretrain.py +0 -0
  381. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_ResNet.py +0 -0
  382. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_SprintDataset.py +0 -0
  383. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_SprintInterface.py +0 -0
  384. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_TFEngine.py +0 -0
  385. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_TFNativeOp.py +0 -0
  386. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_TFNetworkLayer.py +0 -0
  387. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_TFNetworkRecLayer.py +0 -0
  388. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_TFNetworkSigProcLayer.py +0 -0
  389. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_TFUpdater.py +0 -0
  390. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_TFUtil.py +0 -0
  391. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_TF_determinism.py +0 -0
  392. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_TaskSystem.py +0 -0
  393. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_TaskSystem_SharedMem.py +0 -0
  394. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_TranslationDataset.py +0 -0
  395. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_Util.py +0 -0
  396. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_demos.py +0 -0
  397. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_fork_exec.py +0 -0
  398. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_hdf_dump.py +0 -0
  399. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_rf_attention.py +0 -0
  400. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_rf_base.py +0 -0
  401. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_rf_cond.py +0 -0
  402. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_rf_const.py +0 -0
  403. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_rf_container.py +0 -0
  404. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_rf_conv.py +0 -0
  405. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_rf_decoder_transformer.py +0 -0
  406. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_rf_encoder_conformer.py +0 -0
  407. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_rf_gradient.py +0 -0
  408. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_rf_label_smoothing.py +0 -0
  409. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_rf_loop.py +0 -0
  410. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_rf_math.py +0 -0
  411. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_rf_normalization.py +0 -0
  412. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_rf_piecewise_linear.py +0 -0
  413. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_rf_rec.py +0 -0
  414. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_rf_reduce.py +0 -0
  415. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_rf_signal.py +0 -0
  416. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_tensor.py +0 -0
  417. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_threading.py +0 -0
  418. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_tools.py +0 -0
  419. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_torch_dataset.py +0 -0
  420. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_torch_engine.py +0 -0
  421. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_torch_frontend.py +0 -0
  422. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/test_torch_internal_frontend.py +0 -0
  423. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tests/torch_utils.py +0 -0
  424. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/_setup_returnn_env.py +0 -0
  425. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/analyze-dataset-batches.py +0 -0
  426. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/bliss-collect-seq-lens.py +0 -0
  427. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/bliss-dump-text.py +0 -0
  428. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/bliss-get-segment-names.py +0 -0
  429. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/bliss-to-ogg-zip.py +0 -0
  430. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/bpe-create-lexicon.py +0 -0
  431. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/calculate-word-error-rate.py +0 -0
  432. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/cleanup-old-models.py +0 -0
  433. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/collect-orth-symbols.py +0 -0
  434. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/collect-words.py +0 -0
  435. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/compile_native_op.py +0 -0
  436. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/compile_tf_graph.py +0 -0
  437. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/debug-dump-search-scores.py +0 -0
  438. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/debug-plot-search-scores.py +0 -0
  439. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/dump-dataset-raw-strings.py +0 -0
  440. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/dump-dataset.py +0 -0
  441. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/dump-forward-stats.py +0 -0
  442. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/dump-forward.py +0 -0
  443. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/dump-network-json.py +0 -0
  444. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/dump-pickle.py +0 -0
  445. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/extract_state_tying_from_dataset.py +0 -0
  446. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/get-attention-weights.py +0 -0
  447. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/get-best-model-epoch.py +0 -0
  448. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/hdf_dump.py +0 -0
  449. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/hdf_dump_translation_dataset.py +0 -0
  450. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/import-blocks-mt-model.py +0 -0
  451. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/import-t2t-mt-model.py +0 -0
  452. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/lattice_rescorer/.gitignore +0 -0
  453. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/lattice_rescorer/Makefile +0 -0
  454. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/lattice_rescorer/README.md +0 -0
  455. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/lattice_rescorer/example/README.md +0 -0
  456. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/lattice_rescorer/example/libs_list +0 -0
  457. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
  458. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
  459. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
  460. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/lattice_rescorer/example/state_vars_list +0 -0
  461. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/lattice_rescorer/example/tensor_names_list +0 -0
  462. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/lattice_rescorer/file.h +0 -0
  463. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
  464. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
  465. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/lattice_rescorer/main.cc +0 -0
  466. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/lattice_rescorer/rescorer.h +0 -0
  467. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/lattice_rescorer/vocabulary.cc +0 -0
  468. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/lattice_rescorer/vocabulary.h +0 -0
  469. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/tf_avg_checkpoints.py +0 -0
  470. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/tf_inspect_checkpoint.py +0 -0
  471. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/tf_inspect_summary_log.py +0 -0
  472. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/torch_avg_checkpoints.py +0 -0
  473. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/torch_export_to_onnx.py +0 -0
  474. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/torch_inspect_checkpoint.py +0 -0
  475. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/torch_inspect_checkpoint_and_opt.py +0 -0
  476. {returnn-1.20250204.160236 → returnn-1.20250206.151011}/tools/torch_scale_tuning.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250204.160236
+Version: 1.20250206.151011
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
_setup_info_generated.py

@@ -0,0 +1,2 @@
+version = '1.20250206.151011'
+long_version = '1.20250206.151011+git.6fa4b38'
returnn/frontend/encoder/transformer.py

@@ -35,6 +35,7 @@ class TransformerEncoder(rf.Module):
         layer: Optional[Union[TransformerEncoderLayer, rf.Module, type, Dict[str, Any], Any]] = None,
         layer_opts: Optional[Dict[str, Any]] = None,
         embed_dim: Optional[Dim] = None,
+        input_embedding: Union[None, rf.Module, type, Dict[str, Any]] = rf.Embedding,
         input_embedding_scale: float = None,
         input_dropout: float = None,
         sequential=rf.Sequential,
@@ -53,6 +54,7 @@ class TransformerEncoder(rf.Module):
         :param layer: an instance of :class:`TransformerEncoderLayer` or similar
         :param layer_opts: options for the encoder layer
         :param embed_dim: if given, will first have an embedding [vocab,embed] and then a linear [embed,model].
+        :param input_embedding:
         :param input_embedding_scale:
         :param input_dropout:
         :param sequential:
@@ -77,9 +79,15 @@ class TransformerEncoder(rf.Module):
         self.model_dim = model_dim
         self.embed_dim = embed_dim

-        # We could make this optional or configurable if we ever need to.
-        # Or maybe you would just have another separate implementation of this module then...
-        self.input_embedding = rf.Embedding(vocab_dim, embed_dim or model_dim)
+        if input_embedding is None or isinstance(input_embedding, rf.Module):
+            pass
+        elif isinstance(input_embedding, type):
+            input_embedding: rf.Embedding = input_embedding(vocab_dim, embed_dim or model_dim)
+        elif isinstance(input_embedding, dict):
+            input_embedding = rf.build_from_dict(input_embedding, vocab_dim, embed_dim or model_dim)
+        else:
+            raise TypeError(f"unexpected input_embedding {input_embedding!r} type {type(input_embedding)}")
+        self.input_embedding = input_embedding

         self.input_embedding_proj = None
         if embed_dim:
@@ -88,17 +96,13 @@ class TransformerEncoder(rf.Module):
         if pos_enc is None:
             pass
         elif isinstance(pos_enc, dict):
-            pos_enc = rf.build_from_dict(
-                pos_enc, feat_dim=embed_dim or model_dim, dtype=self.input_embedding.weight.dtype
-            )
+            pos_enc = rf.build_from_dict(pos_enc, feat_dim=embed_dim or model_dim, dtype=rf.get_default_float_dtype())
         elif isinstance(pos_enc, rf.Module):
             pass
         elif isinstance(pos_enc, FunctionType):
-            pos_enc = functools.partial(
-                pos_enc, feat_dim=embed_dim or model_dim, dtype=self.input_embedding.weight.dtype
-            )
+            pos_enc = functools.partial(pos_enc, feat_dim=embed_dim or model_dim, dtype=rf.get_default_float_dtype())
         else:
-            raise TypeError(f"unexpected pos_enc type {pos_enc!r}")
+            raise TypeError(f"unexpected pos_enc {pos_enc!r} type {type(pos_enc)}")
         self.pos_enc = pos_enc
         if input_embedding_scale is None:
             input_embedding_scale = model_dim.dimension**0.5
@@ -157,7 +161,11 @@ class TransformerEncoder(rf.Module):
         :param collected_outputs:
         :return: final encoder output, after final layer norm
         """
-        decoded = self.input_embedding(source) * self.input_embedding_scale
+        if self.input_embedding is not None:
+            decoded = self.input_embedding(source) * self.input_embedding_scale
+        else:
+            assert self.model_dim in source.dims
+            decoded = source
         if self.pos_enc is not None:
             decoded = decoded + self.pos_enc(spatial_dim=spatial_dim)
         decoded = rf.dropout(decoded, self.input_dropout)
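
Taken together, these hunks make the input embedding of TransformerEncoder configurable: it can be a module instance, a class, a build dict, or None (in which case the encoder expects inputs that already carry model_dim). A minimal usage sketch, assuming the usual returnn.frontend API; the dims and num_layers below are illustrative and not taken from this diff:

import returnn.frontend as rf
from returnn.tensor import Dim
from returnn.frontend.encoder.transformer import TransformerEncoder

vocab_dim = Dim(10_000, name="vocab")  # hypothetical vocabulary dim
model_dim = Dim(512, name="model")  # hypothetical model dim

# Default behavior (input_embedding=rf.Embedding): the embedding is constructed internally.
encoder = TransformerEncoder(vocab_dim, model_dim, num_layers=6)

# New: skip the embedding and feed already-embedded features;
# the forward pass then asserts that model_dim is in source.dims.
encoder_wo_embed = TransformerEncoder(vocab_dim, model_dim, num_layers=6, input_embedding=None)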
returnn/tensor/tensor_dict.py

@@ -139,7 +139,7 @@ class TensorDict:
         """
         visited_dims = set()
         for key, value in self.data.items():
-            assert key in raw_tensor_dict
+            assert key in raw_tensor_dict, f"key {key} not in raw_tensor_dict {list(raw_tensor_dict.keys())}"
             value.raw_tensor = raw_tensor_dict[key]
             for i, dim in enumerate(value.dims):
                 dim: Dim
returnn/torch/engine.py

@@ -3,7 +3,7 @@ Main engine for PyTorch
 """

 from __future__ import annotations
-from typing import Optional, Any, Union, Callable, Dict, Set
+from typing import Optional, Any, Union, Callable, Dict, Set, Tuple
 from contextlib import nullcontext, ExitStack, contextmanager

 import gc
@@ -371,6 +371,7 @@ class Engine(EngineBase):
         total_data_size_packed = NumbersDict()
         total_data_size_padded = NumbersDict()

+        report_prefix = f"ep {self.epoch} train"
         try:
             while True:
                 with torch.no_grad():
@@ -398,21 +399,13 @@ class Engine(EngineBase):
                     {k: int(util.prod(extern_data_raw[k].shape[:2])) for k in keys_w_seq_len},
                 )

-                num_seqs_ = (
-                    int(extern_data_raw["num_seqs"]) if extern_data_raw.get("num_seqs", None) is not None else -1
+                num_seqs, last_seq_idx = _get_num_seqs_last_seq_idx(
+                    report_prefix=report_prefix,
+                    extern_data_raw=extern_data_raw,
+                    step_idx=step_idx,
+                    prev_num_seqs=num_seqs,
+                    prev_last_seq_idx=last_seq_idx,
                 )
-                # Note: The batches might have been shuffled,
-                # thus we cannot really assert that the seq_idx is always increasing.
-                last_seq_idx = max(int(extern_data_raw["seq_idx"].max()), last_seq_idx)
-                if step_idx == 0:
-                    if num_seqs_ >= 0:
-                        print(f"Epoch {self.epoch} num_seqs: {num_seqs_}", file=log.v5)
-                        num_seqs = num_seqs_
-                elif num_seqs_ >= 0:
-                    assert num_seqs_ == num_seqs
-                del num_seqs_
-                if num_seqs is not None:
-                    assert last_seq_idx < num_seqs
                 epoch_continuous = (self.epoch - 1 + (last_seq_idx + 1) / num_seqs) if num_seqs is not None else None

                 # clear the gradients when every gradient accumulation loop starts
@@ -485,7 +478,7 @@ class Engine(EngineBase):
                 accumulated_inv_norm_factors_dict += inv_norm_factors_dict
                 eval_info = self._maybe_extend_losses_info(losses_dict / inv_norm_factors_dict)
                 _print_process(
-                    f"ep {self.epoch} train",
+                    report_prefix,
                     step=step_idx,
                     eval_info=dict(eval_info),
                     step_duration=step_duration,
505
498
  file=log.v1,
506
499
  )
507
500
 
501
+ print("Checking for inf/nan in model parameters...", file=log.v1)
502
+ count_nan_inf_params = 0
503
+ for name, param in self._pt_model.named_parameters():
504
+ got_nan_inf_t = torch.stack([torch.isnan(param).any(), torch.isinf(param).any()]).cpu()
505
+ got_nan = got_nan_inf_t[0].item()
506
+ got_inf = got_nan_inf_t[1].item()
507
+ if got_nan or got_inf:
508
+ s = "/".join([s_ for s_, b in [("nan", got_nan), ("inf", got_inf)] if b])
509
+ print(f" {name} {param}: {s}", file=log.v1)
510
+ count_nan_inf_params += 1
511
+ if count_nan_inf_params == 0:
512
+ print("(No inf/nan in model parameters.)", file=log.v1)
513
+
508
514
  def _debug_func() -> torch.Tensor:
509
515
  self._run_step(extern_data, train_flag=True, train_func=True)
510
- return rf.get_run_ctx().total_loss()
516
+ loss = rf.get_run_ctx().total_loss()
517
+ assert isinstance(loss, Tensor)
518
+ return loss.raw_tensor
511
519
 
512
520
  print("Running debug_inf_nan...", file=log.v1)
513
521
  debug_inf_nan(_debug_func, with_grad=True)
522
+ if count_nan_inf_params > 0 and self.global_train_step == 1:
523
+ print(
524
+ "This was the second step, so likely the first step grad was broken."
525
+ " Try again with reset model...",
526
+ file=log.v1,
527
+ )
528
+ self._load_model()
529
+ debug_inf_nan(_debug_func, with_grad=True)
514
530
  raise Exception(f"Inf/nan score in step {step_idx}.")
515
531
 
516
532
  step_idx += 1
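
The parameter scan added in this hunk is self-contained and can be reused outside the engine. A standalone sketch of the same check for any torch module; the function name and usage below are illustrative, not part of RETURNN:

import torch

def report_nonfinite_params(model: torch.nn.Module) -> int:
    """Print parameters containing nan/inf; return how many were found."""
    count = 0
    for name, param in model.named_parameters():
        flags = torch.stack([torch.isnan(param).any(), torch.isinf(param).any()]).cpu()
        got_nan, got_inf = bool(flags[0]), bool(flags[1])
        if got_nan or got_inf:
            kinds = "/".join(k for k, b in [("nan", got_nan), ("inf", got_inf)] if b)
            print(f"  {name}: {kinds}")
            count += 1
    return count

# e.g. report_nonfinite_params(torch.nn.Linear(4, 4)) -> 0 for a freshly initialized layer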
@@ -1253,6 +1269,8 @@ class Engine(EngineBase):
             new_dim.dyn_size_ext = _get_tensor_wo_batch_numpy(dim.dyn_size_ext)
             return new_dim

+        num_seqs = None
+        last_seq_idx = 0
         report_prefix = f"ep {self.epoch} {dataset.name} forward"
         with torch.no_grad():
             callback.init(model=self._orig_model)
@@ -1260,6 +1278,15 @@ class Engine(EngineBase):
             step_idx = 0
             for extern_data_raw in data_loader:
                 step_begin_time = time.monotonic()
+
+                num_seqs, last_seq_idx = _get_num_seqs_last_seq_idx(
+                    report_prefix=report_prefix,
+                    extern_data_raw=extern_data_raw,
+                    step_idx=step_idx,
+                    prev_num_seqs=num_seqs,
+                    prev_last_seq_idx=last_seq_idx,
+                )
+
                 if self._forward_step_expected_outputs:
                     # Also resets any dyn dims, which might have been set in the prev step.
                     self._forward_step_expected_outputs.reset_content()
@@ -1296,11 +1323,19 @@ class Engine(EngineBase):
                         model_outputs_per_batch.data[k] = _get_tensor_wo_batch_numpy(v)
                     callback.process_seq(seq_tag=seq_tag, outputs=model_outputs_per_batch)

-                elapsed_computation_time += time.monotonic() - step_begin_time
+                step_end_time = time.monotonic()
+                step_duration = step_end_time - step_begin_time
+                elapsed_computation_time += step_duration
+
                 _print_process(
                     report_prefix,
                     step=step_idx,
                     eval_info=None,
+                    step_duration=step_duration,
+                    start_elapsed=step_end_time - epoch_start_time,
+                    seq_idx=last_seq_idx,
+                    num_seqs=num_seqs,
+                    batch_size_info=_get_batch_size_info(extern_data) if self._log_batch_size else None,
                     log_memory_usage_device=self._device if self._log_memory_usage else None,
                 )
                 step_idx += 1
@@ -1578,3 +1613,27 @@ def _get_total_grad_norm(model: torch.nn.Module, p: float) -> float:
             p=p,
         ).item()
     )
+
+
+def _get_num_seqs_last_seq_idx(
+    *,
+    report_prefix: str,
+    extern_data_raw: Dict[str, Any],
+    step_idx: int,
+    prev_num_seqs: Optional[int],
+    prev_last_seq_idx: int,
+) -> Tuple[Optional[int], int]:
+    num_seqs = prev_num_seqs
+    num_seqs_ = int(extern_data_raw["num_seqs"]) if extern_data_raw.get("num_seqs", None) is not None else -1
+    # Note: The batches might have been shuffled,
+    # thus we cannot really assert that the seq_idx is always increasing.
+    last_seq_idx = max(int(extern_data_raw["seq_idx"].max()), prev_last_seq_idx)
+    if step_idx == 0:
+        if num_seqs_ >= 0:
+            print(f"{report_prefix} num_seqs: {num_seqs_}", file=log.v5)
+            num_seqs = num_seqs_
+    elif num_seqs_ >= 0:
+        assert num_seqs_ == num_seqs
+    if num_seqs is not None:
+        assert last_seq_idx < num_seqs
+    return num_seqs, last_seq_idx
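The values returned by this new helper feed the epoch_continuous computation used in the train loop above, epoch - 1 + (last_seq_idx + 1) / num_seqs. Illustrative numbers only, not from the diff:

# e.g. epoch 2, 1000 seqs in the epoch, highest seq_idx seen so far 249:
epoch, num_seqs, last_seq_idx = 2, 1000, 249
epoch_continuous = epoch - 1 + (last_seq_idx + 1) / num_seqs  # -> 1.25, i.e. 25% into epoch 2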
@@ -52,6 +52,7 @@ def debug_inf_nan(
     *,
     with_grad: bool = False,
     report_every_op_call: bool = True,
+    stop_reporting_after_first_inf_nan: bool = True,
     file: Optional[Union[TextIO, TextIOBase]] = None,
 ):
     """
@@ -61,6 +62,7 @@ def debug_inf_nan(
     and we will call `loss = func(); loss.backward()`.
     :param with_grad: whether to compute and debug gradients for inf/nan.
     :param report_every_op_call: whether to report every op call.
+    :param stop_reporting_after_first_inf_nan: whether to stop reporting after the first inf/nan.
     :param file: where to write the output to. Default is stdout.
     """

@@ -69,13 +71,18 @@ def debug_inf_nan(

     # noinspection PyUnresolvedReferences,PyProtectedMember
     cur_frame: FrameType = sys._getframe()
-    trace_ops = _TraceOps(root_frame=cur_frame, file=file, report_every_op_call=report_every_op_call)
+    trace_ops = _TraceOps(
+        root_frame=cur_frame,
+        file=file,
+        report_every_op_call=report_every_op_call,
+        stop_reporting_after_first_inf_nan=stop_reporting_after_first_inf_nan,
+    )

     if with_grad:
-
         with torch.autograd.detect_anomaly():
             with trace_ops:  # currently only for forward (but we might want to trace the backward too)
                 loss = func()
+            file.flush()  # the backward detect_anomaly might screw up the output otherwise
             try:
                 loss.backward()
             except RuntimeError as exc:
@@ -89,23 +96,46 @@ def debug_inf_nan(

 # For efficiency, and to be less spammy
 _TraceFuncNameBlacklist = {
-    "aten::detach",
     "aten::zeros_like",
     "aten::ones_like",
+    "aten::full",
+    "aten::scalar_tensor",  # when we deliberately create a scalar inf tensor
+    "aten::_local_scalar_dense",
+    "aten::where.self",  # when we intentionally mask with inf
+    "aten::detach",
+    "aten::_to_copy",
+    "aten::clone",
+    "aten::stack",
+    "aten::view",
+    "aten::_unsafe_view",
+    "aten::permute",
+    "aten::t",
+    "aten::split_with_sizes",
+    "aten::slice.Tensor",
+    "aten::select.int",
 }


 class _TraceOps(TorchDispatchMode):
-    def __init__(self, *, root_frame: FrameType, file: Union[TextIO, TextIOBase], report_every_op_call: bool = True):
+    def __init__(
+        self,
+        *,
+        root_frame: FrameType,
+        file: Union[TextIO, TextIOBase],
+        report_every_op_call: bool = True,
+        stop_reporting_after_first_inf_nan: bool = True,
+    ):
         super().__init__()
         self.root_frame = root_frame
         self.file = file
+        self.enabled = True
         self.report_every_op_call = report_every_op_call
+        self.stop_reporting_after_first_inf_nan = stop_reporting_after_first_inf_nan

     def __torch_dispatch__(self, func, types, args=(), kwargs=None):
         if kwargs is None:
             kwargs = {}
-        if func.name() in _TraceFuncNameBlacklist:
+        if not self.enabled or func.name() in _TraceFuncNameBlacklist:
             return func(*args, **kwargs)
         if self.report_every_op_call:
             print(f"--- op {func.name()}", file=self.file)
@@ -121,6 +151,8 @@ class _TraceOps(TorchDispatchMode):
                 traceback.print_list(
                     _extract_stack_up_to(skip_top_num_frames=1, root_frame=self.root_frame), file=self.file
                 )
+                if self.stop_reporting_after_first_inf_nan:
+                    self.enabled = False
         return out


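For context, a minimal usage sketch of the debug_inf_nan API extended above; the call signature follows this diff, while the toy func and its values are made up:

import torch
from returnn.torch.util.debug_inf_nan import debug_inf_nan

def func():
    x = torch.arange(5, dtype=torch.float32, requires_grad=True)
    y = torch.exp(x * 100.0)  # overflows to inf for the larger entries
    return (y / y).sum()  # inf / inf -> nan

# Report every offending op (not just the first one) and also trace the backward pass.
debug_inf_nan(func, with_grad=True, stop_reporting_after_first_inf_nan=False)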
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20250204.160236
+Version: 1.20250206.151011
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
@@ -55,6 +55,7 @@ def run_model(
     dyn_dim_max_sizes: Optional[Dict[Dim, int]] = None,
     dyn_dim_min_sizes: Optional[Dict[Dim, int]] = None,
     test_tensorflow: bool = True,
+    allow_inf_nan_in_output: bool = False,
 ) -> TensorDict:
     """run"""
     print(f"* run_model with dyn_dim_max_sizes={dyn_dim_max_sizes!r}")
@@ -70,6 +71,10 @@ def run_model(
     # get the values now because dims might get overwritten
     out_pt_raw = out_pt.as_raw_tensor_dict(include_const_sizes=True)

+    if not allow_inf_nan_in_output:
+        for k, v in out_pt.data.items():
+            assert numpy.isfinite(v.raw_tensor).all(), f"output {k!r} has non-finite values: {v.raw_tensor}"
+
     if not test_tensorflow:
         return out_pt
     if disable_tf:
@@ -86,8 +91,10 @@ def run_model(
     random_journal: RandomJournal
     assert random_journal.reached_end()

-    print(out_pt, out_tf)
-    assert set(out_pt.data.keys()) == set(out_tf.data.keys())
+    print("Output PT/TF:", out_pt, out_tf)
+    assert set(out_pt.data.keys()) == set(
+        out_tf.data.keys()
+    ), f"PT output {list(out_pt.data.keys())} vs TF output {list(out_tf.data.keys())}"
     for k, v_pt in out_pt.data.items():
         v_tf = out_tf[k]
         # We cannot really check the dims directly for equality,
@@ -123,11 +130,31 @@ def _run_model_torch(extern_data: TensorDict, get_model: rf.GetModelFunc, forwar
     # We recover extern_data in the end.
     tensor_dict_numpy_to_torch_(extern_data)

+    for k, v in extern_data.data.items():
+        if v.raw_tensor.dtype.is_floating_point:
+            v.raw_tensor.requires_grad = True
+
     model = get_model(epoch=1, step=0)
     rf.init_forward_step_run_ctx(epoch=1, step=0)
     forward_step(model=model, extern_data=extern_data)
     outputs = rf.get_run_ctx().outputs
     assert outputs.data
+
+    if "loss" in outputs.data:
+        loss = outputs.data["loss"]
+        assert isinstance(loss, Tensor)
+        assert loss.raw_tensor.dtype.is_floating_point
+        loss = rf.reduce_sum(loss, axis=loss.dims)
+        print("loss:", loss.raw_tensor.detach().numpy().item())
+        loss.raw_tensor.backward()
+        for k, v in list(extern_data.data.items()):
+            if v.raw_tensor.dtype.is_floating_point:
+                assert v.raw_tensor.grad is not None, f"no grad for {k}"
+                v_grad = v.copy_template()
+                v_grad.raw_tensor = v.raw_tensor.grad
+                assert f"{k}_grad" not in outputs.data
+                outputs.data[f"{k}_grad"] = v_grad
+
     tensor_dict_torch_to_numpy_(outputs)

     extern_data.assign_from_raw_tensor_dict_(extern_data_raw)
@@ -242,6 +269,20 @@ def _run_model_net_dict_tf(
         layer = net.get_layer(layer_name)
         outputs_tf.data[k] = layer.output.copy()

+    if "loss" in outputs_tf.data:
+        data_ = {name: data for name, data in net.extern_data.data.items() if data.dtype.startswith("float")}
+        loss = outputs_tf.data["loss"]
+        assert isinstance(loss, Tensor)
+        assert loss.dtype.startswith("float")
+        loss = rf.reduce_sum(loss, axis=loss.dims)
+        d_grads = tf.gradients(loss.raw_tensor, [d.raw_tensor for d in data_.values()])
+        for (name, data), d_grad_tf in zip(data_.items(), d_grads):
+            assert isinstance(data, Tensor)
+            assert isinstance(d_grad_tf, tf.Tensor)
+            d_grad = data.copy_template()
+            d_grad.raw_tensor = d_grad_tf
+            outputs_tf.data[f"{name}_grad"] = d_grad
+
     fetches = outputs_tf.as_raw_tensor_dict(expected_value_type=tf.Tensor)
     assert set(extern_data.data.keys()) == set(net.extern_data.data.keys())
     extern_data_tf_placeholders = net.extern_data.as_raw_tensor_dict(expected_value_type=tf.Tensor)
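The rf_utils additions above backprop the "loss" output to every float input and expose the result as "<name>_grad" outputs, so the PyTorch and TensorFlow backends can be compared on gradients as well. A toy PyTorch-only version of that pattern (names are illustrative, not the test-suite API):

import torch

inputs = {"data": torch.randn(3, 5, requires_grad=True)}
loss = (inputs["data"] ** 2).sum()  # stand-in for the model's "loss" output
loss.backward()
grads = {f"{name}_grad": t.grad for name, t in inputs.items()}  # compared across backends in the tests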
@@ -627,6 +627,27 @@ def test_gather_time_static_clip_to_valid():
     run_model(extern_data_template, lambda *, epoch, step: rf.Module(), _forward_step)


+def test_gather_3d_embed():
+    time_dim = Dim(Tensor("time", [batch_dim], dtype="int32"))
+    in_dim = Dim(7, name="in")
+    num_embeddings_dim = Dim(2, name="num_embeddings")
+    embed_dim = Dim(11, name="embed")
+    extern_data_template = TensorDict(
+        {
+            "data": Tensor("data", [batch_dim, time_dim], sparse_dim=in_dim, dtype="int32"),
+            "embed": Tensor("embed", [in_dim, num_embeddings_dim, embed_dim], dtype="float32"),
+        }
+    )
+
+    def _forward_step(*, extern_data: TensorDict, **_kwargs):
+        x, embed = extern_data["data"], extern_data["embed"]
+        out = rf.gather(embed, indices=x)
+        out.mark_as_default_output(shape=(batch_dim, time_dim, num_embeddings_dim, embed_dim))
+        rf.reduce_sum(out, axis=out.dims).mark_as_output("loss")
+
+    run_model(extern_data_template, lambda *, epoch, step: rf.Module(), _forward_step)
+
+
 def test_scatter_fill_inf():
     batch_dim_ = Dim(3, name="batch")
     states_dim = Dim(7, name="states")
@@ -645,7 +666,13 @@ def test_scatter_fill_inf():
         )  # [S], per state
         scores.mark_as_default_output(shape=[states_dim])

-    res = run_model(TensorDict(), lambda *, epoch, step: rf.Module(), _forward_step, test_tensorflow=False)
+    res = run_model(
+        TensorDict(),
+        lambda *, epoch, step: rf.Module(),
+        _forward_step,
+        test_tensorflow=False,
+        allow_inf_nan_in_output=True,
+    )
     batch_size = res["batch_size"].raw_tensor.item()
     assert res["start_states"].raw_tensor.shape == (batch_size,)
     assert res["output"].raw_tensor.shape == (states_dim.dimension,)
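The new test_gather_3d_embed in this file exercises rf.gather with a 3-D embedding table indexed by a sparse tensor. A plain-PyTorch analogue of the shape behaviour it checks (dimension sizes taken from the test, everything else illustrative):

import torch

table = torch.randn(7, 2, 11)      # [in, num_embeddings, embed]
idx = torch.randint(0, 7, (3, 4))  # [batch, time] indices into the "in" dim
out = table[idx]                   # -> [batch, time, num_embeddings, embed] == [3, 4, 2, 11]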
@@ -286,12 +286,12 @@ def test_debug_inf_nan():

     # Run directly, to just test that it goes through without exception.
     # For some reason, the detect_anomaly does not print the forward op?
-    debug_inf_nan(func, with_grad=True)
+    debug_inf_nan(func, with_grad=True, stop_reporting_after_first_inf_nan=False)

     from io import StringIO

     out = StringIO()
-    debug_inf_nan(func, file=out)
+    debug_inf_nan(func, file=out, stop_reporting_after_first_inf_nan=False)
     assert "inf in aten.exp" in out.getvalue()
     assert "nan in aten.div" in out.getvalue()
     assert "mod5" in out.getvalue()
@@ -1,2 +0,0 @@
-version = '1.20250204.160236'
-long_version = '1.20250204.160236+git.e147886'