returnn 1.20241005.114831__tar.gz → 1.20241011.20141__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of returnn might be problematic.

Files changed (465)
  1. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/PKG-INFO +1 -1
  2. returnn-1.20241011.20141/_setup_info_generated.py +2 -0
  3. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/distrib_files.py +2 -0
  4. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/meta.py +16 -6
  5. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/multi_proc.py +2 -0
  6. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tensor/tensor_dict.py +3 -0
  7. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/torch/engine.py +95 -111
  8. returnn-1.20241011.20141/returnn/torch/util/exception_helper.py +111 -0
  9. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/util/basic.py +6 -1
  10. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/util/better_exchook.py +90 -12
  11. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn.egg-info/PKG-INFO +1 -1
  12. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn.egg-info/SOURCES.txt +1 -0
  13. returnn-1.20241005.114831/_setup_info_generated.py +0 -2
  14. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/.editorconfig +0 -0
  15. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/.gitignore +0 -0
  16. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/.gitmodules +0 -0
  17. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/.kateconfig +0 -0
  18. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/CHANGELOG.md +0 -0
  19. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/CODEOWNERS +0 -0
  20. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/CONTRIBUTING.md +0 -0
  21. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/LICENSE +0 -0
  22. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/MANIFEST.in +0 -0
  23. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/README.rst +0 -0
  24. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/__init__.py +0 -0
  25. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/12AX.cluster_map +0 -0
  26. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/_setup_returnn_env.py +0 -0
  27. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-fwd.config +0 -0
  28. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-horovod-mpi.py +0 -0
  29. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-horovod-mpi.py.sh +0 -0
  30. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-horovod-mpi.sh +0 -0
  31. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-hyper-param-tuning.config +0 -0
  32. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-iter-dataset.py +0 -0
  33. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-list-devices.py +0 -0
  34. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-lua-torch-layer.config +0 -0
  35. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-pretrain.config +0 -0
  36. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-record-and-push-to-webserver.py +0 -0
  37. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-returnn-as-framework.py +0 -0
  38. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-rf-pt-benchmark.py +0 -0
  39. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-rf.config +0 -0
  40. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-rhn-enwik8.config +0 -0
  41. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-sprint-interface.py +0 -0
  42. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-tf-att-copy.config +0 -0
  43. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-tf-attention.config +0 -0
  44. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
  45. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
  46. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-tf-enc-dec.config +0 -0
  47. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-tf-hard-att-copy.config +0 -0
  48. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-tf-lstm-benchmark.py +0 -0
  49. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
  50. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
  51. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-tf-native-lstm.12ax.config +0 -0
  52. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-tf-native-lstm2.12ax.config +0 -0
  53. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
  54. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-tf-neural-transducer.12ax.config +0 -0
  55. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-tf-rec-explicit-lstm.config +0 -0
  56. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-tf-rec-explicit-rnn.config +0 -0
  57. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-tf-rec-self-att.config +0 -0
  58. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-tf-search-compiled-graph.py +0 -0
  59. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
  60. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-timit-lstm-ctc.config +0 -0
  61. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-torch.config +0 -0
  62. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
  63. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/demo.sh +0 -0
  64. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
  65. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
  66. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
  67. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/mdlstm/IAM/README.txt +0 -0
  68. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/mdlstm/IAM/chars.txt +0 -0
  69. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/mdlstm/IAM/config_demo +0 -0
  70. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/mdlstm/IAM/config_fwd +0 -0
  71. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/mdlstm/IAM/config_real +0 -0
  72. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
  73. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/mdlstm/IAM/decode.py +0 -0
  74. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
  75. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/mdlstm/IAM/go.sh +0 -0
  76. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/mdlstm/IAM/lines.txt +0 -0
  77. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/mdlstm/IAM/split/eval.txt +0 -0
  78. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/mdlstm/IAM/split/train.txt +0 -0
  79. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/mdlstm/IAM/split/valid.txt +0 -0
  80. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/mdlstm/README.md +0 -0
  81. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/mdlstm/artificial/create_test_h5.py +0 -0
  82. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/mdlstm/artificial/forwardconfig +0 -0
  83. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/mdlstm/artificial/go.sh +0 -0
  84. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/mdlstm/artificial/trainconfig +0 -0
  85. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
  86. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
  87. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/mdlstm/artificial_rgb/go.sh +0 -0
  88. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
  89. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/pyproject.toml +0 -0
  90. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/requirements.txt +0 -0
  91. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/__init__.py +0 -0
  92. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/__main__.py +0 -0
  93. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/__old_mod_loader__.py +0 -0
  94. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/__setup__.py +0 -0
  95. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/config.py +0 -0
  96. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/__init__.py +0 -0
  97. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/audio.py +0 -0
  98. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/basic.py +0 -0
  99. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/bundle_file.py +0 -0
  100. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/cached.py +0 -0
  101. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/cached2.py +0 -0
  102. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/generating.py +0 -0
  103. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/hdf.py +0 -0
  104. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/lm.py +0 -0
  105. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/map.py +0 -0
  106. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/normalization_data.py +0 -0
  107. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/numpy_dump.py +0 -0
  108. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/postprocessing.py +0 -0
  109. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/raw_wav.py +0 -0
  110. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/sprint.py +0 -0
  111. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/stereo.py +0 -0
  112. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/util/__init__.py +0 -0
  113. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/util/feature_extraction.py +0 -0
  114. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/util/strings.py +0 -0
  115. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/util/vocabulary.py +0 -0
  116. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/engine/__init__.py +0 -0
  117. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/engine/base.py +0 -0
  118. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/engine/batch.py +0 -0
  119. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/__init__.py +0 -0
  120. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/__main__.py +0 -0
  121. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/warp-rna/.git +0 -0
  122. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
  123. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
  124. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
  125. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
  126. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
  127. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
  128. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
  129. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
  130. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
  131. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
  132. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
  133. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
  134. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
  135. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
  136. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
  137. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
  138. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
  139. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
  140. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
  141. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
  142. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
  143. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
  144. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
  145. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
  146. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/__init__.py +0 -0
  147. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/graph_editor/README.md +0 -0
  148. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/graph_editor/__init__.py +0 -0
  149. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/graph_editor/edit.py +0 -0
  150. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/graph_editor/reroute.py +0 -0
  151. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/graph_editor/select.py +0 -0
  152. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/graph_editor/subgraph.py +0 -0
  153. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/graph_editor/transform.py +0 -0
  154. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/extern/graph_editor/util.py +0 -0
  155. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/forward_iface.py +0 -0
  156. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/__init__.py +0 -0
  157. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/_backend.py +0 -0
  158. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/_native/__init__.py +0 -0
  159. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/_native/backend.cpp +0 -0
  160. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/_native/backend.hpp +0 -0
  161. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/_native/module.cpp +0 -0
  162. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/_native/module.hpp +0 -0
  163. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/_native/py_utils.hpp +0 -0
  164. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/_native/tensor_ops.cpp +0 -0
  165. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/_native/tensor_ops.hpp +0 -0
  166. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/_numpy_backend.py +0 -0
  167. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/_random_journal.py +0 -0
  168. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/_utils.py +0 -0
  169. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/array_.py +0 -0
  170. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/attention.py +0 -0
  171. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/audio/__init__.py +0 -0
  172. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/audio/mel.py +0 -0
  173. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/audio/specaugment.py +0 -0
  174. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/backend.py +0 -0
  175. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/build_from_dict.py +0 -0
  176. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/cond.py +0 -0
  177. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/const.py +0 -0
  178. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/container.py +0 -0
  179. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/control_flow_ctx.py +0 -0
  180. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/conv.py +0 -0
  181. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/conversions/__init__.py +0 -0
  182. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/conversions/espnet_e_branchformer.py +0 -0
  183. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/conversions/hf_llama.py +0 -0
  184. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/conversions/torch_nn.py +0 -0
  185. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/decoder/__init__.py +0 -0
  186. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/decoder/transformer.py +0 -0
  187. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/device.py +0 -0
  188. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/dims.py +0 -0
  189. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/dropout.py +0 -0
  190. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/dtype.py +0 -0
  191. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/encoder/__init__.py +0 -0
  192. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/encoder/base.py +0 -0
  193. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/encoder/conformer.py +0 -0
  194. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/encoder/e_branchformer.py +0 -0
  195. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/encoder/transformer.py +0 -0
  196. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/gradient.py +0 -0
  197. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/graph.py +0 -0
  198. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/hooks.py +0 -0
  199. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/init.py +0 -0
  200. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/label_smoothing.py +0 -0
  201. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/linear.py +0 -0
  202. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/loop.py +0 -0
  203. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/loss.py +0 -0
  204. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/math_.py +0 -0
  205. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/matmul.py +0 -0
  206. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/module.py +0 -0
  207. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/normalization.py +0 -0
  208. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/parameter.py +0 -0
  209. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/parametrizations.py +0 -0
  210. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/parametrize.py +0 -0
  211. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/piecewise_linear.py +0 -0
  212. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/rand.py +0 -0
  213. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/rec.py +0 -0
  214. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/reduce.py +0 -0
  215. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/run_ctx.py +0 -0
  216. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/signal.py +0 -0
  217. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/state.py +0 -0
  218. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/stepwise_scheduler.py +0 -0
  219. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/tensor_array.py +0 -0
  220. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/frontend/types.py +0 -0
  221. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/import_/__init__.py +0 -0
  222. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/import_/common.py +0 -0
  223. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/import_/git.py +0 -0
  224. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/import_/import_.py +0 -0
  225. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/learning_rate_control.py +0 -0
  226. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/log.py +0 -0
  227. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/native_op.cpp +0 -0
  228. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/native_op.py +0 -0
  229. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/pretrain.py +0 -0
  230. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/sprint/__init__.py +0 -0
  231. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/sprint/cache.py +0 -0
  232. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/sprint/control.py +0 -0
  233. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/sprint/error_signals.py +0 -0
  234. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/sprint/extern_interface.py +0 -0
  235. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/sprint/interface.py +0 -0
  236. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tensor/README.md +0 -0
  237. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tensor/__init__.py +0 -0
  238. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tensor/_dim_extra.py +0 -0
  239. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tensor/_tensor_extra.py +0 -0
  240. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tensor/_tensor_mixin_base.py +0 -0
  241. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tensor/_tensor_op_overloads.py +0 -0
  242. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tensor/control_flow_ctx.py +0 -0
  243. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tensor/dim.py +0 -0
  244. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tensor/marked_dim.py +0 -0
  245. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tensor/tensor.py +0 -0
  246. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tensor/utils.py +0 -0
  247. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/__init__.py +0 -0
  248. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/compat.py +0 -0
  249. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/data_pipeline.py +0 -0
  250. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/distributed.py +0 -0
  251. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/engine.py +0 -0
  252. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/frontend_layers/README.md +0 -0
  253. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/frontend_layers/__init__.py +0 -0
  254. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/frontend_layers/_backend.py +0 -0
  255. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/frontend_layers/_utils.py +0 -0
  256. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/frontend_layers/cond.py +0 -0
  257. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
  258. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
  259. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/frontend_layers/dims.py +0 -0
  260. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/frontend_layers/layer.py +0 -0
  261. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/frontend_layers/loop.py +0 -0
  262. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/frontend_layers/make_layer.py +0 -0
  263. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/frontend_layers/masked_computation.py +0 -0
  264. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
  265. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
  266. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/frontend_low_level/__init__.py +0 -0
  267. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/frontend_low_level/_backend.py +0 -0
  268. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/horovod.py +0 -0
  269. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/hyper_param_tuning.py +0 -0
  270. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/layers/__init__.py +0 -0
  271. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/layers/base.py +0 -0
  272. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/layers/basic.py +0 -0
  273. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/layers/rec.py +0 -0
  274. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/layers/segmental_model.py +0 -0
  275. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/layers/signal_processing.py +0 -0
  276. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/layers/variable.py +0 -0
  277. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/native_op.py +0 -0
  278. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/network.py +0 -0
  279. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/sprint.py +0 -0
  280. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/updater.py +0 -0
  281. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/util/__init__.py +0 -0
  282. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/util/basic.py +0 -0
  283. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/util/data.py +0 -0
  284. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/util/gradient_checkpoint.py +0 -0
  285. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/util/ken_lm.py +0 -0
  286. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tf/util/open_fst.py +0 -0
  287. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/torch/README.md +0 -0
  288. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/torch/__init__.py +0 -0
  289. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/torch/data/__init__.py +0 -0
  290. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/torch/data/extern_data.py +0 -0
  291. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/torch/data/pipeline.py +0 -0
  292. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/torch/data/queued_data_iter.py +0 -0
  293. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
  294. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/torch/data/tensor_utils.py +0 -0
  295. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/torch/distributed.py +0 -0
  296. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/torch/frontend/__init__.py +0 -0
  297. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/torch/frontend/_backend.py +0 -0
  298. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/torch/frontend/_rand.py +0 -0
  299. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/torch/frontend/bridge.py +0 -0
  300. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/torch/frontend/raw_ops.py +0 -0
  301. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/torch/updater.py +0 -0
  302. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/torch/util/README.md +0 -0
  303. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/torch/util/__init__.py +0 -0
  304. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/torch/util/array_.py +0 -0
  305. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/torch/util/diagnose_gpu.py +0 -0
  306. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/torch/util/gradient_checkpoint.py +0 -0
  307. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/torch/util/module.py +0 -0
  308. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/torch/util/scaled_gradient.py +0 -0
  309. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/util/__init__.py +0 -0
  310. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/util/bpe.py +0 -0
  311. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/util/debug.py +0 -0
  312. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/util/debug_helpers.py +0 -0
  313. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/util/file_cache.py +0 -0
  314. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/util/fsa.py +0 -0
  315. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/util/literal_py_to_pickle.py +0 -0
  316. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/util/math.py +0 -0
  317. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
  318. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/util/native_code_compiler.py +0 -0
  319. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/util/pprint.py +0 -0
  320. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/util/py-to-pickle.cpp +0 -0
  321. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/util/py_compat.py +0 -0
  322. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/util/py_ext_mod_compiler.py +0 -0
  323. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/util/result_with_reason.py +0 -0
  324. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/util/sig_proc.py +0 -0
  325. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/util/task_system.py +0 -0
  326. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/util/train_proc_manager.py +0 -0
  327. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/util/watch_memory.py +0 -0
  328. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn.egg-info/dependency_links.txt +0 -0
  329. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn.egg-info/top_level.txt +0 -0
  330. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/rnn.py +0 -0
  331. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/setup.cfg +0 -0
  332. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/setup.py +0 -0
  333. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/DummySprintExec.py +0 -0
  334. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/PyCharm-inspection-profile.xml +0 -0
  335. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/PyCharm.idea/.gitignore +0 -0
  336. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/PyCharm.idea/.name +0 -0
  337. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
  338. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
  339. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
  340. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
  341. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
  342. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/PyCharm.idea/misc.xml +0 -0
  343. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/PyCharm.idea/modules.xml +0 -0
  344. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/PyCharm.idea/returnn.iml +0 -0
  345. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
  346. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/_set_num_threads1.py +0 -0
  347. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/_setup_returnn_env.py +0 -0
  348. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/_setup_test_env.py +0 -0
  349. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/bpe-unicode-demo.codes +0 -0
  350. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/bpe-unicode-demo.vocab +0 -0
  351. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/lexicon_opt.fst +0 -0
  352. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/lexicon_opt.isyms +0 -0
  353. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/lexicon_opt.jpg +0 -0
  354. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/lexicon_opt.osyms +0 -0
  355. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/lint_common.py +0 -0
  356. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/pycharm-inspect.py +0 -0
  357. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/pylint.py +0 -0
  358. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/returnn-as-framework.py +0 -0
  359. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/rf_utils.py +0 -0
  360. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/spelling.dic +0 -0
  361. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_Config.py +0 -0
  362. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_Dataset.py +0 -0
  363. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_Fsa.py +0 -0
  364. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_GeneratingDataset.py +0 -0
  365. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_HDFDataset.py +0 -0
  366. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_LearningRateControl.py +0 -0
  367. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_Log.py +0 -0
  368. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_MultiProcDataset.py +0 -0
  369. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_Pretrain.py +0 -0
  370. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_ResNet.py +0 -0
  371. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_SprintDataset.py +0 -0
  372. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_SprintInterface.py +0 -0
  373. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_TFEngine.py +0 -0
  374. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_TFNativeOp.py +0 -0
  375. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_TFNetworkLayer.py +0 -0
  376. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_TFNetworkRecLayer.py +0 -0
  377. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_TFNetworkSigProcLayer.py +0 -0
  378. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_TFUpdater.py +0 -0
  379. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_TFUtil.py +0 -0
  380. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_TF_determinism.py +0 -0
  381. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_TaskSystem.py +0 -0
  382. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_TaskSystem_SharedMem.py +0 -0
  383. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_TranslationDataset.py +0 -0
  384. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_Util.py +0 -0
  385. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_demos.py +0 -0
  386. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_fork_exec.py +0 -0
  387. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_hdf_dump.py +0 -0
  388. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_rf_array.py +0 -0
  389. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_rf_attention.py +0 -0
  390. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_rf_base.py +0 -0
  391. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_rf_cond.py +0 -0
  392. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_rf_const.py +0 -0
  393. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_rf_container.py +0 -0
  394. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_rf_conv.py +0 -0
  395. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_rf_decoder_transformer.py +0 -0
  396. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_rf_encoder_conformer.py +0 -0
  397. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_rf_gradient.py +0 -0
  398. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_rf_label_smoothing.py +0 -0
  399. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_rf_loop.py +0 -0
  400. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_rf_math.py +0 -0
  401. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_rf_normalization.py +0 -0
  402. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_rf_piecewise_linear.py +0 -0
  403. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_rf_rec.py +0 -0
  404. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_rf_reduce.py +0 -0
  405. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_rf_signal.py +0 -0
  406. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_tensor.py +0 -0
  407. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_tools.py +0 -0
  408. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_torch_dataset.py +0 -0
  409. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_torch_engine.py +0 -0
  410. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_torch_frontend.py +0 -0
  411. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_torch_internal_frontend.py +0 -0
  412. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/test_torch_util.py +0 -0
  413. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tests/torch_utils.py +0 -0
  414. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/_setup_returnn_env.py +0 -0
  415. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/analyze-dataset-batches.py +0 -0
  416. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/bliss-collect-seq-lens.py +0 -0
  417. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/bliss-dump-text.py +0 -0
  418. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/bliss-get-segment-names.py +0 -0
  419. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/bliss-to-ogg-zip.py +0 -0
  420. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/bpe-create-lexicon.py +0 -0
  421. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/calculate-word-error-rate.py +0 -0
  422. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/cleanup-old-models.py +0 -0
  423. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/collect-orth-symbols.py +0 -0
  424. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/collect-words.py +0 -0
  425. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/compile_native_op.py +0 -0
  426. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/compile_tf_graph.py +0 -0
  427. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/debug-dump-search-scores.py +0 -0
  428. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/debug-plot-search-scores.py +0 -0
  429. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/dump-dataset-raw-strings.py +0 -0
  430. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/dump-dataset.py +0 -0
  431. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/dump-forward-stats.py +0 -0
  432. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/dump-forward.py +0 -0
  433. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/dump-network-json.py +0 -0
  434. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/dump-pickle.py +0 -0
  435. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/extract_state_tying_from_dataset.py +0 -0
  436. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/get-attention-weights.py +0 -0
  437. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/get-best-model-epoch.py +0 -0
  438. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/hdf_dump.py +0 -0
  439. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/hdf_dump_translation_dataset.py +0 -0
  440. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/import-blocks-mt-model.py +0 -0
  441. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/import-t2t-mt-model.py +0 -0
  442. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/lattice_rescorer/.gitignore +0 -0
  443. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/lattice_rescorer/Makefile +0 -0
  444. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/lattice_rescorer/README.md +0 -0
  445. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/lattice_rescorer/example/README.md +0 -0
  446. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/lattice_rescorer/example/libs_list +0 -0
  447. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
  448. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
  449. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
  450. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/lattice_rescorer/example/state_vars_list +0 -0
  451. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/lattice_rescorer/example/tensor_names_list +0 -0
  452. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/lattice_rescorer/file.h +0 -0
  453. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
  454. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
  455. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/lattice_rescorer/main.cc +0 -0
  456. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/lattice_rescorer/rescorer.h +0 -0
  457. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/lattice_rescorer/vocabulary.cc +0 -0
  458. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/lattice_rescorer/vocabulary.h +0 -0
  459. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/tf_avg_checkpoints.py +0 -0
  460. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/tf_inspect_checkpoint.py +0 -0
  461. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/tf_inspect_summary_log.py +0 -0
  462. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/torch_avg_checkpoints.py +0 -0
  463. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/torch_export_to_onnx.py +0 -0
  464. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/torch_inspect_checkpoint.py +0 -0
  465. {returnn-1.20241005.114831 → returnn-1.20241011.20141}/tools/torch_inspect_checkpoint_and_opt.py +0 -0
{returnn-1.20241005.114831 → returnn-1.20241011.20141}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: returnn
-Version: 1.20241005.114831
+Version: 1.20241011.20141
 Summary: The RWTH extensible training framework for universal recurrent neural networks
 Home-page: https://github.com/rwth-i6/returnn/
 Author: Albert Zeyer
returnn-1.20241011.20141/_setup_info_generated.py (new file)
@@ -0,0 +1,2 @@
+version = '1.20241011.020141'
+long_version = '1.20241011.020141+git.0032b46'
{returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/distrib_files.py
@@ -11,6 +11,7 @@ import os
 import sys
 import numpy
 from returnn.log import log
+from returnn.util import better_exchook
 from returnn.util.basic import override_env_var, try_run
 from returnn.util.multi_proc_non_daemonic_spawn import NonDaemonicSpawnContext
 from returnn.config import SubProcCopyGlobalConfigPreInitFunc
@@ -573,6 +574,7 @@ def _worker_proc_loop(
     if sys.platform == "linux":
         with open("/proc/self/comm", "w") as f:
             f.write(f"CFD worker {epoch}")
+    better_exchook.setup_all()
 
     assert isinstance(epoch, int) and isinstance(buffer_size, int)
     assert isinstance(dataset_dict, dict)
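Note on the hunk above (and the matching one in multi_proc.py further down): the spawned worker process now installs RETURNN's better_exchook handler right after renaming itself, so an uncaught exception in the worker is reported with an extended traceback (including per-frame locals) instead of the bare default one. A minimal, self-contained illustration of the effect; the failing worker_main() below is made up for demonstration:

```python
# Hypothetical demo: after better_exchook.setup_all(), an uncaught exception in
# this process is printed with the extended traceback (locals shown per frame).
from returnn.util import better_exchook


def worker_main():
    better_exchook.setup_all()  # same call as added in the diff above
    seq_lens = [7, 3, 0]
    return 100 // seq_lens[2]   # ZeroDivisionError, now reported with seq_lens visible


if __name__ == "__main__":
    worker_main()
```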
{returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/meta.py
@@ -1832,25 +1832,35 @@ class VariableDataset(Dataset):
     based on a user-provided function.
     """
 
-    def __init__(self, *, get_dataset, **kwargs):
+    def __init__(self, *, get_dataset, dataset_lru_cache_size: int = 1, **kwargs):
         """
         :param get_dataset: function (*, epoch: int, **_) -> Dict[str,Any], will be called for every sub-epoch.
-            It will cache the dict from the prev call, and if the dict is the same, it will not recreate the dataset.
+            It will cache the dataset(s) from the prev call (dataset_lru_cache_size),
+            and if the dict is the same of those, it will not recreate the dataset.
+        :param dataset_lru_cache_size
         """
+        from functools import lru_cache
+
         super().__init__(**kwargs)
         self._get_dataset = get_dataset
         self._dataset_dict: Optional[Dict[str, Any]] = None
         self._dataset: Optional[Dataset] = None
+        self._dataset_lru_cache_size = dataset_lru_cache_size
+        self._make_dataset = lru_cache(maxsize=self._dataset_lru_cache_size)(
+            lambda dataset_dict: init_dataset(dataset_dict, parent_dataset=self)
+        )
         self._load_dataset(epoch=1)
         self.num_inputs = self._dataset.num_inputs
         self.num_outputs = self._dataset.num_outputs
         self.labels = self._dataset.labels
 
     def _load_dataset(self, epoch: int):
-        dataset_dict = self._get_dataset(epoch=epoch)
-        if dataset_dict != self._dataset_dict:
-            self._dataset_dict = dataset_dict
-            self._dataset = init_dataset(dataset_dict, parent_dataset=self)
+        from returnn.util.basic import get_fwd_compat_kwargs, make_hashable
+
+        dataset_dict = self._get_dataset(self=self, epoch=epoch, **get_fwd_compat_kwargs())
+        assert isinstance(dataset_dict, dict)
+        dataset_dict = make_hashable(dataset_dict)
+        self._dataset = self._make_dataset(dataset_dict)
 
     def init_seq_order(self, epoch=None, seq_list=None, seq_order=None):
         """init seq order"""
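The rewritten `_load_dataset` above swaps the old "same dict as the last call" check for a `functools.lru_cache` keyed on a hashable copy of the dataset dict, so up to `dataset_lru_cache_size` recently used sub-datasets stay constructed instead of just one. A standalone sketch of that caching pattern, with hypothetical `freeze`/`build_dataset` helpers standing in for RETURNN's `make_hashable`/`init_dataset`:

```python
from functools import lru_cache


def freeze(obj):
    """Recursively turn dicts/lists into tuples so the result is hashable (usable as a cache key)."""
    if isinstance(obj, dict):
        return tuple(sorted((k, freeze(v)) for k, v in obj.items()))
    if isinstance(obj, (list, tuple)):
        return tuple(freeze(v) for v in obj)
    return obj


def build_dataset(frozen_dataset_dict):
    print("constructing dataset for", frozen_dataset_dict)  # the expensive step in reality
    return {"config": frozen_dataset_dict}


make_dataset = lru_cache(maxsize=1)(build_dataset)  # maxsize plays the role of dataset_lru_cache_size

d1 = make_dataset(freeze({"class": "HDFDataset", "files": ["a.hdf"]}))
d2 = make_dataset(freeze({"class": "HDFDataset", "files": ["a.hdf"]}))  # cache hit, no rebuild
assert d1 is d2
```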
{returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/datasets/multi_proc.py
@@ -7,6 +7,7 @@ from typing import Optional, Any, Dict, List
 import sys
 import gc
 import multiprocessing as mp
+from returnn.util import better_exchook
 from returnn.util.basic import try_run
 from returnn.config import SubProcCopyGlobalConfigPreInitFunc
 from returnn.util.multi_proc_non_daemonic_spawn import NonDaemonicSpawnContext
@@ -168,6 +169,7 @@ class MultiProcDataset(CachedDataset2):
         if sys.platform == "linux":
             with open("/proc/self/comm", "w") as f:
                 f.write(f"MPD worker {worker_index}")
+        better_exchook.setup_all()
 
         dataset: Optional[Dataset] = None
 
{returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/tensor/tensor_dict.py
@@ -52,6 +52,9 @@ class TensorDict:
         else:
             raise TypeError(f"invalid `data` type: {type(data)}")
 
+    def __contains__(self, item: str) -> bool:
+        return item in self.data
+
     def __getitem__(self, item: str) -> Tensor:
         return self.data[item]
 
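The three added lines make `in` work directly on a TensorDict by delegating to the underlying `data` dict, matching the existing `__getitem__`. A tiny standalone illustration of the pattern (a stand-in class, not RETURNN's actual TensorDict):

```python
class TensorDictLike:
    """Minimal stand-in showing the dict-delegation pattern from the hunk above."""

    def __init__(self, data):
        self.data = dict(data)

    def __contains__(self, item: str) -> bool:
        return item in self.data

    def __getitem__(self, item: str):
        return self.data[item]


td = TensorDictLike({"data": [1, 2, 3]})
assert "data" in td and "classes" not in td
```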
{returnn-1.20241005.114831 → returnn-1.20241011.20141}/returnn/torch/engine.py
@@ -12,6 +12,7 @@ import time
 import socket
 import fnmatch
 import re
+import math
 
 import torch
 import torch.distributed
@@ -19,7 +20,7 @@ from torch.nn.parallel import DistributedDataParallel
 from torch.utils.data import DataLoader
 from torch import autocast
 from torch.cuda import amp
-import math
+import numpy as np
 
 import returnn
 from returnn.config import Config
@@ -43,6 +44,7 @@ from .data.queued_data_iter import QueuedDataIter
 from .frontend.bridge import rf_module_to_pt_module
 from .util import diagnose_gpu
 from .util import module as util_module
+from .util.exception_helper import help_on_torch_exception
 from .distributed import DistributedContext, get_ctx as dist_get_ctx
 
 
@@ -125,6 +127,7 @@ class Engine(EngineBase):
         self._calculate_exp_loss = config.bool("calculate_exp_loss", False)
         self._reset_dev_memory_caches = config.bool("reset_dev_memory_caches", False)
         self._forward_auto_split_batch_on_oom = config.bool("forward_auto_split_batch_on_oom", False)
+        self._stop_on_nonfinite_train_score = config.bool("stop_on_nonfinite_train_score", True)
 
         amp_options = self.config.opt_typed_value("torch_amp")
         grad_scaler_opts = self.config.typed_value("grad_scaler", NotSpecified)
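The new `stop_on_nonfinite_train_score` option (default True) indicates that training can now be aborted when a train score turns inf/nan; the code that actually consumes the flag lives in parts of engine.py not shown in this excerpt. Conceptually, such a guard only needs a finiteness check over the accumulated scores, e.g. (hypothetical helper, not the actual implementation):

```python
import math


def check_finite_scores(scores: dict):
    """Raise if any accumulated train score is NaN or infinite (hypothetical helper)."""
    bad = {name: value for name, value in scores.items() if not math.isfinite(value)}
    if bad:
        raise Exception(f"Training stopped, non-finite train scores: {bad}")


check_finite_scores({"train_loss_ce": 2.13})            # passes
# check_finite_scores({"train_loss_ce": float("nan")})  # would raise
```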
@@ -237,44 +240,10 @@
             self._epoch_mp_shared.value = self.epoch
 
             self.init_train_epoch()
-            try:
-                self.train_epoch()
-            except Exception as exc:
-                self._handle_run_exception(exc)
-                raise
+            self.train_epoch()
 
         print(f"Finished training at epoch {self.epoch}, global train step {self.global_train_step}", file=log.v3)
 
-    def _handle_run_exception(self, exc: Exception, *, always_direct_print: bool = False):
-        from returnn.util.better_exchook import get_func_from_code_object, iter_traceback
-
-        print(f"{type(exc).__name__}: {exc}", file=log.v1)
-
-        # Extend exception message by module call stack.
-        module_names_by_id = {}  # id -> name
-        for name, mod in self._orig_model.named_modules():
-            if id(mod) not in module_names_by_id:
-                module_names_by_id[id(mod)] = name or "(root)"
-        exc_ext = []
-        for frame in iter_traceback(exc.__traceback__):
-            if frame.f_code.co_nlocals == 0:
-                continue
-            frame_self = frame.f_locals.get("self")
-            if isinstance(frame_self, (torch.nn.Module, rf.Module)):
-                func = get_func_from_code_object(frame.f_code, frame=frame)
-                if func and func.__name__ and func.__name__.startswith("_") and not func.__name__.startswith("__"):
-                    continue
-                func_name = (func and func.__qualname__) or type(frame_self).__name__
-                exc_ext.append(f"({func_name}) {module_names_by_id.get(id(frame_self), '(unknown)')}")
-        if not exc_ext:
-            exc_ext.append("(No module call frames.)")
-        if len(exc.args) == 1 and isinstance(exc.args[0], str) and not always_direct_print:
-            exc.args = ("\n".join([exc.args[0], "", "Module call stack:"] + exc_ext),)
-        else:
-            print("Module call stack:", file=log.v3)
-            for msg in exc_ext:
-                print(msg, file=log.v3)
-
     def init_train_epoch(self):
         """
         init train (sub)epoch. LR etc
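Per the file list above, the removed `_handle_run_exception` logic moves into the new `returnn/torch/util/exception_helper.py` as `help_on_torch_exception`, which the engine now imports (see the hunk for line 44 above). The core idea, condensed from the removed code into a standalone sketch (simplified to plain `torch.nn.Module`, walking the stdlib traceback instead of using better_exchook's helpers):

```python
import torch


def module_call_stack(exc: BaseException, root: torch.nn.Module):
    """Walk the exception traceback and, for each frame whose `self` is a module,
    report its name inside the root model, yielding a readable 'module call stack'."""
    names = {}
    for name, mod in root.named_modules():
        names.setdefault(id(mod), name or "(root)")
    out = []
    tb = exc.__traceback__
    while tb is not None:
        frame_self = tb.tb_frame.f_locals.get("self")
        if isinstance(frame_self, torch.nn.Module):
            out.append(f"({type(frame_self).__name__}) {names.get(id(frame_self), '(unknown)')}")
        tb = tb.tb_next
    return out or ["(No module call frames.)"]
```

As the removed method shows, the real code additionally skips private helper frames, handles rf.Module, and appends the collected lines to the exception message.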
@@ -369,88 +338,103 @@ class Engine(EngineBase):

          zero_grad_next_step = True
          cur_count_grad_accum = 0
-         while True:
-             with torch.no_grad():
-                 extern_data_raw = next(data_iter, None)
+         extern_data = None
+         try:
+             while True:
+                 with torch.no_grad():
+                     extern_data_raw = next(data_iter, None)

-             step_begin_time = time.time()
+                 step_begin_time = time.time()

-             _has_data = torch.tensor([extern_data_raw is not None], dtype=torch.int8)
-             if self._torch_distributed_ctx:
-                 # use all reduce to check if all workers have data, if at least one worker does not have data,
-                 # all workers finish this epoch
-                 torch.distributed.all_reduce(_has_data, op=torch.distributed.ReduceOp.MIN)
-                 if not _has_data[0]:
-                     break
+                 _has_data = torch.tensor([extern_data_raw is not None], dtype=torch.int8)
+                 if self._torch_distributed_ctx:
+                     # use all reduce to check if all workers have data, if at least one worker does not have data,
+                     # all workers finish this epoch
+                     torch.distributed.all_reduce(_has_data, op=torch.distributed.ReduceOp.MIN)
+                     if not _has_data[0]:
+                         break

-             # clear the gradients when every gradient accumulation loop starts
-             if zero_grad_next_step:
-                 self._updater.get_optimizer().zero_grad()
-                 cur_count_grad_accum = 0
+                 # clear the gradients when every gradient accumulation loop starts
+                 if zero_grad_next_step:
+                     self._updater.get_optimizer().zero_grad()
+                     cur_count_grad_accum = 0

-             extern_data = extern_data_util.raw_dict_to_extern_data(
-                 extern_data_raw, extern_data_template=self.extern_data, device=self._device
-             )
-             self._run_step(extern_data, train_flag=True, train_func=True)
-
-             train_ctx = rf.get_run_ctx()
-             total_loss = train_ctx.total_loss()
-             losses_dict = NumbersDict(
-                 {
-                     name: (
-                         float(loss.get_summed_loss().raw_tensor.detach().cpu().numpy())
-                         if self._device != "meta"
-                         else float("nan")
+                 extern_data = extern_data_util.raw_dict_to_extern_data(
+                     extern_data_raw, extern_data_template=self.extern_data, device=self._device
+                 )
+                 self._run_step(extern_data, train_flag=True, train_func=True)
+
+                 train_ctx = rf.get_run_ctx()
+                 total_loss = train_ctx.total_loss()
+                 losses_dict = NumbersDict(
+                     {
+                         name: (
+                             float(loss.get_summed_loss().raw_tensor.detach().cpu().numpy())
+                             if self._device != "meta"
+                             else float("nan")
+                         )
+                         for name, loss in train_ctx.losses.items()
+                     }
+                 )
+                 inv_norm_factors_dict = NumbersDict(
+                     {name: float(_to_raw(loss.get_inv_norm_factor())) for name, loss in train_ctx.losses.items()}
+                 )
+
+                 if accum_grad_multiple_step_dyn:
+                     accum_grad_multiple_step = accum_grad_multiple_step_dyn(
+                         epoch=self.epoch, global_train_step=self.global_train_step
                      )
-                     for name, loss in train_ctx.losses.items()
-                 }
-             )
-             inv_norm_factors_dict = NumbersDict(
-                 {name: float(_to_raw(loss.get_inv_norm_factor())) for name, loss in train_ctx.losses.items()}
-             )
+                 cur_count_grad_accum += 1
+                 perform_update_step = cur_count_grad_accum >= accum_grad_multiple_step
+                 with (
+                     self._ddp_pt_model.no_sync()
+                     if (self._ddp_pt_model is not None and not perform_update_step)
+                     else nullcontext()
+                 ):
+                     if self._grad_scaler is not None:
+                         self._grad_scaler.scale(total_loss.raw_tensor).backward()
+                     else:
+                         total_loss.raw_tensor.backward()

-             if accum_grad_multiple_step_dyn:
-                 accum_grad_multiple_step = accum_grad_multiple_step_dyn(
-                     epoch=self.epoch, global_train_step=self.global_train_step
-                 )
-             cur_count_grad_accum += 1
-             perform_update_step = cur_count_grad_accum >= accum_grad_multiple_step
-             with (
-                 self._ddp_pt_model.no_sync()
-                 if (self._ddp_pt_model is not None and not perform_update_step)
-                 else nullcontext()
-             ):
-                 if self._grad_scaler is not None:
-                     self._grad_scaler.scale(total_loss.raw_tensor).backward()
-                 else:
-                     total_loss.raw_tensor.backward()
+                 # only update the weights when every gradient accumulation loop ends
+                 if perform_update_step:
+                     self._updater.step(grad_scaler=self._grad_scaler)
+                 zero_grad_next_step = perform_update_step

-             # only update the weights when every gradient accumulation loop ends
-             if perform_update_step:
-                 self._updater.step(grad_scaler=self._grad_scaler)
-             zero_grad_next_step = perform_update_step
+                 if self._torch_distributed_ctx:
+                     self._torch_distributed_ctx.step_after_param_update(module=self._pt_model, epoch_step_idx=step_idx)

-             if self._torch_distributed_ctx:
-                 self._torch_distributed_ctx.step_after_param_update(module=self._pt_model, epoch_step_idx=step_idx)
-
-             step_duration = time.time() - step_begin_time
-             elapsed_computation_time += step_duration
-
-             accumulated_losses_dict += losses_dict
-             accumulated_inv_norm_factors_dict += inv_norm_factors_dict
-             eval_info = self._maybe_extend_losses_info(losses_dict / inv_norm_factors_dict)
-             _print_process(
-                 f"ep {self.epoch} train",
-                 step=step_idx,
-                 eval_info=dict(eval_info),
-                 step_duration=step_duration,
-                 batch_size_info=_get_batch_size_info(extern_data) if self._log_batch_size else None,
-                 log_memory_usage_device=self._device if self._log_memory_usage else None,
-             )
+                 step_duration = time.time() - step_begin_time
+                 elapsed_computation_time += step_duration

-             step_idx += 1
-             self.global_train_step += 1
-             self._updater.set_current_train_step(global_train_step=self.global_train_step, epoch=self.epoch)
+                 accumulated_losses_dict += losses_dict
+                 accumulated_inv_norm_factors_dict += inv_norm_factors_dict
+                 eval_info = self._maybe_extend_losses_info(losses_dict / inv_norm_factors_dict)
+                 _print_process(
+                     f"ep {self.epoch} train",
+                     step=step_idx,
+                     eval_info=dict(eval_info),
+                     step_duration=step_duration,
+                     batch_size_info=_get_batch_size_info(extern_data) if self._log_batch_size else None,
+                     log_memory_usage_device=self._device if self._log_memory_usage else None,
+                 )
+
+                 if self._stop_on_nonfinite_train_score:
+                     if any(np.isinf(v) or np.isnan(v) for v in accumulated_losses_dict.values()):
+                         print("Model seems broken, got inf or nan score.", file=log.v1)
+                         print(
+                             "Accumulated scores:",
+                             accumulated_losses_dict / accumulated_inv_norm_factors_dict,
+                             file=log.v1,
+                         )
+                         raise Exception(f"Inf/nan score in step {step_idx}.")
+
+                 step_idx += 1
+                 self.global_train_step += 1
+                 self._updater.set_current_train_step(global_train_step=self.global_train_step, epoch=self.epoch)
+         except Exception as exc:
+             help_on_torch_exception(exc, step_idx=step_idx, model=self._orig_model, extern_data=extern_data)
+             raise

          elapsed = time.time() - epoch_start_time
          elapsed_computation_percentage = elapsed_computation_time / elapsed
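The effect of this hunk: the whole per-subepoch training loop now runs inside a single try/except, and any exception is first passed to the new `help_on_torch_exception` helper (which appends step index, extern data summaries and the module call stack to the message) before being re-raised. A minimal stand-alone sketch of that wrapping pattern, assuming a toy `torch.nn.Linear` model and hand-made batches (none of which are part of the diff):

    import torch
    from returnn.torch.util.exception_helper import help_on_torch_exception

    model = torch.nn.Linear(8, 4)  # hypothetical toy model
    step_idx = 0
    try:
        # The second batch has a deliberately wrong feature dim, so the matmul fails.
        for step_idx, batch in enumerate([torch.randn(2, 8), torch.randn(2, 5)]):
            loss = model(batch).sum()
            loss.backward()
    except Exception as exc:
        # Enrich the exception message with step idx and module call stack, then re-raise.
        help_on_torch_exception(exc, step_idx=step_idx, model=model)
        raise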
@@ -1118,13 +1102,13 @@ class Engine(EngineBase):
                      and self._forward_auto_split_batch_on_oom
                      and extern_data_util.raw_dict_can_split_batch(extern_data_raw)
                  ):
-                     self._handle_run_exception(exc, always_direct_print=True)
+                     help_on_torch_exception(exc, model=self._orig_model, always_direct_print=True)
                      util.traceback_clear_frames(exc.__traceback__)
                      diagnose_gpu.garbage_collect()
                      print(f"{report_prefix}, split step {step_idx} batch and try again...", file=log.v3)
                      data_loader.extend(extern_data_util.raw_dict_split_batch(extern_data_raw, splits=2))
                      continue
-                 self._handle_run_exception(exc)
+                 help_on_torch_exception(exc, model=self._orig_model)
                  raise
          ctx = rf.get_run_ctx()
          ctx.check_outputs_complete()
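For context, this forward-loop hunk keeps the existing behavior of catching CUDA OOM errors, splitting the raw batch dict in two and pushing the halves back onto the data loader queue before retrying; only the exception reporting changes to `help_on_torch_exception`. A generic sketch of that split-and-retry idea in plain PyTorch (queue handling and model are assumptions for the example, not RETURNN's helpers):

    import collections
    import torch

    def run_with_oom_splitting(model: torch.nn.Module, batches):
        """Run forward passes; on CUDA OOM, split the offending batch in half and retry."""
        queue = collections.deque(batches)
        while queue:
            batch = queue.popleft()
            try:
                with torch.no_grad():
                    model(batch)
            except torch.cuda.OutOfMemoryError:
                if batch.shape[0] <= 1:
                    raise  # cannot split any further
                half = batch.shape[0] // 2
                # Put both halves back at the front of the queue and retry.
                queue.extendleft([batch[half:], batch[:half]])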
@@ -0,0 +1,111 @@
+ """
+ Helper for any type of PyTorch exceptions
+ """
+
+ from __future__ import annotations
+
+ from typing import Optional, Union, Tuple
+
+ import torch
+ import numpy as np
+
+ from returnn.log import log
+ import returnn.frontend as rf
+ from returnn.tensor import TensorDict
+
+
+ def help_on_torch_exception(
+     exc: Exception,
+     *,
+     step_idx: Optional[int] = None,
+     extern_data: Optional[TensorDict] = None,
+     model: Union[rf.Module, torch.nn.Module],
+     always_direct_print: bool = False,
+ ):
+     """
+     Gather some information which might be helpful for debugging a PyTorch exception.
+     """
+     from returnn.util.better_exchook import get_func_from_code_object, iter_traceback
+
+     print(f"{type(exc).__name__}: {exc}", file=log.v1)
+
+     exc_ext = [f"Step idx: {step_idx}"]
+     if extern_data:
+         exc_ext.append("Extern data:")
+         if "seq_tag" in extern_data:
+             exc_ext.append(f" Seq tags: {extern_data['seq_tag'].raw_tensor}")
+         covered_dim_tags = set()
+         for data_key, data in extern_data.data.items():
+             info, v_minmax = _help_data_or_array(data.raw_tensor)
+             exc_ext.append(f" {data_key}: {info}, {data}")
+             if data.sparse:
+                 if v_minmax[0] < 0 or v_minmax[1] >= data.dim:
+                     exc_ext.append(f" WARNING, invalid label for data sparse dim {data.sparse_dim}")
+             for dim in data.dims:
+                 if dim in covered_dim_tags:
+                     continue
+                 covered_dim_tags.add(dim)
+                 if not dim.dyn_size_ext:
+                     continue
+                 info, _ = _help_data_or_array(dim.dyn_size_ext.raw_tensor)
+                 exc_ext.append(f" dim {dim.short_repr()} size: {info}")
+
+     # Extend exception message by module call stack.
+     exc_ext.append("Module call stack:")
+     module_names_by_id = {} # id -> name
+     count_frames = 0
+     for name, mod in model.named_modules():
+         if id(mod) not in module_names_by_id:
+             module_names_by_id[id(mod)] = name or "(root)"
+     for frame in iter_traceback(exc.__traceback__):
+         if frame.f_code.co_nlocals == 0:
+             continue
+         frame_self = frame.f_locals.get("self")
+         if isinstance(frame_self, (torch.nn.Module, rf.Module)):
+             func = get_func_from_code_object(frame.f_code, frame=frame)
+             if func and func.__name__ and func.__name__.startswith("_") and not func.__name__.startswith("__"):
+                 continue
+             func_name = (func and func.__qualname__) or type(frame_self).__name__
+             exc_ext.append(f"({func_name}) {module_names_by_id.get(id(frame_self), '(unknown)')}")
+             count_frames += 1
+     if not count_frames:
+         exc_ext.append("(No module call frames.)")
+
+     if len(exc.args) == 1 and isinstance(exc.args[0], str) and not always_direct_print:
+         exc.args = ("\n".join([exc.args[0], ""] + exc_ext),)
+     else:
+         for msg in exc_ext:
+             print(msg, file=log.v3)
+
+
+ def _help_data_or_array(
+     value: Union[torch.Tensor, np.ndarray, bool, object]
+ ) -> Tuple[str, Tuple[Union[int, float], Union[int, float]]]:
+     """
+     :param value:
+     :return: (info,(min,max))
+     """
+     if isinstance(value, torch.Tensor):
+         value = value.detach().cpu().numpy()
+     v_minmax = -1, -1
+     if isinstance(value, np.ndarray):
+         info = "shape %s, dtype %s" % (value.shape, value.dtype)
+         if value.dtype.kind in "biuf":
+             if value.size > 1:
+                 v_minmax = np.min(value), np.max(value)
+                 info += ", min/max %s/%s" % v_minmax
+                 if value.dtype.kind == "f":
+                     info += ", mean/stddev %s/%s" % (np.mean(value), np.std(value))
+                 if value.ndim <= 1:
+                     info += " (%s)" % np.array2string(value)
+             elif value.size == 1:
+                 info += " (%s)" % np.array2string(value)
+             else:
+                 info += ", EMPTY"
+     elif isinstance(value, (np.floating, np.integer, np.bool_, float, int, bool, str, bytes)):
+         info = "%s(%s)" % (type(value).__name__, value)
+     elif value is None:
+         info = "None"
+     else:
+         info = "type %r" % type(value)
+     return info, v_minmax
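A minimal sketch of what the new helper does with the module call stack, using two hypothetical modules (`Outer`, `Inner`) that are not part of the diff: the exception message gets the step index plus one line per module `forward` frame found in the traceback.

    import torch
    from returnn.torch.util.exception_helper import help_on_torch_exception

    class Inner(torch.nn.Module):
        def forward(self, x):
            raise ValueError("boom")

    class Outer(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.inner = Inner()

        def forward(self, x):
            return self.inner(x)

    model = Outer()
    try:
        model(torch.zeros(1))
    except ValueError as exc:
        help_on_torch_exception(exc, step_idx=0, model=model)
        # exc now carries the original message plus "Step idx: 0" and the
        # module call stack, e.g. "(Outer.forward) (root)" and "(Inner.forward) inner".
        print(exc)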
@@ -2469,7 +2469,12 @@ def make_hashable(obj):

      if isinstance(obj, tf.Tensor):
          return RefIdEq(obj)
-     assert False, "don't know how to make hashable: %r (%r)" % (obj, type(obj))
+     # Try if this is already hashable.
+     try:
+         hash(obj)
+     except Exception:
+         raise TypeError("don't know how to make hashable: %r (%r)" % (obj, type(obj)))
+     return obj


  class RefIdEq(Generic[T]):
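The change to `make_hashable` replaces the final `assert False` with a fallback: anything that is already hashable is returned unchanged, and only truly unhashable objects raise a `TypeError`. A stand-alone sketch of just that fallback logic (not the RETURNN function itself):

    def _fallback_make_hashable(obj):
        # Mirrors the new tail of make_hashable: accept anything that is already hashable.
        try:
            hash(obj)
        except Exception:
            raise TypeError("don't know how to make hashable: %r (%r)" % (obj, type(obj)))
        return obj

    class Opaque:
        """Hashable by default (object identity), so it now passes through."""

    assert isinstance(_fallback_make_hashable(Opaque()), Opaque)

    class Unhashable:
        __hash__ = None

    try:
        _fallback_make_hashable(Unhashable())
    except TypeError as exc:
        print(exc)  # don't know how to make hashable: ...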
@@ -1258,6 +1258,8 @@ def format_tb(tb=None, limit=None, allLocals=None, allGlobals=None, withTitle=Fa

  def print_tb(tb, file=None, **kwargs):
      """
+     Replacement for traceback.print_tb.
+
      :param types.TracebackType|types.FrameType|StackSummary tb:
      :param io.TextIOBase|io.StringIO|typing.TextIO|None file: stderr by default
      :return: nothing, prints to ``file``
@@ -1269,8 +1271,43 @@ def print_tb(tb, file=None, **kwargs):
      file.flush()


+ def print_exception(etype, value, tb, limit=None, file=None, chain=True):
+     """
+     Replacement for traceback.print_exception.
+
+     :param etype: exception type
+     :param value: exception value
+     :param tb: traceback
+     :param int|None limit:
+     :param io.TextIOBase|io.StringIO|typing.TextIO|None file: stderr by default
+     :param bool chain: whether to print the chain of exceptions
+     """
+     better_exchook(etype, value, tb, autodebugshell=False, file=file, limit=limit, chain=chain)
+
+
+ def print_exc(limit=None, file=None, chain=True):
+     """
+     Replacement for traceback.print_exc.
+     Shorthand for 'print_exception(*sys.exc_info(), limit, file)'.
+
+     :param int|None limit:
+     :param io.TextIOBase|io.StringIO|typing.TextIO|None file: stderr by default
+     :param bool chain:
+     """
+     print_exception(*sys.exc_info(), limit=limit, file=file, chain=chain)
+
+
  def better_exchook(
-     etype, value, tb, debugshell=False, autodebugshell=True, file=None, with_color=None, with_preamble=True
+     etype,
+     value,
+     tb,
+     debugshell=False,
+     autodebugshell=True,
+     file=None,
+     with_color=None,
+     with_preamble=True,
+     limit=None,
+     chain=True,
  ):
      """
      Replacement for sys.excepthook.
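With these additions, `better_exchook` gains `print_exception` and `print_exc` with the same call signatures as their `traceback` counterparts, plus `limit` and `chain` arguments that are threaded through to `better_exchook` itself. A small usage sketch (assuming RETURNN is installed; the failing function is made up for the example):

    import sys
    from returnn.util import better_exchook

    def fail():
        raise ValueError("inner problem")

    try:
        fail()
    except ValueError:
        # Drop-in for traceback.print_exc(): same arguments, but with the extended
        # better_exchook output; chain=False suppresses printing of __cause__/__context__.
        better_exchook.print_exc(limit=3, file=sys.stderr, chain=False)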
@@ -1284,6 +1321,8 @@ def better_exchook(
          and exception information. stderr by default.
      :param bool|None with_color: whether to use ANSI escape codes for colored output
      :param bool with_preamble: print a short preamble for the exception
+     :param int|None limit:
+     :param bool chain: whether to print the chain of exceptions
      """
      if file is None:
          file = sys.stderr
@@ -1292,16 +1331,17 @@ def better_exchook(
      output = _OutputLinesCollector(color=color)

      rec_args = dict(autodebugshell=False, file=file, with_color=with_color, with_preamble=with_preamble)
-     if getattr(value, "__cause__", None):
-         better_exchook(type(value.__cause__), value.__cause__, value.__cause__.__traceback__, **rec_args)
-         output("")
-         output("The above exception was the direct cause of the following exception:")
-         output("")
-     elif getattr(value, "__context__", None):
-         better_exchook(type(value.__context__), value.__context__, value.__context__.__traceback__, **rec_args)
-         output("")
-         output("During handling of the above exception, another exception occurred:")
-         output("")
+     if chain:
+         if getattr(value, "__cause__", None):
+             better_exchook(type(value.__cause__), value.__cause__, value.__cause__.__traceback__, **rec_args)
+             output("")
+             output("The above exception was the direct cause of the following exception:")
+             output("")
+         elif getattr(value, "__context__", None):
+             better_exchook(type(value.__context__), value.__context__, value.__context__.__traceback__, **rec_args)
+             output("")
+             output("During handling of the above exception, another exception occurred:")
+             output("")

      def format_filename(s):
          """
@@ -1320,7 +1360,14 @@ def better_exchook(
      all_locals, all_globals = {}, {}
      if tb is not None:
          output.lines.extend(
-             format_tb(tb=tb, allLocals=all_locals, allGlobals=all_globals, withTitle=True, with_color=color.enable)
+             format_tb(
+                 tb=tb,
+                 limit=limit,
+                 allLocals=all_locals,
+                 allGlobals=all_globals,
+                 withTitle=True,
+                 with_color=color.enable,
+             )
          )
      else:
          output(color("better_exchook: traceback unknown", color.fg_colors[1]))
@@ -1710,3 +1757,34 @@ def replace_traceback_format_tb():
      if hasattr(traceback, "StackSummary"):
          traceback.StackSummary.format = format_tb
          traceback.StackSummary.extract = _StackSummary_extract
+
+
+ def replace_traceback_print_tb():
+     """
+     Replaces these functions from the traceback module by our own:
+
+     - traceback.print_tb
+     - traceback.print_exception
+     - traceback.print_exc
+
+     Note that this kind of monkey patching might not be safe under all circumstances
+     and is not officially supported by Python.
+     """
+     import traceback
+
+     traceback.print_tb = print_tb
+     traceback.print_exception = print_exception
+     traceback.print_exc = print_exc
+
+
+ def setup_all():
+     """
+     Calls:
+
+     - :func:`install`
+     - :func:`replace_traceback_format_tb`
+     - :func:`replace_traceback_print_tb`
+     """
+     install()
+     replace_traceback_format_tb()
+     replace_traceback_print_tb()
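`replace_traceback_print_tb` and `setup_all` extend the existing `replace_traceback_format_tb` monkey patching, so third-party code that calls the stdlib `traceback` printing functions also goes through better_exchook. A short usage sketch (with the monkey-patching caveat from the docstring in mind):

    import traceback
    from returnn.util import better_exchook

    # Installs sys.excepthook and patches traceback.print_tb/print_exception/print_exc.
    better_exchook.setup_all()

    try:
        {}["missing"]
    except KeyError:
        traceback.print_exc()  # now rendered by better_exchook.print_exc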
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: returnn
- Version: 1.20241005.114831
+ Version: 1.20241011.20141
  Summary: The RWTH extensible training framework for universal recurrent neural networks
  Home-page: https://github.com/rwth-i6/returnn/
  Author: Albert Zeyer
@@ -302,6 +302,7 @@ returnn/torch/util/README.md
  returnn/torch/util/README.md
  returnn/torch/util/__init__.py
  returnn/torch/util/array_.py
  returnn/torch/util/diagnose_gpu.py
+ returnn/torch/util/exception_helper.py
  returnn/torch/util/gradient_checkpoint.py
  returnn/torch/util/module.py
  returnn/torch/util/scaled_gradient.py
@@ -1,2 +0,0 @@
- version = '1.20241005.114831'
- long_version = '1.20241005.114831+git.c53ebb4'