returnn 1.20240905.172412__tar.gz → 1.20240906.140550__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of returnn might be problematic. Further details are available on the package registry's page for this release.

Files changed (464):
  1. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/PKG-INFO +1 -1
  2. returnn-1.20240906.140550/_setup_info_generated.py +2 -0
  3. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/_utils.py +1 -1
  4. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/array_.py +9 -8
  5. returnn-1.20240906.140550/returnn/frontend/loss.py +181 -0
  6. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/parameter.py +0 -2
  7. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/layers/base.py +5 -4
  8. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/layers/basic.py +2 -0
  9. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/torch/frontend/_backend.py +1 -2
  10. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn.egg-info/PKG-INFO +1 -1
  11. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_TFNetworkLayer.py +82 -0
  12. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_rf_base.py +115 -0
  13. returnn-1.20240905.172412/_setup_info_generated.py +0 -2
  14. returnn-1.20240905.172412/returnn/frontend/loss.py +0 -93
  15. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/.editorconfig +0 -0
  16. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/.gitignore +0 -0
  17. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/.gitmodules +0 -0
  18. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/.kateconfig +0 -0
  19. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/CHANGELOG.md +0 -0
  20. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/CODEOWNERS +0 -0
  21. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/CONTRIBUTING.md +0 -0
  22. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/LICENSE +0 -0
  23. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/MANIFEST.in +0 -0
  24. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/README.rst +0 -0
  25. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/__init__.py +0 -0
  26. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/12AX.cluster_map +0 -0
  27. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/_setup_returnn_env.py +0 -0
  28. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-fwd.config +0 -0
  29. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-horovod-mpi.py +0 -0
  30. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-horovod-mpi.py.sh +0 -0
  31. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-horovod-mpi.sh +0 -0
  32. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-hyper-param-tuning.config +0 -0
  33. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-iter-dataset.py +0 -0
  34. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-list-devices.py +0 -0
  35. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-lua-torch-layer.config +0 -0
  36. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-pretrain.config +0 -0
  37. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-record-and-push-to-webserver.py +0 -0
  38. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-returnn-as-framework.py +0 -0
  39. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-rf-pt-benchmark.py +0 -0
  40. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-rf.config +0 -0
  41. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-rhn-enwik8.config +0 -0
  42. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-sprint-interface.py +0 -0
  43. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-tf-att-copy.config +0 -0
  44. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-tf-attention.config +0 -0
  45. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
  46. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
  47. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-tf-enc-dec.config +0 -0
  48. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-tf-hard-att-copy.config +0 -0
  49. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-tf-lstm-benchmark.py +0 -0
  50. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
  51. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
  52. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-tf-native-lstm.12ax.config +0 -0
  53. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-tf-native-lstm2.12ax.config +0 -0
  54. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
  55. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-tf-neural-transducer.12ax.config +0 -0
  56. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-tf-rec-explicit-lstm.config +0 -0
  57. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-tf-rec-explicit-rnn.config +0 -0
  58. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-tf-rec-self-att.config +0 -0
  59. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-tf-search-compiled-graph.py +0 -0
  60. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
  61. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-timit-lstm-ctc.config +0 -0
  62. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-torch.config +0 -0
  63. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
  64. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/demo.sh +0 -0
  65. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
  66. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
  67. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
  68. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/mdlstm/IAM/README.txt +0 -0
  69. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/mdlstm/IAM/chars.txt +0 -0
  70. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/mdlstm/IAM/config_demo +0 -0
  71. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/mdlstm/IAM/config_fwd +0 -0
  72. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/mdlstm/IAM/config_real +0 -0
  73. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
  74. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/mdlstm/IAM/decode.py +0 -0
  75. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
  76. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/mdlstm/IAM/go.sh +0 -0
  77. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/mdlstm/IAM/lines.txt +0 -0
  78. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/mdlstm/IAM/split/eval.txt +0 -0
  79. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/mdlstm/IAM/split/train.txt +0 -0
  80. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/mdlstm/IAM/split/valid.txt +0 -0
  81. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/mdlstm/README.md +0 -0
  82. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/mdlstm/artificial/create_test_h5.py +0 -0
  83. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/mdlstm/artificial/forwardconfig +0 -0
  84. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/mdlstm/artificial/go.sh +0 -0
  85. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/mdlstm/artificial/trainconfig +0 -0
  86. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
  87. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
  88. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/mdlstm/artificial_rgb/go.sh +0 -0
  89. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
  90. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/pyproject.toml +0 -0
  91. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/requirements.txt +0 -0
  92. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/__init__.py +0 -0
  93. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/__main__.py +0 -0
  94. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/__old_mod_loader__.py +0 -0
  95. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/__setup__.py +0 -0
  96. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/config.py +0 -0
  97. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/datasets/__init__.py +0 -0
  98. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/datasets/audio.py +0 -0
  99. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/datasets/basic.py +0 -0
  100. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/datasets/bundle_file.py +0 -0
  101. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/datasets/cached.py +0 -0
  102. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/datasets/cached2.py +0 -0
  103. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/datasets/distrib_files.py +0 -0
  104. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/datasets/generating.py +0 -0
  105. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/datasets/hdf.py +0 -0
  106. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/datasets/lm.py +0 -0
  107. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/datasets/map.py +0 -0
  108. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/datasets/meta.py +0 -0
  109. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/datasets/multi_proc.py +0 -0
  110. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/datasets/normalization_data.py +0 -0
  111. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/datasets/numpy_dump.py +0 -0
  112. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/datasets/postprocessing.py +0 -0
  113. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/datasets/raw_wav.py +0 -0
  114. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/datasets/sprint.py +0 -0
  115. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/datasets/stereo.py +0 -0
  116. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/datasets/util/__init__.py +0 -0
  117. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/datasets/util/feature_extraction.py +0 -0
  118. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/datasets/util/strings.py +0 -0
  119. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/datasets/util/vocabulary.py +0 -0
  120. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/engine/__init__.py +0 -0
  121. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/engine/base.py +0 -0
  122. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/engine/batch.py +0 -0
  123. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/__init__.py +0 -0
  124. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/__main__.py +0 -0
  125. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/warp-rna/.git +0 -0
  126. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
  127. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
  128. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
  129. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
  130. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
  131. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
  132. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
  133. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
  134. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
  135. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
  136. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
  137. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
  138. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
  139. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
  140. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
  141. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
  142. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
  143. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
  144. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
  145. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
  146. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
  147. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
  148. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
  149. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
  150. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/__init__.py +0 -0
  151. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/graph_editor/README.md +0 -0
  152. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/graph_editor/__init__.py +0 -0
  153. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/graph_editor/edit.py +0 -0
  154. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/graph_editor/reroute.py +0 -0
  155. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/graph_editor/select.py +0 -0
  156. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/graph_editor/subgraph.py +0 -0
  157. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/graph_editor/transform.py +0 -0
  158. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/extern/graph_editor/util.py +0 -0
  159. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/forward_iface.py +0 -0
  160. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/__init__.py +0 -0
  161. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/_backend.py +0 -0
  162. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/_native/__init__.py +0 -0
  163. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/_native/backend.cpp +0 -0
  164. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/_native/backend.hpp +0 -0
  165. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/_native/module.cpp +0 -0
  166. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/_native/module.hpp +0 -0
  167. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/_native/py_utils.hpp +0 -0
  168. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/_native/tensor_ops.cpp +0 -0
  169. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/_native/tensor_ops.hpp +0 -0
  170. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/_numpy_backend.py +0 -0
  171. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/_random_journal.py +0 -0
  172. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/attention.py +0 -0
  173. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/audio/__init__.py +0 -0
  174. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/audio/mel.py +0 -0
  175. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/audio/specaugment.py +0 -0
  176. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/backend.py +0 -0
  177. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/build_from_dict.py +0 -0
  178. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/cond.py +0 -0
  179. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/const.py +0 -0
  180. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/container.py +0 -0
  181. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/control_flow_ctx.py +0 -0
  182. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/conv.py +0 -0
  183. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/conversions/__init__.py +0 -0
  184. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/conversions/espnet_e_branchformer.py +0 -0
  185. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/conversions/hf_llama.py +0 -0
  186. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/conversions/torch_nn.py +0 -0
  187. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/decoder/__init__.py +0 -0
  188. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/decoder/transformer.py +0 -0
  189. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/device.py +0 -0
  190. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/dims.py +0 -0
  191. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/dropout.py +0 -0
  192. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/dtype.py +0 -0
  193. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/encoder/__init__.py +0 -0
  194. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/encoder/base.py +0 -0
  195. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/encoder/conformer.py +0 -0
  196. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/encoder/e_branchformer.py +0 -0
  197. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/gradient.py +0 -0
  198. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/graph.py +0 -0
  199. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/hooks.py +0 -0
  200. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/init.py +0 -0
  201. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/label_smoothing.py +0 -0
  202. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/linear.py +0 -0
  203. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/loop.py +0 -0
  204. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/math_.py +0 -0
  205. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/matmul.py +0 -0
  206. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/module.py +0 -0
  207. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/normalization.py +0 -0
  208. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/parametrizations.py +0 -0
  209. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/parametrize.py +0 -0
  210. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/piecewise_linear.py +0 -0
  211. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/rand.py +0 -0
  212. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/rec.py +0 -0
  213. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/reduce.py +0 -0
  214. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/run_ctx.py +0 -0
  215. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/signal.py +0 -0
  216. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/state.py +0 -0
  217. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/stepwise_scheduler.py +0 -0
  218. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/tensor_array.py +0 -0
  219. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/frontend/types.py +0 -0
  220. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/import_/__init__.py +0 -0
  221. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/import_/common.py +0 -0
  222. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/import_/git.py +0 -0
  223. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/import_/import_.py +0 -0
  224. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/learning_rate_control.py +0 -0
  225. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/log.py +0 -0
  226. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/native_op.cpp +0 -0
  227. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/native_op.py +0 -0
  228. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/pretrain.py +0 -0
  229. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/sprint/__init__.py +0 -0
  230. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/sprint/cache.py +0 -0
  231. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/sprint/control.py +0 -0
  232. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/sprint/error_signals.py +0 -0
  233. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/sprint/extern_interface.py +0 -0
  234. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/sprint/interface.py +0 -0
  235. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tensor/README.md +0 -0
  236. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tensor/__init__.py +0 -0
  237. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tensor/_dim_extra.py +0 -0
  238. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tensor/_tensor_extra.py +0 -0
  239. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tensor/_tensor_mixin_base.py +0 -0
  240. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tensor/_tensor_op_overloads.py +0 -0
  241. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tensor/control_flow_ctx.py +0 -0
  242. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tensor/dim.py +0 -0
  243. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tensor/marked_dim.py +0 -0
  244. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tensor/tensor.py +0 -0
  245. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tensor/tensor_dict.py +0 -0
  246. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tensor/utils.py +0 -0
  247. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/__init__.py +0 -0
  248. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/compat.py +0 -0
  249. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/data_pipeline.py +0 -0
  250. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/distributed.py +0 -0
  251. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/engine.py +0 -0
  252. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/frontend_layers/README.md +0 -0
  253. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/frontend_layers/__init__.py +0 -0
  254. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/frontend_layers/_backend.py +0 -0
  255. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/frontend_layers/_utils.py +0 -0
  256. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/frontend_layers/cond.py +0 -0
  257. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
  258. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
  259. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/frontend_layers/dims.py +0 -0
  260. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/frontend_layers/layer.py +0 -0
  261. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/frontend_layers/loop.py +0 -0
  262. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/frontend_layers/make_layer.py +0 -0
  263. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/frontend_layers/masked_computation.py +0 -0
  264. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
  265. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
  266. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/frontend_low_level/__init__.py +0 -0
  267. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/frontend_low_level/_backend.py +0 -0
  268. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/horovod.py +0 -0
  269. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/hyper_param_tuning.py +0 -0
  270. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/layers/__init__.py +0 -0
  271. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/layers/rec.py +0 -0
  272. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/layers/segmental_model.py +0 -0
  273. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/layers/signal_processing.py +0 -0
  274. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/layers/variable.py +0 -0
  275. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/native_op.py +0 -0
  276. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/network.py +0 -0
  277. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/sprint.py +0 -0
  278. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/updater.py +0 -0
  279. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/util/__init__.py +0 -0
  280. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/util/basic.py +0 -0
  281. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/util/data.py +0 -0
  282. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/util/gradient_checkpoint.py +0 -0
  283. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/util/ken_lm.py +0 -0
  284. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/tf/util/open_fst.py +0 -0
  285. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/torch/README.md +0 -0
  286. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/torch/__init__.py +0 -0
  287. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/torch/data/__init__.py +0 -0
  288. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/torch/data/extern_data.py +0 -0
  289. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/torch/data/pipeline.py +0 -0
  290. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/torch/data/queued_data_iter.py +0 -0
  291. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
  292. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/torch/data/tensor_utils.py +0 -0
  293. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/torch/distributed.py +0 -0
  294. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/torch/engine.py +0 -0
  295. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/torch/frontend/__init__.py +0 -0
  296. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/torch/frontend/_rand.py +0 -0
  297. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/torch/frontend/bridge.py +0 -0
  298. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/torch/frontend/raw_ops.py +0 -0
  299. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/torch/updater.py +0 -0
  300. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/torch/util/README.md +0 -0
  301. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/torch/util/__init__.py +0 -0
  302. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/torch/util/array_.py +0 -0
  303. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/torch/util/diagnose_gpu.py +0 -0
  304. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/torch/util/gradient_checkpoint.py +0 -0
  305. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/torch/util/module.py +0 -0
  306. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/torch/util/scaled_gradient.py +0 -0
  307. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/util/__init__.py +0 -0
  308. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/util/basic.py +0 -0
  309. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/util/better_exchook.py +0 -0
  310. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/util/bpe.py +0 -0
  311. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/util/debug.py +0 -0
  312. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/util/debug_helpers.py +0 -0
  313. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/util/file_cache.py +0 -0
  314. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/util/fsa.py +0 -0
  315. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/util/literal_py_to_pickle.py +0 -0
  316. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/util/math.py +0 -0
  317. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
  318. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/util/native_code_compiler.py +0 -0
  319. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/util/pprint.py +0 -0
  320. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/util/py-to-pickle.cpp +0 -0
  321. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/util/py_compat.py +0 -0
  322. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/util/py_ext_mod_compiler.py +0 -0
  323. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/util/result_with_reason.py +0 -0
  324. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/util/sig_proc.py +0 -0
  325. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/util/task_system.py +0 -0
  326. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/util/train_proc_manager.py +0 -0
  327. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn/util/watch_memory.py +0 -0
  328. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn.egg-info/SOURCES.txt +0 -0
  329. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn.egg-info/dependency_links.txt +0 -0
  330. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/returnn.egg-info/top_level.txt +0 -0
  331. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/rnn.py +0 -0
  332. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/setup.cfg +0 -0
  333. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/setup.py +0 -0
  334. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/DummySprintExec.py +0 -0
  335. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/PyCharm-inspection-profile.xml +0 -0
  336. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/PyCharm.idea/.gitignore +0 -0
  337. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/PyCharm.idea/.name +0 -0
  338. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
  339. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
  340. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
  341. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
  342. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
  343. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/PyCharm.idea/misc.xml +0 -0
  344. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/PyCharm.idea/modules.xml +0 -0
  345. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/PyCharm.idea/returnn.iml +0 -0
  346. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
  347. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/_set_num_threads1.py +0 -0
  348. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/_setup_returnn_env.py +0 -0
  349. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/_setup_test_env.py +0 -0
  350. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/bpe-unicode-demo.codes +0 -0
  351. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/bpe-unicode-demo.vocab +0 -0
  352. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/lexicon_opt.fst +0 -0
  353. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/lexicon_opt.isyms +0 -0
  354. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/lexicon_opt.jpg +0 -0
  355. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/lexicon_opt.osyms +0 -0
  356. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/lint_common.py +0 -0
  357. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/pycharm-inspect.py +0 -0
  358. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/pylint.py +0 -0
  359. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/returnn-as-framework.py +0 -0
  360. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/rf_utils.py +0 -0
  361. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/spelling.dic +0 -0
  362. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_Config.py +0 -0
  363. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_Dataset.py +0 -0
  364. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_Fsa.py +0 -0
  365. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_GeneratingDataset.py +0 -0
  366. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_HDFDataset.py +0 -0
  367. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_LearningRateControl.py +0 -0
  368. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_Log.py +0 -0
  369. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_MultiProcDataset.py +0 -0
  370. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_Pretrain.py +0 -0
  371. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_ResNet.py +0 -0
  372. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_SprintDataset.py +0 -0
  373. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_SprintInterface.py +0 -0
  374. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_TFEngine.py +0 -0
  375. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_TFNativeOp.py +0 -0
  376. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_TFNetworkRecLayer.py +0 -0
  377. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_TFNetworkSigProcLayer.py +0 -0
  378. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_TFUpdater.py +0 -0
  379. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_TFUtil.py +0 -0
  380. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_TF_determinism.py +0 -0
  381. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_TaskSystem.py +0 -0
  382. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_TaskSystem_SharedMem.py +0 -0
  383. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_TranslationDataset.py +0 -0
  384. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_Util.py +0 -0
  385. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_demos.py +0 -0
  386. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_fork_exec.py +0 -0
  387. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_hdf_dump.py +0 -0
  388. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_rf_array.py +0 -0
  389. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_rf_attention.py +0 -0
  390. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_rf_cond.py +0 -0
  391. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_rf_const.py +0 -0
  392. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_rf_container.py +0 -0
  393. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_rf_conv.py +0 -0
  394. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_rf_decoder_transformer.py +0 -0
  395. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_rf_encoder_conformer.py +0 -0
  396. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_rf_gradient.py +0 -0
  397. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_rf_label_smoothing.py +0 -0
  398. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_rf_loop.py +0 -0
  399. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_rf_math.py +0 -0
  400. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_rf_normalization.py +0 -0
  401. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_rf_piecewise_linear.py +0 -0
  402. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_rf_rec.py +0 -0
  403. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_rf_reduce.py +0 -0
  404. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_rf_signal.py +0 -0
  405. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_tensor.py +0 -0
  406. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_tools.py +0 -0
  407. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_torch_dataset.py +0 -0
  408. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_torch_engine.py +0 -0
  409. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_torch_frontend.py +0 -0
  410. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_torch_internal_frontend.py +0 -0
  411. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/test_torch_util.py +0 -0
  412. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tests/torch_utils.py +0 -0
  413. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/_setup_returnn_env.py +0 -0
  414. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/analyze-dataset-batches.py +0 -0
  415. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/bliss-collect-seq-lens.py +0 -0
  416. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/bliss-dump-text.py +0 -0
  417. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/bliss-get-segment-names.py +0 -0
  418. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/bliss-to-ogg-zip.py +0 -0
  419. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/bpe-create-lexicon.py +0 -0
  420. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/calculate-word-error-rate.py +0 -0
  421. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/cleanup-old-models.py +0 -0
  422. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/collect-orth-symbols.py +0 -0
  423. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/collect-words.py +0 -0
  424. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/compile_native_op.py +0 -0
  425. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/compile_tf_graph.py +0 -0
  426. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/debug-dump-search-scores.py +0 -0
  427. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/debug-plot-search-scores.py +0 -0
  428. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/dump-dataset-raw-strings.py +0 -0
  429. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/dump-dataset.py +0 -0
  430. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/dump-forward-stats.py +0 -0
  431. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/dump-forward.py +0 -0
  432. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/dump-network-json.py +0 -0
  433. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/dump-pickle.py +0 -0
  434. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/extract_state_tying_from_dataset.py +0 -0
  435. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/get-attention-weights.py +0 -0
  436. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/get-best-model-epoch.py +0 -0
  437. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/hdf_dump.py +0 -0
  438. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/hdf_dump_translation_dataset.py +0 -0
  439. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/import-blocks-mt-model.py +0 -0
  440. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/import-t2t-mt-model.py +0 -0
  441. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/lattice_rescorer/.gitignore +0 -0
  442. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/lattice_rescorer/Makefile +0 -0
  443. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/lattice_rescorer/README.md +0 -0
  444. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/lattice_rescorer/example/README.md +0 -0
  445. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/lattice_rescorer/example/libs_list +0 -0
  446. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
  447. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
  448. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
  449. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/lattice_rescorer/example/state_vars_list +0 -0
  450. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/lattice_rescorer/example/tensor_names_list +0 -0
  451. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/lattice_rescorer/file.h +0 -0
  452. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
  453. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
  454. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/lattice_rescorer/main.cc +0 -0
  455. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/lattice_rescorer/rescorer.h +0 -0
  456. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/lattice_rescorer/vocabulary.cc +0 -0
  457. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/lattice_rescorer/vocabulary.h +0 -0
  458. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/tf_avg_checkpoints.py +0 -0
  459. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/tf_inspect_checkpoint.py +0 -0
  460. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/tf_inspect_summary_log.py +0 -0
  461. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/torch_avg_checkpoints.py +0 -0
  462. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/torch_export_to_onnx.py +0 -0
  463. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/torch_inspect_checkpoint.py +0 -0
  464. {returnn-1.20240905.172412 → returnn-1.20240906.140550}/tools/torch_inspect_checkpoint_and_opt.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: returnn
3
- Version: 1.20240905.172412
3
+ Version: 1.20240906.140550
4
4
  Summary: The RWTH extensible training framework for universal recurrent neural networks
5
5
  Home-page: https://github.com/rwth-i6/returnn/
6
6
  Author: Albert Zeyer
@@ -0,0 +1,2 @@
1
+ version = '1.20240906.140550'
2
+ long_version = '1.20240906.140550+git.fd95009'
@@ -292,5 +292,5 @@ def _slice_value_is_reduce(v: Union[None, slice, int, numpy.number, numpy.ndarra
292
292
  return v.ndim == 0
293
293
  if isinstance(v, Tensor):
294
294
  assert len(v.dims) <= 1, f"strided_slice: expect scalar or vector, got Tensor with dims {v.dims}"
295
- return v.dims == 0
295
+ return len(v.dims) == 0
296
296
  raise TypeError(f"strided_slice: got unexpected value of type {type(v).__name__}")
@@ -412,16 +412,17 @@ def pad(
412
412
  :return: padded tensor, out_dims. out dims are for each dim in axes
413
413
  """
414
414
  assert len(axes) == len(padding)
415
- if not out_dims:
416
- for left, right in padding:
417
- if isinstance(left, Dim):
418
- assert not left.need_masking(), f"padding {padding} does not support dynamic left padding"
419
- if isinstance(right, Dim):
420
- assert not right.need_masking(), f"padding {padding} does not support dynamic right padding"
421
- # Note that even dynamic middle dims is not exactly correct...
422
- out_dims = [left + middle + right for middle, (left, right) in zip(axes, padding)]
423
415
  if handle_dynamic_dims is None:
424
416
  handle_dynamic_dims = _pad_handle_dynamic_dims_default(axes, padding, mode=mode)
417
+ if not out_dims:
418
+ if handle_dynamic_dims:
419
+ for left, right in padding:
420
+ if isinstance(left, Dim):
421
+ assert not left.need_masking(), f"padding {padding} does not support dynamic left padding"
422
+ if isinstance(right, Dim):
423
+ assert not right.need_masking(), f"padding {padding} does not support dynamic right padding"
424
+ # Note that even dynamic middle dims is not exactly correct...
425
+ out_dims = [left + middle + right for middle, (left, right) in zip(axes, padding)]
425
426
  # noinspection PyProtectedMember
426
427
  return (
427
428
  source._raw_backend.pad(
@@ -0,0 +1,181 @@
1
+ """
2
+ Loss functions
3
+ """
4
+
5
+ from __future__ import annotations
6
+ from returnn.tensor import Tensor, Dim
7
+ import returnn.frontend as rf
8
+
9
+
10
+ __all__ = ["cross_entropy", "ctc_loss", "edit_distance"]
11
+
12
+
13
+ def cross_entropy(
14
+ *,
15
+ estimated: Tensor,
16
+ target: Tensor,
17
+ axis: Dim,
18
+ estimated_type: str,
19
+ ) -> Tensor:
20
+ """
21
+ ``target`` is supposed to be in probability space (normalized). It can also be sparse, i.e. contain class indices.
22
+ ``estimated`` can be probs, log-probs or logits, specified via ``estimated_type``.
23
+
24
+ Assuming both are in probability space, the cross entropy is:
25
+
26
+ H(target,estimated) = -reduce_sum(target * log(estimated), axis=axis)
27
+ = -matmul(target, log(estimated), reduce=axis)
28
+
29
+ In case you want label smoothing, you can use e.g.::
30
+
31
+ ce = nn.cross_entropy(
32
+ target=nn.label_smoothing(target, 0.1),
33
+ estimated=estimated)
34
+
35
+ :param estimated: probs, log-probs or logits, specified via ``estimated_type``
36
+ :param target: probs, normalized, can also be sparse
37
+ :param axis: class labels dim over which softmax is computed
38
+ :param estimated_type: "probs", "log-probs" or "logits"
39
+ :return: cross entropy (same Dims as 'estimated' but without 'axis')
40
+ """
41
+
42
+ if estimated_type == "logits":
43
+ # This is a common case and most backends provide optimized functions for it.
44
+ # noinspection PyProtectedMember
45
+ return estimated._raw_backend.softmax_cross_entropy_with_logits(logits=estimated, targets=target, axis=axis)
46
+ if estimated_type == "probs":
47
+ log_prob = rf.log(estimated) # TODO: make numerically stable
48
+ elif estimated_type == "log-probs":
49
+ log_prob = estimated
50
+ else:
51
+ raise ValueError("estimated_type must be 'probs', 'log-probs' or 'logits'")
52
+ if target.sparse_dim:
53
+ return -rf.gather(log_prob, indices=target, axis=axis)
54
+ return -rf.matmul(target, log_prob, reduce=axis)
55
+
56
+
57
+ def ctc_loss(
58
+ *,
59
+ logits: Tensor,
60
+ logits_normalized: bool = False,
61
+ targets: Tensor,
62
+ input_spatial_dim: Dim,
63
+ targets_spatial_dim: Dim,
64
+ blank_index: int,
65
+ max_approx: bool = False,
66
+ ) -> Tensor:
67
+ """
68
+ Calculates the CTC loss.
69
+
70
+ Internally, this uses :func:`returnn.tf.native_op.ctc_loss`
71
+ which is equivalent to tf.nn.ctc_loss but more efficient.
72
+
73
+ Output is of shape [B].
74
+
75
+ :param logits: (before softmax). shape [B...,input_spatial,C]
76
+ :param logits_normalized: whether the logits are already normalized (e.g. via log-softmax)
77
+ :param targets: sparse. shape [B...,targets_spatial] -> C
78
+ :param input_spatial_dim: spatial dim of input logits
79
+ :param targets_spatial_dim: spatial dim of targets
80
+ :param blank_index: vocab index of the blank symbol
81
+ :param max_approx: if True, use max instead of sum over alignments (max approx, Viterbi)
82
+ :return: loss shape [B...]
83
+ """
84
+ # noinspection PyProtectedMember
85
+ return logits._raw_backend.ctc_loss(
86
+ logits=logits,
87
+ logits_normalized=logits_normalized,
88
+ targets=targets,
89
+ input_spatial_dim=input_spatial_dim,
90
+ targets_spatial_dim=targets_spatial_dim,
91
+ blank_index=blank_index,
92
+ max_approx=max_approx,
93
+ )
94
+
95
+
96
+ def edit_distance(a: Tensor, a_spatial_dim: Dim, b: Tensor, b_spatial_dim: Dim, *, dtype: str = "int32") -> Tensor:
97
+ """
98
+ :param a: [B,Ta]
99
+ :param a_spatial_dim: Ta
100
+ :param b: [B,Tb]
101
+ :param b_spatial_dim: Tb
102
+ :param dtype:
103
+ :return: [B]
104
+ """
105
+ import numpy # just for iinfo on dtype to get max value
106
+
107
+ # The axis permutation is just an efficiency optimization.
108
+ a = a.copy_transpose([a_spatial_dim] + a.remaining_dims(a_spatial_dim))
109
+ b = b.copy_transpose([b_spatial_dim] + b.remaining_dims(b_spatial_dim))
110
+ dev = a.device
111
+ max_dist_err = numpy.iinfo(dtype).max
112
+ n_a_max_len = a_spatial_dim.get_dim_value()
113
+ n_b_max_len = b_spatial_dim.get_dim_value()
114
+ if int(n_a_max_len) < int(n_b_max_len):
115
+ a, b = b, a
116
+ a_spatial_dim, b_spatial_dim = b_spatial_dim, a_spatial_dim
117
+ n_a_max_len, n_b_max_len = n_b_max_len, n_a_max_len
118
+ # Now n_a_max_len >= n_b_max_len.
119
+ batch_dims = a.remaining_dims(a_spatial_dim)
120
+ for dim in b.remaining_dims(b_spatial_dim):
121
+ if dim not in batch_dims:
122
+ batch_dims.append(dim)
123
+ a_seq_len = a_spatial_dim.get_dyn_size_ext_for_device(dev) # [B]
124
+ b_seq_len = b_spatial_dim.get_dyn_size_ext_for_device(dev) # [B]
125
+ a_tensor_ext, (a_spatial_dim_ext,) = rf.pad(
126
+ a, axes=[a_spatial_dim], padding=[(b_spatial_dim, b_spatial_dim)], handle_dynamic_dims=False
127
+ ) # [Tb+Ta+Tb,B]
128
+ a_spatial_dim_ext: Dim
129
+ b_tensor_flipped = rf.reverse_sequence(b, axis=b_spatial_dim, handle_dynamic_dims=False) # [Tb,B]
130
+ entry_idx_ = rf.range_over_dim(b_spatial_dim, device=dev) # [Tb]->Tb
131
+ b_spatial_dim1 = b_spatial_dim + 1
132
+ buffer_dim = Dim(3 * b_spatial_dim1.get_dim_value_tensor(), name="buffer")
133
+ buffer = rf.Parameter([buffer_dim] + batch_dims, device=dev, dtype=dtype, auxiliary=True) # [3*(Tb+1),B]
134
+ buffer_offsets = [0, b_spatial_dim1.get_dim_value_tensor(), b_spatial_dim1.get_dim_value_tensor() * 2]
135
+ result = rf.where((a_seq_len == 0) & (b_seq_len == 0), 0, max_dist_err) # [B] # noqa
136
+
137
+ # We are going diagonal over (Ta+1) and (Tb+1). (Similar as RETURNN native EditDistanceOp.)
138
+ # You need to draw the grid on paper to understand all the index math...
139
+ for u in range(1, n_a_max_len + n_b_max_len + 1):
140
+
141
+ prev2_dist, _ = rf.slice(
142
+ buffer, axis=buffer_dim, start=buffer_offsets[u % 3], size=b_spatial_dim1, out_dim=b_spatial_dim1
143
+ ) # [Tb+1,B]
144
+ prev_dist, _ = rf.slice(
145
+ buffer, axis=buffer_dim, start=buffer_offsets[(u + 1) % 3], size=b_spatial_dim1, out_dim=b_spatial_dim1
146
+ ) # [Tb+1,B]
147
+ cur_dist_start_offset = buffer_offsets[(u + 2) % 3]
148
+
149
+ del_cost = (
150
+ rf.slice(prev_dist, axis=b_spatial_dim1, end=b_spatial_dim.get_dim_value_tensor(), out_dim=b_spatial_dim)[0]
151
+ + 1
152
+ ) # [Tb,B]
153
+ ins_cost = rf.slice(prev_dist, axis=b_spatial_dim1, start=1, out_dim=b_spatial_dim)[0] + 1 # [Tb,B]
154
+ sub_cost = rf.slice(prev2_dist, axis=b_spatial_dim1, start=1, out_dim=b_spatial_dim)[0] + rf.cast(
155
+ rf.slice(a_tensor_ext, axis=a_spatial_dim_ext, start=u - 1, size=b_spatial_dim, out_dim=b_spatial_dim)[0]
156
+ != b_tensor_flipped,
157
+ dtype=dtype,
158
+ )
159
+ min_cost = rf.minimum(del_cost, ins_cost, sub_cost) # [Tb,B]
160
+ t_a_gt_zero_mask = entry_idx_ > n_b_max_len - u # [Tb]
161
+
162
+ buffer.assign_key(
163
+ axis=buffer_dim,
164
+ key=slice(cur_dist_start_offset, cur_dist_start_offset + b_spatial_dim.get_dim_value_tensor()),
165
+ key_dim=b_spatial_dim,
166
+ value=rf.where(t_a_gt_zero_mask, min_cost, u),
167
+ )
168
+ # last entry in cur_dist, that is where t_b == 0
169
+ buffer.assign_key(
170
+ axis=buffer_dim, key=cur_dist_start_offset + b_spatial_dim.get_dim_value_tensor(), key_dim=None, value=u
171
+ )
172
+
173
+ end_offset_a = n_b_max_len + a_seq_len - u # [B]
174
+ end_offset_b = n_b_max_len - b_seq_len # [B]
175
+ result = rf.where(
176
+ end_offset_a == end_offset_b,
177
+ rf.gather(buffer, axis=buffer_dim, indices=cur_dist_start_offset + end_offset_a, clip_to_valid=True),
178
+ result,
179
+ )
180
+
181
+ return result
@@ -75,8 +75,6 @@ class Parameter(Tensor[T]):
75
75
  raise TypeError(f"rf.Parameter: invalid type for dims_or_tensor: {type(dims_or_tensor)}")
76
76
  if not all(isinstance(dim, Dim) for dim in dims):
77
77
  raise TypeError(f"rf.Parameter: shape {dims} must be a sequence of Dim")
78
- if not all(isinstance(dim.dimension, int) for dim in dims):
79
- raise ValueError(f"rf.Parameter: shape {dims} must be static")
80
78
  if len(dims) != len(set((d, d.match_priority) for d in dims)):
81
79
  raise ValueError(f"rf.Parameter: shape {dims} dims must be unique")
82
80
  super(Parameter, self).__init__(
@@ -1259,12 +1259,13 @@ class LayerBase(object):
1259
1259
  getter.__qualname__ += f"(base_var_scope.custom_getter={base_var_scope.custom_getter})"
1260
1260
 
1261
1261
  param = getter(**getter_kwargs)
1262
+ param_ = param
1262
1263
 
1263
1264
  # Only apply this if we get a variable. Otherwise, maybe variational noise was already applied
1264
1265
  # (by some parent var scope), and we don't want to apply it twice.
1265
- if param_variational_noise and param.dtype.is_floating and isinstance(param, tf.Variable):
1266
+ if param_variational_noise and param.dtype.is_floating and isinstance(param_, tf.Variable):
1266
1267
  with default_control_flow_ctx(): # make independent from loop/cond
1267
- with reuse_name_scope_of_tensor(param, postfix="_variational_noise", add_tensor_name=True):
1268
+ with reuse_name_scope_of_tensor(param_, postfix="_variational_noise", add_tensor_name=True):
1268
1269
 
1269
1270
  def _apply_var_noise():
1270
1271
  rnd_state = tf_util.StatelessRandomSeed.create(shape=tf_util.get_shape(param))
@@ -1280,11 +1281,11 @@ class LayerBase(object):
1280
1281
  if (
1281
1282
  param_dropout
1282
1283
  and param.dtype.is_floating
1283
- and isinstance(param, tf.Variable)
1284
+ and isinstance(param_, tf.Variable)
1284
1285
  and param.shape.ndims >= param_dropout_min_ndim
1285
1286
  ):
1286
1287
  with default_control_flow_ctx(): # make independent from loop/cond
1287
- with reuse_name_scope_of_tensor(param, postfix="_weight_dropout", add_tensor_name=True):
1288
+ with reuse_name_scope_of_tensor(param_, postfix="_weight_dropout", add_tensor_name=True):
1288
1289
  param = self.network.cond_on_train(
1289
1290
  fn_train=lambda: tf_util.dropout(
1290
1291
  param,
@@ -2139,6 +2139,7 @@ class LinearLayer(_ConcatInputLayer):
2139
2139
  name="W", shape=weights_shape, dtype=tf.float32, initializer=fwd_weights_initializer
2140
2140
  )
2141
2141
  )
2142
+ self.weights = weights
2142
2143
  weights_ = weights
2143
2144
  if in_split_info:
2144
2145
  tf_util.set_param_axes_split_info(
@@ -2160,6 +2161,7 @@ class LinearLayer(_ConcatInputLayer):
2160
2161
  else:
2161
2162
  assert not bias_init
2162
2163
  b = None
2164
+ self.bias = b
2163
2165
 
2164
2166
  with tf.name_scope("linear"):
2165
2167
  from returnn.tf.util.basic import dot, to_int32_64, is_gpu_available_in_session, move_axis
@@ -690,9 +690,8 @@ class TorchBackend(Backend[torch.Tensor]):
690
690
  """
691
691
  :return: parameter
692
692
  """
693
- assert all(d.is_static() for d in tensor.dims)
694
693
  data = torch.zeros(
695
- [d.dimension for d in tensor.dims],
694
+ [d.get_dim_value() for d in tensor.dims],
696
695
  dtype=TorchBackend.as_dtype_raw(tensor.dtype),
697
696
  device=device or rf.get_default_device(),
698
697
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: returnn
3
- Version: 1.20240905.172412
3
+ Version: 1.20240906.140550
4
4
  Summary: The RWTH extensible training framework for universal recurrent neural networks
5
5
  Home-page: https://github.com/rwth-i6/returnn/
6
6
  Author: Albert Zeyer
@@ -10992,6 +10992,88 @@ def test_param_weight_dropout_and_reuse_params():
10992
10992
  session.run(opt_op, feed_dict=make_feed_dict(network.extern_data))
10993
10993
 
10994
10994
 
10995
+ def test_param_weight_dropout_and_variational_noise():
10996
+ from returnn.tensor import Dim, batch_dim
10997
+ from returnn.tf.util.basic import print_graph_output, find_ops_with_tensor_input
10998
+ from returnn.tf.util.gradient_checkpoint import prepare_gradient_checkpointing
10999
+
11000
+ time_dim = Dim(None, name="time")
11001
+ feature_dim = Dim(7, name="feature")
11002
+ classes_dim = Dim(13, name="classes")
11003
+
11004
+ config = Config(
11005
+ {
11006
+ "param_dropout": 0.1,
11007
+ "param_variational_noise": 0.075,
11008
+ "extern_data": {
11009
+ "data": {
11010
+ "dim_tags": [batch_dim, time_dim, feature_dim],
11011
+ "time_dim_axis": 1,
11012
+ "feature_dim": feature_dim,
11013
+ "dtype": "float32",
11014
+ },
11015
+ "classes": {"dim_tags": [batch_dim, time_dim], "sparse_dim": classes_dim, "dtype": "int32"},
11016
+ },
11017
+ }
11018
+ )
11019
+ with make_scope() as session:
11020
+ network = TFNetwork(config=config, train_flag=True)
11021
+ # Do subnetwork by intention, to test when we have multiple variable scopes.
11022
+ network.construct_from_dict(
11023
+ {
11024
+ "output": {
11025
+ "class": "linear",
11026
+ "out_dim": classes_dim,
11027
+ "activation": "softmax",
11028
+ "from": "data",
11029
+ "loss": "ce",
11030
+ "target": "classes",
11031
+ }
11032
+ }
11033
+ )
11034
+ loss = network.get_total_loss()
11035
+
11036
+ prepare_gradient_checkpointing()
11037
+ opt = tf_compat.v1.train.GradientDescentOptimizer(learning_rate=0.1)
11038
+ opt_op = opt.minimize(loss)
11039
+ print("optimizer:")
11040
+ print_graph_output(opt_op)
11041
+
11042
+ tf_log_dir = tempfile.mkdtemp()
11043
+ print("TF log dir:", tf_log_dir)
11044
+ writer = tf_compat.v1.summary.FileWriter(logdir=tf_log_dir, graph=session.graph, session=session)
11045
+ params = network.get_params_list()
11046
+ print("params:", params)
11047
+ assert len(params) == 2 # weights and bias
11048
+ for param in params:
11049
+ print("param:", param)
11050
+ ops = find_ops_with_tensor_input(param, fetches=opt_op)
11051
+ print("param graph:")
11052
+ print_graph_output(ops)
11053
+ # There can be multiple ops due to gradient checkpointing.
11054
+ assert (
11055
+ 1 <= len(ops)
11056
+ and all("_variational_noise/" in op.name or "/ResourceApply" in op.name for op in ops)
11057
+ and any("_variational_noise/" in op.name for op in ops)
11058
+ ), f"ops: {ops}"
11059
+
11060
+ layer = network.layers["output"]
11061
+ assert isinstance(layer, LinearLayer)
11062
+ print("weights:", layer.weights)
11063
+ assert layer.weights.name.startswith("output/W_weight_dropout/")
11064
+
11065
+ network.initialize_params(session=session)
11066
+
11067
+ run_metadata = tf_compat.v1.RunMetadata()
11068
+ run_options = tf_compat.v1.RunOptions(trace_level=tf_compat.v1.RunOptions.FULL_TRACE)
11069
+ session.run(
11070
+ opt_op, feed_dict=make_feed_dict(network.extern_data), options=run_options, run_metadata=run_metadata
11071
+ )
11072
+ writer.add_run_metadata(run_metadata, tag="step_0")
11073
+ writer.close()
11074
+ print("TF log dir:", tf_log_dir)
11075
+
11076
+
10995
11077
  def test_LinearLayer_simple_train():
10996
11078
  config = Config()
10997
11079
  n_in, n_out = 7, 3
@@ -560,3 +560,118 @@ def test_weight_noise():
560
560
  rf.weight_noise(conv, "filter", std=0.1)
561
561
  time_dim = Dim(11, name="time")
562
562
  conv(rf.random_normal([time_dim, in_dim]), in_spatial_dim=time_dim)
563
+
564
+
565
+ def test_edit_distance():
566
+ import numpy
567
+ import torch
568
+ from typing import Sequence
569
+ from collections import namedtuple
570
+ import itertools
571
+
572
+ def _edit_distance_ref_b1(a: Sequence[int], b: Sequence[int]) -> int:
573
+ """
574
+ Reference implementation for edit distance.
575
+ """
576
+ n = len(a) + 1
577
+ m = len(b) + 1
578
+ d = torch.zeros((n, m), dtype=torch.int32)
579
+ for i in range(n):
580
+ d[i, 0] = i
581
+ for j in range(m):
582
+ d[0, j] = j
583
+ for j in range(1, m):
584
+ for i in range(1, n):
585
+ if a[i - 1] == b[j - 1]:
586
+ d[i, j] = d[i - 1, j - 1]
587
+ else:
588
+ d[i, j] = min(
589
+ d[i - 1, j] + 1, # deletion
590
+ d[i, j - 1] + 1, # insertion
591
+ d[i - 1, j - 1] + 1, # substitution
592
+ )
593
+ return int(d[n - 1, m - 1])
594
+
595
+ # noinspection PyShadowingNames
596
+ def _edit_distance_ref(a: Tensor, a_spatial_dim: Dim, b: Tensor, b_spatial_dim: Dim) -> torch.Tensor:
597
+ """
598
+ Reference implementation for edit distance.
599
+ """
600
+ batch_dim = a.dims[0]
601
+ assert a.dims == (batch_dim, a_spatial_dim) and b.dims == (batch_dim, b_spatial_dim)
602
+ res = []
603
+ for i in range(batch_dim.dimension):
604
+ assert a_spatial_dim.dyn_size[i] <= a.raw_tensor.size(1)
605
+ assert b_spatial_dim.dyn_size[i] <= b.raw_tensor.size(1)
606
+ res.append(
607
+ _edit_distance_ref_b1(
608
+ a.raw_tensor[i, : a_spatial_dim.dyn_size[i]], b.raw_tensor[i, : b_spatial_dim.dyn_size[i]]
609
+ )
610
+ )
611
+ return torch.tensor(res, dtype=torch.int32)
612
+
613
+ # noinspection PyShadowingNames
614
+ def _check_edit_distance(a: Tensor, a_spatial_dim: Dim, b: Tensor, b_spatial_dim: Dim):
615
+ ref = _edit_distance_ref(a, a_spatial_dim, b, b_spatial_dim)
616
+ res = rf.edit_distance(a, a_spatial_dim, b, b_spatial_dim)
617
+ assert res.raw_tensor.shape == ref.shape == a_spatial_dim.dyn_size.shape == b_spatial_dim.dyn_size.shape
618
+ assert len(ref.shape) == 1
619
+ print("ref:", ref, "res:", res.raw_tensor)
620
+ batch_size = ref.shape[0]
621
+ for i in range(batch_size):
622
+ assert res.raw_tensor[i] == ref[i], (
623
+ f"batch idx i={i}, a[i]={a.raw_tensor[i]} len {a_spatial_dim.dyn_size[i]},"
624
+ f" b[i]={b.raw_tensor[i]} len {b_spatial_dim.dyn_size[i]},"
625
+ f" ref[i]={ref[i]}, res[i]={res.raw_tensor[i]};\n"
626
+ f" a={a.raw_tensor} lens {a_spatial_dim.dyn_size},"
627
+ f" b={b.raw_tensor} lens {b_spatial_dim.dyn_size}"
628
+ )
629
+ assert (res.raw_tensor == ref).all()
630
+
631
+ SizedTensor = namedtuple("SizedTensor", ["tensor", "seq_lens"])
632
+
633
+ _SeqsB1 = [
634
+ SizedTensor(torch.tensor([[1, 2, 3, 4]]), torch.tensor([4])),
635
+ SizedTensor(torch.tensor([[1, 2, 3]]), torch.tensor([3])),
636
+ SizedTensor(torch.tensor([[1, 2, 4]]), torch.tensor([3])),
637
+ SizedTensor(torch.tensor([[1, 4]]), torch.tensor([2])),
638
+ SizedTensor(torch.tensor([[5, 2, 4]]), torch.tensor([3])),
639
+ SizedTensor(torch.tensor([[]], dtype=torch.int64), torch.tensor([0])),
640
+ ]
641
+
642
+ for a, b in itertools.product(_SeqsB1, _SeqsB1):
643
+ a: SizedTensor
644
+ b: SizedTensor
645
+ # noinspection PyShadowingNames
646
+ batch_dim = Dim(1, name="batch")
647
+ a_spatial_dim = Dim(Tensor("a_sizes", [batch_dim], dtype="int64", raw_tensor=a.seq_lens))
648
+ b_spatial_dim = Dim(Tensor("b_sizes", [batch_dim], dtype="int64", raw_tensor=b.seq_lens))
649
+ a_ = Tensor("a", [batch_dim, a_spatial_dim], dtype="int64", raw_tensor=a.tensor)
650
+ b_ = Tensor("b", [batch_dim, b_spatial_dim], dtype="int64", raw_tensor=b.tensor)
651
+ _check_edit_distance(a_, a_spatial_dim, b_, b_spatial_dim)
652
+
653
+ rnd = numpy.random.RandomState(42)
654
+ for a, b in itertools.product(_SeqsB1, _SeqsB1):
655
+ batch_size = rnd.randint(2, 11)
656
+ a_max_len = rnd.randint(a.seq_lens[0], a.seq_lens[0] + 5)
657
+ b_max_len = rnd.randint(b.seq_lens[0], b.seq_lens[0] + 5)
658
+ a_sizes = rnd.randint(0, a_max_len + 1, size=(batch_size,))
659
+ b_sizes = rnd.randint(0, b_max_len + 1, size=(batch_size,))
660
+ a_sizes[0] = a.seq_lens[0]
661
+ b_sizes[0] = b.seq_lens[0]
662
+ a_max_len = max(a_sizes)
663
+ b_max_len = max(b_sizes)
664
+ a_values = rnd.randint(0, 10, (batch_size, a_max_len))
665
+ b_values = rnd.randint(0, 10, (batch_size, b_max_len))
666
+ a_values[0, : a.seq_lens[0]] = a.tensor[0, : a.seq_lens[0]]
667
+ b_values[0, : b.seq_lens[0]] = b.tensor[0, : b.seq_lens[0]]
668
+ a_sizes = torch.tensor(a_sizes, dtype=torch.int32)
669
+ b_sizes = torch.tensor(b_sizes, dtype=torch.int32)
670
+
671
+ # noinspection PyShadowingNames
672
+ batch_dim = Dim(batch_size, name="batch")
673
+ a_spatial_dim = Dim(Tensor("a_sizes", [batch_dim], dtype="int32", raw_tensor=a_sizes))
674
+ b_spatial_dim = Dim(Tensor("b_sizes", [batch_dim], dtype="int32", raw_tensor=b_sizes))
675
+ a_ = Tensor("a", [batch_dim, a_spatial_dim], dtype="int64", raw_tensor=torch.tensor(a_values))
676
+ b_ = Tensor("b", [batch_dim, b_spatial_dim], dtype="int64", raw_tensor=torch.tensor(b_values))
677
+ _check_edit_distance(a_, a_spatial_dim, b_, b_spatial_dim)
@@ -1,2 +0,0 @@
1
- version = '1.20240905.172412'
2
- long_version = '1.20240905.172412+git.fb9d5c3'
@@ -1,93 +0,0 @@
1
- """
2
- Loss functions
3
- """
4
-
5
- from __future__ import annotations
6
- from returnn.tensor import Tensor, Dim
7
- import returnn.frontend as rf
8
-
9
-
10
- __all__ = ["cross_entropy", "ctc_loss"]
11
-
12
-
13
- def cross_entropy(
14
- *,
15
- estimated: Tensor,
16
- target: Tensor,
17
- axis: Dim,
18
- estimated_type: str,
19
- ) -> Tensor:
20
- """
21
- ``target`` is supposed to be in probability space (normalized). It can also be sparse, i.e. contain class indices.
22
- ``estimated`` can be probs, log-probs or logits, specified via ``estimated_type``.
23
-
24
- Assuming both are in probability space, the cross entropy is:
25
-
26
- H(target,estimated) = -reduce_sum(target * log(estimated), axis=axis)
27
- = -matmul(target, log(estimated), reduce=axis)
28
-
29
- In case you want label smoothing, you can use e.g.::
30
-
31
- ce = nn.cross_entropy(
32
- target=nn.label_smoothing(target, 0.1),
33
- estimated=estimated)
34
-
35
- :param estimated: probs, log-probs or logits, specified via ``estimated_type``
36
- :param target: probs, normalized, can also be sparse
37
- :param axis: class labels dim over which softmax is computed
38
- :param estimated_type: "probs", "log-probs" or "logits"
39
- :return: cross entropy (same Dims as 'estimated' but without 'axis')
40
- """
41
-
42
- if estimated_type == "logits":
43
- # This is a common case and most backends provide optimized functions for it.
44
- # noinspection PyProtectedMember
45
- return estimated._raw_backend.softmax_cross_entropy_with_logits(logits=estimated, targets=target, axis=axis)
46
- if estimated_type == "probs":
47
- log_prob = rf.log(estimated) # TODO: make numerically stable
48
- elif estimated_type == "log-probs":
49
- log_prob = estimated
50
- else:
51
- raise ValueError("estimated_type must be 'probs', 'log-probs' or 'logits'")
52
- if target.sparse_dim:
53
- return -rf.gather(log_prob, indices=target, axis=axis)
54
- return -rf.matmul(target, log_prob, reduce=axis)
55
-
56
-
57
- def ctc_loss(
58
- *,
59
- logits: Tensor,
60
- logits_normalized: bool = False,
61
- targets: Tensor,
62
- input_spatial_dim: Dim,
63
- targets_spatial_dim: Dim,
64
- blank_index: int,
65
- max_approx: bool = False,
66
- ) -> Tensor:
67
- """
68
- Calculates the CTC loss.
69
-
70
- Internally, this uses :func:`returnn.tf.native_op.ctc_loss`
71
- which is equivalent to tf.nn.ctc_loss but more efficient.
72
-
73
- Output is of shape [B].
74
-
75
- :param logits: (before softmax). shape [B...,input_spatial,C]
76
- :param logits_normalized: whether the logits are already normalized (e.g. via log-softmax)
77
- :param targets: sparse. shape [B...,targets_spatial] -> C
78
- :param input_spatial_dim: spatial dim of input logits
79
- :param targets_spatial_dim: spatial dim of targets
80
- :param blank_index: vocab index of the blank symbol
81
- :param max_approx: if True, use max instead of sum over alignments (max approx, Viterbi)
82
- :return: loss shape [B...]
83
- """
84
- # noinspection PyProtectedMember
85
- return logits._raw_backend.ctc_loss(
86
- logits=logits,
87
- logits_normalized=logits_normalized,
88
- targets=targets,
89
- input_spatial_dim=input_spatial_dim,
90
- targets_spatial_dim=targets_spatial_dim,
91
- blank_index=blank_index,
92
- max_approx=max_approx,
93
- )