returnn 1.20230814.164933__tar.gz → 1.20230815.191535__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (410)
  1. {returnn-1.20230814.164933/returnn.egg-info → returnn-1.20230815.191535}/PKG-INFO +1 -1
  2. returnn-1.20230815.191535/_setup_info_generated.py +2 -0
  3. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/__init__.py +1 -0
  4. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/_backend.py +38 -0
  5. returnn-1.20230815.191535/returnn/frontend/gradient.py +74 -0
  6. returnn-1.20230815.191535/returnn/frontend/label_smoothing.py +114 -0
  7. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/signal.py +50 -0
  8. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_layers/_backend.py +32 -0
  9. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/layers/basic.py +39 -0
  10. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/frontend/_backend.py +39 -0
  11. returnn-1.20230815.191535/returnn/torch/functional/scaled_gradient.py +79 -0
  12. returnn-1.20230815.191535/returnn/util/math.py +11 -0
  13. {returnn-1.20230814.164933 → returnn-1.20230815.191535/returnn.egg-info}/PKG-INFO +1 -1
  14. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn.egg-info/SOURCES.txt +5 -0
  15. returnn-1.20230815.191535/tests/test_rf_gradient.py +33 -0
  16. returnn-1.20230815.191535/tests/test_rf_label_smoothing.py +39 -0
  17. returnn-1.20230814.164933/_setup_info_generated.py +0 -2
  18. returnn-1.20230814.164933/returnn/frontend/gradient.py +0 -15
  19. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/.editorconfig +0 -0
  20. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/.gitignore +0 -0
  21. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/.gitmodules +0 -0
  22. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/.kateconfig +0 -0
  23. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/CHANGELOG.md +0 -0
  24. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/CODEOWNERS +0 -0
  25. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/CONTRIBUTING.md +0 -0
  26. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/LICENSE +0 -0
  27. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/MANIFEST.in +0 -0
  28. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/README.rst +0 -0
  29. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/__init__.py +0 -0
  30. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/12AX.cluster_map +0 -0
  31. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/_setup_returnn_env.py +0 -0
  32. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-fwd.config +0 -0
  33. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-horovod-mpi.py +0 -0
  34. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-horovod-mpi.py.sh +0 -0
  35. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-horovod-mpi.sh +0 -0
  36. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-hyper-param-tuning.config +0 -0
  37. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-iter-dataset.py +0 -0
  38. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-list-devices.py +0 -0
  39. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-lua-torch-layer.config +0 -0
  40. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-pretrain.config +0 -0
  41. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-record-and-push-to-webserver.py +0 -0
  42. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-returnn-as-framework.py +0 -0
  43. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-rf.config +0 -0
  44. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-rhn-enwik8.config +0 -0
  45. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-sprint-interface.py +0 -0
  46. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-att-copy.config +0 -0
  47. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-attention.config +0 -0
  48. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
  49. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
  50. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-enc-dec.config +0 -0
  51. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-hard-att-copy.config +0 -0
  52. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-lstm-benchmark.py +0 -0
  53. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
  54. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
  55. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-native-lstm.12ax.config +0 -0
  56. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-native-lstm2.12ax.config +0 -0
  57. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
  58. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-neural-transducer.12ax.config +0 -0
  59. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-rec-explicit-lstm.config +0 -0
  60. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-rec-explicit-rnn.config +0 -0
  61. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-rec-self-att.config +0 -0
  62. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-search-compiled-graph.py +0 -0
  63. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
  64. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-timit-lstm-ctc.config +0 -0
  65. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-torch.config +0 -0
  66. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
  67. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo.sh +0 -0
  68. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
  69. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
  70. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
  71. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/README.txt +0 -0
  72. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/chars.txt +0 -0
  73. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/config_demo +0 -0
  74. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/config_fwd +0 -0
  75. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/config_real +0 -0
  76. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
  77. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/decode.py +0 -0
  78. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
  79. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/go.sh +0 -0
  80. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/lines.txt +0 -0
  81. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/split/eval.txt +0 -0
  82. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/split/train.txt +0 -0
  83. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/split/valid.txt +0 -0
  84. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/README.md +0 -0
  85. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/artificial/create_test_h5.py +0 -0
  86. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/artificial/forwardconfig +0 -0
  87. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/artificial/go.sh +0 -0
  88. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/artificial/trainconfig +0 -0
  89. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
  90. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
  91. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/artificial_rgb/go.sh +0 -0
  92. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
  93. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/pyproject.toml +0 -0
  94. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/requirements.txt +0 -0
  95. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/__init__.py +0 -0
  96. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/__main__.py +0 -0
  97. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/__old_mod_loader__.py +0 -0
  98. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/__setup__.py +0 -0
  99. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/config.py +0 -0
  100. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/__init__.py +0 -0
  101. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/audio.py +0 -0
  102. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/basic.py +0 -0
  103. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/bundle_file.py +0 -0
  104. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/cached.py +0 -0
  105. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/cached2.py +0 -0
  106. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/generating.py +0 -0
  107. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/hdf.py +0 -0
  108. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/lm.py +0 -0
  109. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/map.py +0 -0
  110. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/meta.py +0 -0
  111. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/multi_proc.py +0 -0
  112. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/normalization_data.py +0 -0
  113. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/numpy_dump.py +0 -0
  114. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/raw_wav.py +0 -0
  115. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/sprint.py +0 -0
  116. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/stereo.py +0 -0
  117. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/util/__init__.py +0 -0
  118. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/util/feature_extraction.py +0 -0
  119. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/util/strings.py +0 -0
  120. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/util/vocabulary.py +0 -0
  121. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/engine/__init__.py +0 -0
  122. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/engine/base.py +0 -0
  123. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/engine/batch.py +0 -0
  124. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/__init__.py +0 -0
  125. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/__main__.py +0 -0
  126. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/.git +0 -0
  127. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
  128. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
  129. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
  130. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
  131. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
  132. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
  133. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
  134. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
  135. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
  136. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
  137. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
  138. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
  139. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
  140. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
  141. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
  142. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
  143. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
  144. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
  145. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
  146. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
  147. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
  148. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
  149. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
  150. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
  151. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/__init__.py +0 -0
  152. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/graph_editor/README.md +0 -0
  153. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/graph_editor/__init__.py +0 -0
  154. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/graph_editor/edit.py +0 -0
  155. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/graph_editor/reroute.py +0 -0
  156. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/graph_editor/select.py +0 -0
  157. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/graph_editor/subgraph.py +0 -0
  158. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/graph_editor/transform.py +0 -0
  159. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/graph_editor/util.py +0 -0
  160. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/forward_iface.py +0 -0
  161. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/_numpy_backend.py +0 -0
  162. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/_utils.py +0 -0
  163. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/array_.py +0 -0
  164. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/attention.py +0 -0
  165. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/cond.py +0 -0
  166. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/const.py +0 -0
  167. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/container.py +0 -0
  168. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/control_flow_ctx.py +0 -0
  169. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/conv.py +0 -0
  170. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/device.py +0 -0
  171. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/dims.py +0 -0
  172. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/dropout.py +0 -0
  173. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/dtype.py +0 -0
  174. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/encoder/__init__.py +0 -0
  175. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/encoder/base.py +0 -0
  176. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/encoder/conformer.py +0 -0
  177. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/init.py +0 -0
  178. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/linear.py +0 -0
  179. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/loop.py +0 -0
  180. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/loss.py +0 -0
  181. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/math_.py +0 -0
  182. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/matmul.py +0 -0
  183. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/module.py +0 -0
  184. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/normalization.py +0 -0
  185. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/parameter.py +0 -0
  186. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/rand.py +0 -0
  187. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/rec.py +0 -0
  188. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/reduce.py +0 -0
  189. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/run_ctx.py +0 -0
  190. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/state.py +0 -0
  191. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/tensor_array.py +0 -0
  192. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/types.py +0 -0
  193. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/import_/__init__.py +0 -0
  194. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/import_/common.py +0 -0
  195. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/import_/git.py +0 -0
  196. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/import_/import_.py +0 -0
  197. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/learning_rate_control.py +0 -0
  198. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/log.py +0 -0
  199. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/native_op.cpp +0 -0
  200. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/native_op.py +0 -0
  201. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/pretrain.py +0 -0
  202. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/sprint/__init__.py +0 -0
  203. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/sprint/cache.py +0 -0
  204. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/sprint/control.py +0 -0
  205. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/sprint/error_signals.py +0 -0
  206. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/sprint/extern_interface.py +0 -0
  207. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/sprint/interface.py +0 -0
  208. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tensor/README.md +0 -0
  209. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tensor/__init__.py +0 -0
  210. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tensor/_dim_extra.py +0 -0
  211. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tensor/_tensor_extra.py +0 -0
  212. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tensor/_tensor_mixin_base.py +0 -0
  213. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tensor/_tensor_op_overloads.py +0 -0
  214. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tensor/control_flow_ctx.py +0 -0
  215. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tensor/dim.py +0 -0
  216. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tensor/marked_dim.py +0 -0
  217. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tensor/tensor.py +0 -0
  218. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tensor/tensor_dict.py +0 -0
  219. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tensor/utils.py +0 -0
  220. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/__init__.py +0 -0
  221. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/compat.py +0 -0
  222. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/data_pipeline.py +0 -0
  223. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/distributed.py +0 -0
  224. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/engine.py +0 -0
  225. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_layers/__init__.py +0 -0
  226. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_layers/_utils.py +0 -0
  227. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_layers/cond.py +0 -0
  228. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
  229. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
  230. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_layers/dims.py +0 -0
  231. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_layers/layer.py +0 -0
  232. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_layers/make_layer.py +0 -0
  233. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
  234. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
  235. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_low_level/__init__.py +0 -0
  236. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_low_level/_backend.py +0 -0
  237. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/horovod.py +0 -0
  238. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/hyper_param_tuning.py +0 -0
  239. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/layers/__init__.py +0 -0
  240. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/layers/base.py +0 -0
  241. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/layers/rec.py +0 -0
  242. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/layers/segmental_model.py +0 -0
  243. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/layers/signal_processing.py +0 -0
  244. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/layers/variable.py +0 -0
  245. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/native_op.py +0 -0
  246. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/network.py +0 -0
  247. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/sprint.py +0 -0
  248. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/updater.py +0 -0
  249. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/util/__init__.py +0 -0
  250. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/util/basic.py +0 -0
  251. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/util/data.py +0 -0
  252. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/util/gradient_checkpoint.py +0 -0
  253. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/util/ken_lm.py +0 -0
  254. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/util/open_fst.py +0 -0
  255. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/README.md +0 -0
  256. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/__init__.py +0 -0
  257. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/data/__init__.py +0 -0
  258. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/data/pipeline.py +0 -0
  259. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
  260. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/data/tensor_utils.py +0 -0
  261. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/distributed.py +0 -0
  262. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/engine.py +0 -0
  263. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/frontend/__init__.py +0 -0
  264. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/frontend/_rand.py +0 -0
  265. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/frontend/bridge.py +0 -0
  266. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/functional/README.md +0 -0
  267. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/functional/__init__.py +0 -0
  268. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/updater.py +0 -0
  269. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/__init__.py +0 -0
  270. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/basic.py +0 -0
  271. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/better_exchook.py +0 -0
  272. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/bpe.py +0 -0
  273. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/debug.py +0 -0
  274. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/debug_helpers.py +0 -0
  275. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/fsa.py +0 -0
  276. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/literal_py_to_pickle.py +0 -0
  277. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/pprint.py +0 -0
  278. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/py-to-pickle.cpp +0 -0
  279. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/py_compat.py +0 -0
  280. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/sig_proc.py +0 -0
  281. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/task_system.py +0 -0
  282. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn.egg-info/dependency_links.txt +0 -0
  283. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn.egg-info/top_level.txt +0 -0
  284. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/rnn.py +0 -0
  285. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/setup.cfg +0 -0
  286. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/setup.py +0 -0
  287. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/DummySprintExec.py +0 -0
  288. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/PyCharm-inspection-profile.xml +0 -0
  289. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/PyCharm.idea/.gitignore +0 -0
  290. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/PyCharm.idea/.name +0 -0
  291. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
  292. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
  293. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
  294. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
  295. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
  296. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/PyCharm.idea/misc.xml +0 -0
  297. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/PyCharm.idea/modules.xml +0 -0
  298. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/PyCharm.idea/returnn.iml +0 -0
  299. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
  300. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/_set_num_threads1.py +0 -0
  301. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/_setup_returnn_env.py +0 -0
  302. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/_setup_test_env.py +0 -0
  303. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/bpe-unicode-demo.codes +0 -0
  304. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/bpe-unicode-demo.vocab +0 -0
  305. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/lexicon_opt.fst +0 -0
  306. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/lexicon_opt.isyms +0 -0
  307. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/lexicon_opt.jpg +0 -0
  308. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/lexicon_opt.osyms +0 -0
  309. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/lint_common.py +0 -0
  310. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/pycharm-inspect.py +0 -0
  311. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/pylint.py +0 -0
  312. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/returnn-as-framework.py +0 -0
  313. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/rf_utils.py +0 -0
  314. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/spelling.dic +0 -0
  315. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_Config.py +0 -0
  316. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_Dataset.py +0 -0
  317. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_Fsa.py +0 -0
  318. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_GeneratingDataset.py +0 -0
  319. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_HDFDataset.py +0 -0
  320. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_LearningRateControl.py +0 -0
  321. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_Log.py +0 -0
  322. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_MultiProcDataset.py +0 -0
  323. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_PTDataset.py +0 -0
  324. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_Pretrain.py +0 -0
  325. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_ResNet.py +0 -0
  326. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_SprintDataset.py +0 -0
  327. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_SprintInterface.py +0 -0
  328. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_TFEngine.py +0 -0
  329. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_TFNativeOp.py +0 -0
  330. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_TFNetworkLayer.py +0 -0
  331. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_TFNetworkRecLayer.py +0 -0
  332. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_TFNetworkSigProcLayer.py +0 -0
  333. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_TFUpdater.py +0 -0
  334. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_TFUtil.py +0 -0
  335. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_TF_determinism.py +0 -0
  336. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_TaskSystem.py +0 -0
  337. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_TaskSystem_SharedMem.py +0 -0
  338. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_TranslationDataset.py +0 -0
  339. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_Util.py +0 -0
  340. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_demos.py +0 -0
  341. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_fork_exec.py +0 -0
  342. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_hdf_dump.py +0 -0
  343. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_array.py +0 -0
  344. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_attention.py +0 -0
  345. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_base.py +0 -0
  346. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_cond.py +0 -0
  347. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_const.py +0 -0
  348. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_container.py +0 -0
  349. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_conv.py +0 -0
  350. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_encoder_conformer.py +0 -0
  351. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_loop.py +0 -0
  352. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_math.py +0 -0
  353. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_normalization.py +0 -0
  354. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_rec.py +0 -0
  355. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_reduce.py +0 -0
  356. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_signal.py +0 -0
  357. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_tensor.py +0 -0
  358. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_tools.py +0 -0
  359. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_torch_engine.py +0 -0
  360. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_torch_frontend.py +0 -0
  361. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_torch_internal_frontend.py +0 -0
  362. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/_setup_returnn_env.py +0 -0
  363. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/analyze-dataset-batches.py +0 -0
  364. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/bliss-collect-seq-lens.py +0 -0
  365. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/bliss-dump-text.py +0 -0
  366. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/bliss-get-segment-names.py +0 -0
  367. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/bliss-to-ogg-zip.py +0 -0
  368. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/bpe-create-lexicon.py +0 -0
  369. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/calculate-word-error-rate.py +0 -0
  370. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/cleanup-old-models.py +0 -0
  371. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/collect-orth-symbols.py +0 -0
  372. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/collect-words.py +0 -0
  373. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/compile_native_op.py +0 -0
  374. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/compile_tf_graph.py +0 -0
  375. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/debug-dump-search-scores.py +0 -0
  376. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/debug-plot-search-scores.py +0 -0
  377. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/dump-dataset-raw-strings.py +0 -0
  378. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/dump-dataset.py +0 -0
  379. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/dump-forward-stats.py +0 -0
  380. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/dump-forward.py +0 -0
  381. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/dump-network-json.py +0 -0
  382. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/dump-pickle.py +0 -0
  383. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/extract_state_tying_from_dataset.py +0 -0
  384. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/get-attention-weights.py +0 -0
  385. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/get-best-model-epoch.py +0 -0
  386. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/hdf_dump.py +0 -0
  387. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/hdf_dump_translation_dataset.py +0 -0
  388. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/import-blocks-mt-model.py +0 -0
  389. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/import-t2t-mt-model.py +0 -0
  390. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/.gitignore +0 -0
  391. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/Makefile +0 -0
  392. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/README.md +0 -0
  393. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/example/README.md +0 -0
  394. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/example/libs_list +0 -0
  395. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
  396. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
  397. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
  398. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/example/state_vars_list +0 -0
  399. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/example/tensor_names_list +0 -0
  400. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/file.h +0 -0
  401. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
  402. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
  403. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/main.cc +0 -0
  404. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/rescorer.h +0 -0
  405. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/vocabulary.cc +0 -0
  406. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/vocabulary.h +0 -0
  407. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/tf_avg_checkpoints.py +0 -0
  408. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/tf_inspect_checkpoint.py +0 -0
  409. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/tf_inspect_summary_log.py +0 -0
  410. {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/torch_export_to_onnx.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: returnn
3
- Version: 1.20230814.164933
3
+ Version: 1.20230815.191535
4
4
  Summary: The RWTH extensible training framework for universal recurrent neural networks
5
5
  Home-page: https://github.com/rwth-i6/returnn/
6
6
  Author: Albert Zeyer
@@ -0,0 +1,2 @@
1
+ version = '1.20230815.191535'
2
+ long_version = '1.20230815.191535+git.2a78f6e'
@@ -29,6 +29,7 @@ from .dims import *
29
29
  from .dropout import *
30
30
  from .dtype import *
31
31
  from .gradient import *
32
+ from .label_smoothing import *
32
33
  from .linear import *
33
34
  from .loop import *
34
35
  from .loss import *
@@ -288,6 +288,22 @@ class Backend(Generic[T]):
288
288
  res.raw_tensor = tensor._raw_backend.cast_raw(tensor.raw_tensor, dtype)
289
289
  return res
290
290
 
291
+ @staticmethod
292
+ def set_requires_gradient(tensor: Tensor):
293
+ """
294
+ :param tensor:
295
+ """
296
+ raise NotImplementedError
297
+
298
+ @staticmethod
299
+ def gradient(y: Tensor, x: Tensor) -> Tensor:
300
+ """
301
+ :param y:
302
+ :param x:
303
+ :return: gradient of y w.r.t. x
304
+ """
305
+ raise NotImplementedError
306
+
291
307
  @staticmethod
292
308
  def stop_gradient(tensor: Tensor) -> Tensor:
293
309
  """
@@ -296,6 +312,28 @@ class Backend(Generic[T]):
296
312
  """
297
313
  raise NotImplementedError
298
314
 
315
+ @staticmethod
316
+ def scaled_gradient(tensor: Tensor, scale: Union[float, Tensor]) -> Tensor:
317
+ """
318
+ :param tensor:
319
+ :param scale:
320
+ :return: tensor with scaled gradient
321
+ """
322
+ raise NotImplementedError
323
+
324
+ @staticmethod
325
+ def scaled_gradient_ext(
326
+ x: Tensor, *, scale: float = 1.0, shift: float = 0.0, scale_shift_by_sum_over_axis: Optional[Dim] = None
327
+ ):
328
+ """
329
+ :param x:
330
+ :param scale: will scale gradient by this value
331
+ :param shift: will shift gradient by this value
332
+ :param scale_shift_by_sum_over_axis: if given, will scale and shift by the sum over the given axis
333
+ :return: just x, but gradient in backward pass will be transformed accordingly
334
+ """
335
+ raise NotImplementedError
336
+
299
337
  @staticmethod
300
338
  def merge_dims(
301
339
  source: Tensor,
@@ -0,0 +1,74 @@
1
+ """
2
+ Utilities which affect the gradient
3
+ """
4
+
5
+ from __future__ import annotations
6
+ from typing import Optional, Union
7
+ from returnn.tensor import Tensor, Dim
8
+
9
+
10
+ __all__ = ["set_requires_gradient", "gradient", "stop_gradient", "scaled_gradient", "scaled_gradient_ext"]
11
+
12
+
13
+ def set_requires_gradient(source: Tensor):
14
+ """
15
+ :param source:
16
+ :return: nothing, modifies source in-place
17
+ """
18
+ # noinspection PyProtectedMember
19
+ return source._raw_backend.set_requires_gradient(source)
20
+
21
+
22
+ def gradient(y: Tensor, x: Tensor) -> Tensor:
23
+ """
24
+ :param y: some scalar
25
+ :param x: some tensor
26
+ :return: gradient of y w.r.t. x
27
+ """
28
+ # noinspection PyProtectedMember
29
+ return y._raw_backend.gradient(y, x)
30
+
31
+
32
+ def stop_gradient(source: Tensor) -> Tensor:
33
+ """wraps tf.stop_gradient or torch detach"""
34
+ # noinspection PyProtectedMember
35
+ return source._raw_backend.stop_gradient(source)
36
+
37
+
38
+ def scaled_gradient(source: Tensor, scale: Union[float, Tensor]) -> Tensor:
39
+ """
40
+ :param source:
41
+ :param scale: if constant 0., will use :func:`stop_gradient`.
42
+ Can be used as gradient reversal layer (with negative factor).
43
+ :return: source with scaled gradient
44
+ """
45
+ if not isinstance(scale, Tensor) and scale == 0.0:
46
+ return stop_gradient(source)
47
+ # noinspection PyProtectedMember
48
+ return source._raw_backend.scaled_gradient(source, scale)
49
+
50
+
51
+ def scaled_gradient_ext(
52
+ source: Tensor,
53
+ *,
54
+ scale: Union[float, Tensor],
55
+ shift: Optional[Union[float, Tensor]] = None,
56
+ scale_shift_by_sum_over_axis: Optional[Dim] = None,
57
+ ) -> Tensor:
58
+ """
59
+ Just `identity` in the forward pass.
60
+ Scales the gradient by some factor in backprop.
61
+ Can be used as gradient reversal layer (with negative factor).
62
+ For TF, uses :func:`returnn.tf.util.basic.scaled_gradient`, or :func:`tf.stop_gradient`
63
+
64
+ :param source:
65
+ :param scale: if constant 0. and no shift, will use :func:`stop_gradient`
66
+ :param shift:
67
+ :param scale_shift_by_sum_over_axis: if given, calculates the sum over this axis (absolute values)
68
+ and multiplies the shift value by this sum.
69
+ :return: source with transformed gradient
70
+ """
71
+ # noinspection PyProtectedMember
72
+ return source._raw_backend.scaled_gradient_ext(
73
+ source, scale=scale, shift=shift, scale_shift_by_sum_over_axis=scale_shift_by_sum_over_axis
74
+ )
@@ -0,0 +1,114 @@
1
+ """
2
+ Label smoothing
3
+ """
4
+
5
+ from __future__ import annotations
6
+ from typing import Optional, Union, Sequence
7
+ from returnn.tensor import Tensor, Dim
8
+ import returnn.frontend as rf
9
+
10
+
11
+ __all__ = ["label_smoothing", "smooth_one_hot", "label_smoothed_log_prob_gradient"]
12
+
13
+
14
+ def label_smoothing(prob: Tensor, smoothing: Union[Tensor, float], *, axis: Optional[Dim] = None) -> Tensor:
15
+ """
16
+ Label smoothing, often used for cross entropy.
17
+
18
+ In case of sparse data, it will become dense (via :func:`smooth_one_hot`)
19
+ and the target label will get probability (1 - smoothing).
20
+ """
21
+ if not axis:
22
+ assert prob.feature_dim or prob.sparse_dim
23
+ axis = prob.feature_dim or prob.sparse_dim
24
+ if prob.sparse_dim:
25
+ assert prob.sparse_dim == axis
26
+ return rf.smooth_one_hot(prob, label_prob=1.0 - smoothing)
27
+ else:
28
+ assert axis in prob.dims_set
29
+ # Make it consistent to the sparse case.
30
+ # Value of 1.0 should result in (1 - smoothing).
31
+ # Value of 0.0 should result in smoothing / (dim - 1).
32
+ # Sum over all should still remain 1.0.
33
+ dim = axis.dimension
34
+ floor_prob = smoothing / (dim - 1)
35
+ factor = 1.0 - dim * floor_prob
36
+ # Case for prob[i] == 0 is clear.
37
+ # Case for prob[i] == 1: 1 - dim * floor_prob + floor_prob = 1 + (1 - dim) * floor_prob = 1 - smoothing
38
+ # Sum over all: 1 - dim * floor_prob + floor_prob * dim = 1
39
+ return prob * factor + floor_prob
40
+
41
+
42
+ def smooth_one_hot(source: Tensor, *, label_prob: Union[Tensor, float]) -> Tensor:
43
+ """
44
+ Smooth variant of :func:`one_hot`.
45
+ Uses ``label_prob`` for the labels and ``(1 - label_prob) / (dim - 1)`` for the remaining values.
46
+ This is used for label smoothing.
47
+ """
48
+ assert source.sparse_dim
49
+ if source.sparse_dim.dimension is None:
50
+ raise NotImplementedError(f"smooth_one_hot({source}) not implemented for dynamic dims")
51
+ return rf.sparse_to_dense(
52
+ source, label_value=label_prob, other_value=(1.0 - label_prob) / (source.sparse_dim.dimension - 1)
53
+ )
54
+
55
+
56
+ def label_smoothed_log_prob_gradient(
57
+ log_prob: Tensor,
58
+ smoothing: Union[Tensor, float],
59
+ *,
60
+ axis: Optional[Dim] = None,
61
+ exclude_labels: Optional[Sequence[int]] = None,
62
+ ) -> Tensor:
63
+ """
64
+ :param log_prob: shape [...,D] (not necessarily the same as loss)
65
+ :param smoothing: smoothing factor, for :func:`label_smoothing`
66
+ :param axis: label axis. uses feature_dim by default
67
+ :param exclude_labels: list of labels to exclude from smoothing (e.g. blank)
68
+
69
+ Assume some cross-entropy-like loss:
70
+
71
+ loss = - sum_i target_prob[i] * log_prob[i] .
72
+
73
+ The sum is over the label indices i (corresponding to the ``axis`` argument).
74
+ Then the gradient of loss w.r.t. log_prob[i] is:
75
+
76
+ grad_logprob[i] loss = -target_prob[i] .
77
+
78
+ We assume that the negative gradient is a probability distribution, and apply :func:`label_smoothing` on it.
79
+ More specifically, we apply the same scale and shift as in the :func:`label_smoothing` function
80
+ via :func:`scaled_gradient`.
81
+
82
+ Just as a side remark: assume
83
+
84
+ log_prob = log_softmax(z) .
85
+
86
+ The gradient of log_softmax is:
87
+
88
+ grad_z[j] log_prob[i] = delta(i==j) - softmax(z)[j] .
89
+
90
+ Then the gradient w.r.t. z[j] is:
91
+
92
+ grad_z[j] loss = sum_i (grad_logprob[i] loss) (grad_z[j] logprob[i])
93
+ = sum_i -target_prob[i] delta(i==j) + target_prob[i] softmax(z)[j]
94
+ = -target_prob[j] + (sum_i target_prob[i]) softmax(z)[j]
95
+ = softmax(z)[j] - target_prob[j] # assuming (sum_i target_prob[i]) == 1
96
+
97
+ """
98
+ if not axis:
99
+ assert log_prob.feature_dim
100
+ axis = log_prob.feature_dim
101
+ # See formula above for label_smoothing.
102
+ dim = axis.dimension
103
+ floor_prob = smoothing / (dim - 1)
104
+ factor = 1.0 - dim * floor_prob
105
+ if exclude_labels:
106
+ indices = rf.range_over_dim(axis)
107
+ mask = True
108
+ for label in exclude_labels:
109
+ mask = mask & (indices != label)
110
+ factor = rf.where(mask, factor, 1.0)
111
+ floor_prob = rf.where(mask, floor_prob, 0.0)
112
+ # The gradient is expected to be the negative target prob, thus negative floor_prob.
113
+ # The gradient is expected to be 0. for masked frames, thus the clipping logic.
114
+ return rf.scaled_gradient_ext(log_prob, scale=factor, shift=-floor_prob, scale_shift_by_sum_over_axis=axis)
@@ -5,12 +5,17 @@ stft etc
5
5
 
6
6
  from __future__ import annotations
7
7
  from typing import Optional, Union, Tuple
8
+ import math
8
9
  import numpy
9
10
  import functools
11
+ from returnn.util import math as util_math
10
12
  from returnn.tensor import Tensor, Dim
11
13
  import returnn.frontend as rf
12
14
 
13
15
 
16
+ __all__ = ["stft", "mel_filterbank", "log_mel_filterbank_from_raw"]
17
+
18
+
14
19
  def stft(
15
20
  x: Tensor,
16
21
  *,
@@ -230,3 +235,48 @@ def _mel_filter_bank_matrix_np(
230
235
  f_mat[i1, i2 - 1] = el_val
231
236
 
232
237
  return f_mat
238
+
239
+
240
+ def log_mel_filterbank_from_raw(
241
+ raw_audio: Tensor,
242
+ *,
243
+ in_spatial_dim: Dim,
244
+ out_dim: Dim,
245
+ sampling_rate: int = 16_000,
246
+ window_len: float = 0.025,
247
+ step_len: float = 0.010,
248
+ n_fft: Optional[int] = None,
249
+ log_base: Union[int, float] = 10,
250
+ ) -> Tuple[Tensor, Dim]:
251
+ """
252
+ log mel filterbank features
253
+
254
+ :param raw_audio: (..., in_spatial_dim, ...). if it has a feature_dim with dimension 1, it is squeezed away.
255
+ :param in_spatial_dim:
256
+ :param out_dim: nr of mel filters.
257
+ :param sampling_rate: samples per second
258
+ :param window_len: in seconds
259
+ :param step_len: in seconds
260
+ :param n_fft: fft_size, n_fft. Should match fft_length from :func:`stft`.
261
+ If not provided, next power-of-two from window_num_frames.
262
+ :param log_base: e.g. 10 or math.e
263
+ """
264
+ if raw_audio.feature_dim and raw_audio.feature_dim.dimension == 1:
265
+ raw_audio = rf.squeeze(raw_audio, axis=raw_audio.feature_dim)
266
+ window_num_frames = int(window_len * sampling_rate)
267
+ step_num_frames = int(step_len * sampling_rate)
268
+ if not n_fft:
269
+ n_fft = util_math.next_power_of_two(window_num_frames)
270
+ spectrogram, out_spatial_dim, in_dim_ = rf.stft(
271
+ raw_audio,
272
+ in_spatial_dim=in_spatial_dim,
273
+ frame_step=step_num_frames,
274
+ frame_length=window_num_frames,
275
+ fft_length=n_fft,
276
+ )
277
+ power_spectrogram = rf.abs(spectrogram) ** 2.0
278
+ mel_fbank = rf.mel_filterbank(power_spectrogram, in_dim=in_dim_, out_dim=out_dim, sampling_rate=sampling_rate)
279
+ log_mel_fbank = rf.safe_log(mel_fbank, eps=1e-10)
280
+ if log_base != math.e:
281
+ log_mel_fbank = log_mel_fbank * (1.0 / math.log(log_base))
282
+ return log_mel_fbank, out_spatial_dim
@@ -141,11 +141,43 @@ class ReturnnLayersBackend(Backend[Layer]):
141
141
  """cast"""
142
142
  return rfl.make_layer({"class": "cast", "from": tensor, "dtype": dtype}, name="cast")
143
143
 
144
+ @staticmethod
145
+ def set_requires_gradient(tensor: Tensor):
146
+ """
147
+ set requires gradient; not needed for TensorFlow, will always calculate whatever is needed
148
+ """
149
+
150
+ @staticmethod
151
+ def gradient(y: Tensor, x: Tensor) -> Tensor:
152
+ """gradient"""
153
+ return rfl.make_layer({"class": "gradient", "y": y, "x": x}, name="gradient")
154
+
144
155
  @staticmethod
145
156
  def stop_gradient(tensor: Tensor) -> Tensor:
146
157
  """stop grad"""
147
158
  return rfl.make_layer({"class": "scaled_grad", "from": tensor, "scale": 0}, name="stop_gradient")
148
159
 
160
+ @staticmethod
161
+ def scaled_gradient(tensor: Tensor, scale: Union[float, Tensor]) -> Tensor:
162
+ """scaled gradient"""
163
+ return rfl.make_layer({"class": "scaled_grad", "from": tensor, "scale": scale}, name="scaled_gradient")
164
+
165
+ @staticmethod
166
+ def scaled_gradient_ext(
167
+ x: Tensor, *, scale: float = 1.0, shift: float = 0.0, scale_shift_by_sum_over_axis: Optional[Dim] = None
168
+ ):
169
+ """scaled gradient ext"""
170
+ return rfl.make_layer(
171
+ {
172
+ "class": "scaled_grad",
173
+ "from": x,
174
+ "scale": scale,
175
+ "shift": shift,
176
+ "scale_shift_by_sum_over_axis": scale_shift_by_sum_over_axis,
177
+ },
178
+ name="scaled_gradient_ext",
179
+ )
180
+
149
181
  @staticmethod
150
182
  def merge_dims(
151
183
  source: Tensor,
@@ -11153,6 +11153,45 @@ class FastBaumWelchLayer(_ConcatInputLayer):
11153
11153
  return get_concat_sources_data_template(sources, name="%s_output" % name).copy_as_time_major()
11154
11154
 
11155
11155
 
11156
+ class GradientLayer(_ConcatInputLayer):
11157
+ """
11158
+ Calculates the gradient of y w.r.t. x.
11159
+ """
11160
+
11161
+ layer_class = "gradient"
11162
+
11163
+ def __init__(self, y: LayerBase, x: LayerBase, **kwargs):
11164
+ """
11165
+ :param y:
11166
+ :param x:
11167
+ """
11168
+ super(GradientLayer, self).__init__(**kwargs)
11169
+ self.output.placeholder = tf.gradients(ys=y.output.placeholder, xs=x.output.placeholder)[0]
11170
+
11171
+ @classmethod
11172
+ def transform_config_dict(cls, d, network, get_layer):
11173
+ """
11174
+ :param dict[str] d:
11175
+ :param returnn.tf.network.TFNetwork network:
11176
+ :param get_layer:
11177
+ """
11178
+ d.setdefault("from", [])
11179
+ super(GradientLayer, cls).transform_config_dict(d, network=network, get_layer=get_layer)
11180
+ d["y"] = get_layer(d["y"])
11181
+ d["x"] = get_layer(d["x"])
11182
+
11183
+ @classmethod
11184
+ def get_out_data_from_opts(cls, y: LayerBase, x: LayerBase, name: str, **kwargs):
11185
+ """
11186
+ :param LayerBase y:
11187
+ :param LayerBase x:
11188
+ :param str name:
11189
+ :rtype: Data
11190
+ """
11191
+ assert y.output.batch_ndim == 0, f"GradientLayer {name!r}: y should be a scalar, got {y}"
11192
+ return x.output.copy_template(name="%s_output" % name)
11193
+
11194
+
11156
11195
  class SyntheticGradientLayer(_ConcatInputLayer):
11157
11196
  """
11158
11197
  This is a generalized way to be able to replace the true gradient with any kind of predicted gradient.
@@ -181,6 +181,18 @@ class TorchBackend(Backend[torch.Tensor]):
181
181
  """cast"""
182
182
  return raw_tensor.to(dtype=TorchBackend.as_dtype_raw(dtype))
183
183
 
184
+ @staticmethod
185
+ def set_requires_gradient(tensor: Tensor[torch.Tensor]):
186
+ """set requires grad"""
187
+ tensor.raw_tensor.requires_grad = True
188
+
189
+ @staticmethod
190
+ def gradient(y: Tensor, x: Tensor) -> Tensor:
191
+ """gradient"""
192
+ out = x.copy_template(name="gradient")
193
+ out.raw_tensor = torch.autograd.grad(y.raw_tensor, x.raw_tensor, create_graph=True)[0]
194
+ return out
195
+
184
196
  @staticmethod
185
197
  def stop_gradient(tensor: Tensor) -> Tensor:
186
198
  """stop grad"""
@@ -188,6 +200,33 @@ class TorchBackend(Backend[torch.Tensor]):
188
200
  out.raw_tensor = out.raw_tensor.detach()
189
201
  return out
190
202
 
203
+ @staticmethod
204
+ def scaled_gradient(tensor: Tensor, scale: Union[float, Tensor]) -> Tensor:
205
+ """scaled gradient"""
206
+ from returnn.torch.functional.scaled_gradient import scaled_gradient
207
+
208
+ out = tensor.copy()
209
+ out.raw_tensor = scaled_gradient(out.raw_tensor, scale=scale)
210
+ return out
211
+
212
+ @staticmethod
213
+ def scaled_gradient_ext(
214
+ x: Tensor, *, scale: float = 1.0, shift: float = 0.0, scale_shift_by_sum_over_axis: Optional[Dim] = None
215
+ ):
216
+ """scaled gradient ext"""
217
+ from returnn.torch.functional.scaled_gradient import scaled_gradient_ext
218
+
219
+ out = x.copy()
220
+ out.raw_tensor = scaled_gradient_ext(
221
+ out.raw_tensor,
222
+ scale=scale,
223
+ shift=shift,
224
+ scale_shift_by_sum_over_axis=x.get_axis_from_description(scale_shift_by_sum_over_axis, allow_int=False)
225
+ if scale_shift_by_sum_over_axis is not None
226
+ else None,
227
+ )
228
+ return out
229
+
191
230
  @staticmethod
192
231
  def merge_dims(
193
232
  source: Tensor,
@@ -0,0 +1,79 @@
1
+ """
2
+ Scaled gradients for backward pass.
3
+ This also covers gradient reversal, which is simply the case with scale=-1.
4
+ We actually extend the simple scaling by some further optional transformations like shifting.
5
+
6
+ The code is adapted from our TF implementation, see :func:`returnn.tf.util.basic.scaled_gradient`.
7
+
8
+ For some discussion on the specific implementation, see:
9
+ https://discuss.pytorch.org/t/gradient-scaling-reversal/186392
10
+
11
+ Also see other reference implementations:
12
+ https://github.com/facebookresearch/fairseq/blob/100cd91db19bb/fairseq/modules/grad_multiply.py
13
+ https://github.com/janfreyberg/pytorch-revgrad/blob/449fa763a76d/src/pytorch_revgrad/functional.py
14
+ https://github.com/tadeephuy/GradientReversal/blob/5d9857d63/gradient_reversal/functional.py
15
+ """
16
+
17
+
18
+ from __future__ import annotations
19
+ from typing import Optional
20
+ import torch
21
+
22
+
23
+ # noinspection PyMethodOverriding,PyAbstractClass,PyMissingOrEmptyDocstring
24
+ class _ScaledGradient(torch.autograd.Function):
25
+ @staticmethod
26
+ def forward(ctx, x: torch.Tensor, scale: float) -> torch.Tensor:
27
+ ctx.scale = scale
28
+ return x
29
+
30
+ @staticmethod
31
+ def backward(ctx, grad_output):
32
+ return grad_output * ctx.scale, None
33
+
34
+
35
+ def scaled_gradient(x: torch.Tensor, scale: float) -> torch.Tensor:
36
+ """
37
+ :param x:
38
+ :param scale:
39
+ :return: just x, however, in backward pass, the gradient is scaled by the given factor
40
+ """
41
+ return _ScaledGradient.apply(x, scale)
42
+
43
+
44
+ # noinspection PyMethodOverriding,PyAbstractClass,PyMissingOrEmptyDocstring
45
+ class _ScaledGradientExt(torch.autograd.Function):
46
+ @staticmethod
47
+ def forward(
48
+ ctx, x: torch.Tensor, scale: float = 1.0, shift: float = 0.0, scale_shift_by_sum_over_axis: Optional[int] = None
49
+ ):
50
+ ctx.scale = scale
51
+ ctx.shift = shift
52
+ ctx.scale_shift_by_sum_over_axis = scale_shift_by_sum_over_axis
53
+ return x
54
+
55
+ @staticmethod
56
+ def backward(ctx, grad):
57
+ grad_out = grad
58
+ if isinstance(ctx.scale, torch.Tensor) or ctx.scale != 1:
59
+ grad_out = grad_out * ctx.scale
60
+ if isinstance(ctx.shift, torch.Tensor) or ctx.shift != 0:
61
+ if ctx.scale_shift_by_sum_over_axis is not None:
62
+ m = torch.sum(torch.abs(grad), dim=ctx.scale_shift_by_sum_over_axis, keepdim=True)
63
+ grad_out = grad_out + ctx.shift * m
64
+ else:
65
+ grad_out = grad_out + ctx.shift
66
+ return grad_out, None, None, None
67
+
68
+
69
+ def scaled_gradient_ext(
70
+ x: torch.Tensor, *, scale: float = 1.0, shift: float = 0.0, scale_shift_by_sum_over_axis: Optional[int] = None
71
+ ):
72
+ """
73
+ :param x:
74
+ :param scale: will scale gradient by this value
75
+ :param shift: will shift gradient by this value
76
+ :param scale_shift_by_sum_over_axis: if given, will scale and shift by the sum over the given axis
77
+ :return: just x, but gradient in backward pass will be transformed accordingly
78
+ """
79
+ return _ScaledGradientExt.apply(x, scale, shift, scale_shift_by_sum_over_axis)
@@ -0,0 +1,11 @@
1
+ """
2
+ Some mathematical functions, in pure NumPy.
3
+ """
4
+
5
+
6
+ from __future__ import annotations
7
+
8
+
9
+ def next_power_of_two(n: int) -> int:
10
+ """next power of two, >= n"""
11
+ return 2 ** (int(n - 1).bit_length())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: returnn
3
- Version: 1.20230814.164933
3
+ Version: 1.20230815.191535
4
4
  Summary: The RWTH extensible training framework for universal recurrent neural networks
5
5
  Home-page: https://github.com/rwth-i6/returnn/
6
6
  Author: Albert Zeyer
@@ -169,6 +169,7 @@ returnn/frontend/dropout.py
169
169
  returnn/frontend/dtype.py
170
170
  returnn/frontend/gradient.py
171
171
  returnn/frontend/init.py
172
+ returnn/frontend/label_smoothing.py
172
173
  returnn/frontend/linear.py
173
174
  returnn/frontend/loop.py
174
175
  returnn/frontend/loss.py
@@ -262,6 +263,7 @@ returnn/torch/frontend/_rand.py
262
263
  returnn/torch/frontend/bridge.py
263
264
  returnn/torch/functional/README.md
264
265
  returnn/torch/functional/__init__.py
266
+ returnn/torch/functional/scaled_gradient.py
265
267
  returnn/util/__init__.py
266
268
  returnn/util/basic.py
267
269
  returnn/util/better_exchook.py
@@ -270,6 +272,7 @@ returnn/util/debug.py
270
272
  returnn/util/debug_helpers.py
271
273
  returnn/util/fsa.py
272
274
  returnn/util/literal_py_to_pickle.py
275
+ returnn/util/math.py
273
276
  returnn/util/pprint.py
274
277
  returnn/util/py-to-pickle.cpp
275
278
  returnn/util/py_compat.py
@@ -328,6 +331,8 @@ tests/test_rf_const.py
328
331
  tests/test_rf_container.py
329
332
  tests/test_rf_conv.py
330
333
  tests/test_rf_encoder_conformer.py
334
+ tests/test_rf_gradient.py
335
+ tests/test_rf_label_smoothing.py
331
336
  tests/test_rf_loop.py
332
337
  tests/test_rf_math.py
333
338
  tests/test_rf_normalization.py
@@ -0,0 +1,33 @@
1
+ """
2
+ RETURNN frontend (returnn.frontend) tests
3
+ """
4
+
5
+ from __future__ import annotations
6
+ import _setup_test_env # noqa
7
+ import returnn.frontend as rf
8
+ from returnn.tensor import Tensor, Dim, TensorDict, batch_dim
9
+ from rf_utils import run_model
10
+
11
+
12
+ def test_scaled_gradient():
13
+ time_dim = Dim(Tensor("time", [batch_dim], dtype="int32"))
14
+ in_dim = Dim(7, name="in")
15
+ extern_data = TensorDict(
16
+ {
17
+ "data": Tensor("data", [batch_dim, time_dim, in_dim], dtype="float32"),
18
+ }
19
+ )
20
+
21
+ # noinspection PyShadowingNames
22
+ def _forward_step(*, model: rf.Module, extern_data: TensorDict):
23
+ model # noqa # unused
24
+ data = extern_data["data"]
25
+ rf.set_requires_gradient(data)
26
+
27
+ out = rf.scaled_gradient(data, scale=-0.5)
28
+ out.mark_as_default_output(shape=(batch_dim, time_dim, in_dim))
29
+
30
+ grad = rf.gradient(rf.reduce_sum(out, axis=out.dims, use_mask=False), data)
31
+ grad.mark_as_output("grad")
32
+
33
+ run_model(extern_data, lambda *, epoch, step: rf.Module(), _forward_step)
@@ -0,0 +1,39 @@
1
+ """
2
+ RETURNN frontend (returnn.frontend) tests
3
+ """
4
+
5
+ from __future__ import annotations
6
+ import _setup_test_env # noqa
7
+ import returnn.frontend as rf
8
+ from returnn.tensor import Tensor, Dim, TensorDict, batch_dim
9
+ from rf_utils import run_model
10
+
11
+
12
+ def test_label_smoothed_log_prob_gradient():
13
+ time_dim = Dim(Tensor("time", [batch_dim], dtype="int32"))
14
+ vocab_dim = Dim(7, name="in")
15
+ extern_data = TensorDict(
16
+ {
17
+ "data": Tensor("data", [batch_dim, time_dim, vocab_dim], dtype="float32", feature_dim=vocab_dim),
18
+ "targets": Tensor("targets", [batch_dim, time_dim], dtype="int32", sparse_dim=vocab_dim),
19
+ }
20
+ )
21
+
22
+ # noinspection PyShadowingNames
23
+ def _forward_step(*, model: rf.Module, extern_data: TensorDict):
24
+ model # noqa # unused
25
+ data = extern_data["data"]
26
+ targets = extern_data["targets"]
27
+ rf.set_requires_gradient(data)
28
+
29
+ log_prob = rf.log_softmax(data, axis=vocab_dim)
30
+ out = rf.label_smoothed_log_prob_gradient(log_prob, 0.1)
31
+ loss = rf.cross_entropy(target=targets, estimated=log_prob, estimated_type="log-probs", axis=vocab_dim)
32
+
33
+ out.mark_as_default_output(shape=(batch_dim, time_dim, vocab_dim))
34
+ loss.mark_as_output("loss")
35
+
36
+ grad = rf.gradient(rf.reduce_sum(loss, axis=loss.dims), data)
37
+ grad.mark_as_output("grad")
38
+
39
+ run_model(extern_data, lambda *, epoch, step: rf.Module(), _forward_step)