returnn 1.20240830.140746__tar.gz → 1.20240905.105440__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of returnn might be problematic; see the package registry's diff page for more details.
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/PKG-INFO +1 -1
- returnn-1.20240905.105440/_setup_info_generated.py +2 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/distrib_files.py +26 -5
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/_numpy_backend.py +15 -1
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tensor/_dim_extra.py +124 -2
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/config_entry_points.py +3 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/util/data.py +1 -1
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/engine.py +51 -6
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/frontend/bridge.py +10 -7
- returnn-1.20240905.105440/returnn/torch/util/module.py +43 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn.egg-info/PKG-INFO +1 -1
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn.egg-info/SOURCES.txt +1 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/rf_utils.py +4 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_array.py +1 -1
- returnn-1.20240905.105440/tests/test_rf_decoder_transformer.py +324 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_torch_frontend.py +23 -0
- returnn-1.20240830.140746/_setup_info_generated.py +0 -2
- returnn-1.20240830.140746/tests/test_rf_decoder_transformer.py +0 -163
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/.editorconfig +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/.gitignore +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/.gitmodules +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/.kateconfig +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/CHANGELOG.md +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/CODEOWNERS +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/CONTRIBUTING.md +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/LICENSE +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/MANIFEST.in +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/README.rst +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/12AX.cluster_map +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/_setup_returnn_env.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-fwd.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-horovod-mpi.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-horovod-mpi.py.sh +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-horovod-mpi.sh +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-hyper-param-tuning.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-iter-dataset.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-list-devices.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-lua-torch-layer.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-pretrain.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-record-and-push-to-webserver.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-returnn-as-framework.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-rf-pt-benchmark.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-rf.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-rhn-enwik8.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-sprint-interface.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-att-copy.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-attention.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-enc-dec.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-hard-att-copy.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-lstm-benchmark.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-native-lstm.12ax.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-native-lstm2.12ax.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-neural-transducer.12ax.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-rec-explicit-lstm.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-rec-explicit-rnn.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-rec-self-att.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-search-compiled-graph.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-timit-lstm-ctc.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-torch.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/demo.sh +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/README.txt +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/chars.txt +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/config_demo +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/config_fwd +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/config_real +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/decode.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/go.sh +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/lines.txt +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/split/eval.txt +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/split/train.txt +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/IAM/split/valid.txt +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/README.md +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/artificial/create_test_h5.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/artificial/forwardconfig +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/artificial/go.sh +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/artificial/trainconfig +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/artificial_rgb/go.sh +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/pyproject.toml +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/requirements.txt +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/__main__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/__old_mod_loader__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/__setup__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/config.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/audio.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/basic.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/bundle_file.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/cached.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/cached2.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/generating.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/hdf.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/lm.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/map.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/meta.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/multi_proc.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/normalization_data.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/numpy_dump.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/postprocessing.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/raw_wav.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/sprint.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/stereo.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/util/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/util/feature_extraction.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/util/strings.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/datasets/util/vocabulary.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/engine/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/engine/base.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/engine/batch.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/__main__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/.git +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/graph_editor/README.md +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/graph_editor/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/graph_editor/edit.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/graph_editor/reroute.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/graph_editor/select.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/graph_editor/subgraph.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/graph_editor/transform.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/extern/graph_editor/util.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/forward_iface.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/_backend.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/_native/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/_native/backend.cpp +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/_native/backend.hpp +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/_native/module.cpp +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/_native/module.hpp +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/_native/py_utils.hpp +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/_native/tensor_ops.cpp +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/_native/tensor_ops.hpp +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/_random_journal.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/_utils.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/array_.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/attention.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/audio/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/audio/mel.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/audio/specaugment.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/backend.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/build_from_dict.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/cond.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/const.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/container.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/control_flow_ctx.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/conv.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/conversions/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/conversions/espnet_e_branchformer.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/conversions/hf_llama.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/conversions/torch_nn.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/decoder/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/decoder/transformer.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/device.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/dims.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/dropout.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/dtype.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/encoder/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/encoder/base.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/encoder/conformer.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/encoder/e_branchformer.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/gradient.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/graph.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/hooks.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/init.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/label_smoothing.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/linear.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/loop.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/loss.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/math_.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/matmul.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/module.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/normalization.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/parameter.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/parametrizations.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/parametrize.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/piecewise_linear.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/rand.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/rec.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/reduce.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/run_ctx.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/signal.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/state.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/stepwise_scheduler.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/tensor_array.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/frontend/types.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/import_/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/import_/common.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/import_/git.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/import_/import_.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/learning_rate_control.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/log.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/native_op.cpp +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/native_op.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/pretrain.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/sprint/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/sprint/cache.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/sprint/control.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/sprint/error_signals.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/sprint/extern_interface.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/sprint/interface.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tensor/README.md +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tensor/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tensor/_tensor_extra.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tensor/_tensor_mixin_base.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tensor/_tensor_op_overloads.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tensor/control_flow_ctx.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tensor/dim.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tensor/marked_dim.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tensor/tensor.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tensor/tensor_dict.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tensor/utils.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/compat.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/data_pipeline.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/distributed.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/engine.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/README.md +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/_backend.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/_utils.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/cond.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/dims.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/layer.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/loop.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/make_layer.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/masked_computation.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_low_level/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/frontend_low_level/_backend.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/horovod.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/hyper_param_tuning.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/layers/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/layers/base.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/layers/basic.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/layers/rec.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/layers/segmental_model.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/layers/signal_processing.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/layers/variable.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/native_op.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/network.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/sprint.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/updater.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/util/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/util/basic.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/util/gradient_checkpoint.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/util/ken_lm.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/tf/util/open_fst.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/README.md +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/data/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/data/extern_data.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/data/pipeline.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/data/queued_data_iter.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/data/tensor_utils.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/distributed.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/frontend/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/frontend/_backend.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/frontend/_rand.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/frontend/raw_ops.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/updater.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/util/README.md +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/util/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/util/array_.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/util/diagnose_gpu.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/util/gradient_checkpoint.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/torch/util/scaled_gradient.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/basic.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/better_exchook.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/bpe.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/debug.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/debug_helpers.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/file_cache.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/fsa.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/literal_py_to_pickle.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/math.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/native_code_compiler.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/pprint.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/py-to-pickle.cpp +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/py_compat.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/py_ext_mod_compiler.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/result_with_reason.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/sig_proc.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/task_system.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/train_proc_manager.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn/util/watch_memory.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn.egg-info/dependency_links.txt +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/returnn.egg-info/top_level.txt +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/rnn.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/setup.cfg +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/setup.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/DummySprintExec.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/PyCharm-inspection-profile.xml +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/PyCharm.idea/.gitignore +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/PyCharm.idea/.name +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/PyCharm.idea/misc.xml +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/PyCharm.idea/modules.xml +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/PyCharm.idea/returnn.iml +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/_set_num_threads1.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/_setup_returnn_env.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/_setup_test_env.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/bpe-unicode-demo.codes +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/bpe-unicode-demo.vocab +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/lexicon_opt.fst +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/lexicon_opt.isyms +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/lexicon_opt.jpg +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/lexicon_opt.osyms +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/lint_common.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/pycharm-inspect.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/pylint.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/returnn-as-framework.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/spelling.dic +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_Config.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_Dataset.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_Fsa.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_GeneratingDataset.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_HDFDataset.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_LearningRateControl.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_Log.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_MultiProcDataset.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_Pretrain.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_ResNet.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_SprintDataset.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_SprintInterface.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_TFEngine.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_TFNativeOp.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_TFNetworkLayer.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_TFNetworkRecLayer.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_TFNetworkSigProcLayer.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_TFUpdater.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_TFUtil.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_TF_determinism.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_TaskSystem.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_TaskSystem_SharedMem.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_TranslationDataset.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_Util.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_demos.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_fork_exec.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_hdf_dump.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_attention.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_base.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_cond.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_const.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_container.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_conv.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_encoder_conformer.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_gradient.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_label_smoothing.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_loop.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_math.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_normalization.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_piecewise_linear.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_rec.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_reduce.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_rf_signal.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_tensor.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_tools.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_torch_dataset.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_torch_engine.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_torch_internal_frontend.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/test_torch_util.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tests/torch_utils.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/_setup_returnn_env.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/analyze-dataset-batches.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/bliss-collect-seq-lens.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/bliss-dump-text.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/bliss-get-segment-names.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/bliss-to-ogg-zip.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/bpe-create-lexicon.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/calculate-word-error-rate.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/cleanup-old-models.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/collect-orth-symbols.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/collect-words.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/compile_native_op.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/compile_tf_graph.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/debug-dump-search-scores.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/debug-plot-search-scores.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/dump-dataset-raw-strings.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/dump-dataset.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/dump-forward-stats.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/dump-forward.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/dump-network-json.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/dump-pickle.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/extract_state_tying_from_dataset.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/get-attention-weights.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/get-best-model-epoch.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/hdf_dump.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/hdf_dump_translation_dataset.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/import-blocks-mt-model.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/import-t2t-mt-model.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/.gitignore +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/Makefile +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/README.md +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/example/README.md +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/example/libs_list +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/example/state_vars_list +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/example/tensor_names_list +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/file.h +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/main.cc +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/rescorer.h +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/vocabulary.cc +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/lattice_rescorer/vocabulary.h +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/tf_avg_checkpoints.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/tf_inspect_checkpoint.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/tf_inspect_summary_log.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/torch_avg_checkpoints.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/torch_export_to_onnx.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/torch_inspect_checkpoint.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240905.105440}/tools/torch_inspect_checkpoint_and_opt.py +0 -0
|
@@ -317,15 +317,36 @@ class DistributeFilesDataset(CachedDataset2):
|
|
|
317
317
|
return True
|
|
318
318
|
|
|
319
319
|
def _get_sub_dataset_dict(self, files: List[FileTree]) -> Dict[str, Any]:
|
|
320
|
+
import tree
|
|
321
|
+
|
|
320
322
|
dataset_dict = self.get_sub_epoch_dataset(files)
|
|
321
323
|
dataset_dict = extend_dataset_dict_from_parent_dataset(dataset_dict, parent_dataset=self)
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
324
|
+
|
|
325
|
+
flat_sub_dset = tree.flatten_with_path(dataset_dict)
|
|
326
|
+
|
|
327
|
+
part_epoch_cfg = next(
|
|
328
|
+
((path, v) for path, v in flat_sub_dset if path[-1] == "partition_epoch" and v != 1), None
|
|
329
|
+
)
|
|
330
|
+
if part_epoch_cfg is not None:
|
|
331
|
+
path, subeps = part_epoch_cfg
|
|
325
332
|
raise ValueError(
|
|
326
|
-
f"{self}: sub dataset should have
|
|
327
|
-
f"
|
|
333
|
+
f"{self}: sub dataset should not have partition_epoch, "
|
|
334
|
+
f'but got "partition_epoch": {subeps} at {".".join(path)} in {dataset_dict}.'
|
|
328
335
|
)
|
|
336
|
+
|
|
337
|
+
# Heuristic check for well-definedness of seq ordering. Might need to be extended in the
|
|
338
|
+
# future if there are other ways of defining a seq order than the ones below.
|
|
339
|
+
if (
|
|
340
|
+
not any(path[-1] == "seq_ordering" for path, _ in flat_sub_dset)
|
|
341
|
+
and not any(path[-1] == "seq_order_control_dataset" for path, _ in flat_sub_dset)
|
|
342
|
+
and not any(path[-1] == "map_seq_stream" for path, _ in flat_sub_dset)
|
|
343
|
+
):
|
|
344
|
+
raise ValueError(
|
|
345
|
+
f"{self}: there should be an explicit seq_ordering somewhere in the sub dataset "
|
|
346
|
+
f"(or seq_order_control_dataset for MetaDataset or map_seq_stream for PostprocessingDataset), "
|
|
347
|
+
f"but found none in {dataset_dict}."
|
|
348
|
+
)
|
|
349
|
+
|
|
329
350
|
return dataset_dict
|
|
330
351
|
|
|
331
352
|
@staticmethod
|
|
@@ -153,7 +153,10 @@ class NumpyBackend(Backend[numpy.ndarray]):
|
|
|
153
153
|
op = NumpyBackend._CombineKindMap.get(kind)
|
|
154
154
|
if not op:
|
|
155
155
|
raise ValueError(f"RF NumpyBackend: combine kind {kind!r} not supported")
|
|
156
|
-
|
|
156
|
+
res = op(a, b)
|
|
157
|
+
if not isinstance(res, numpy.ndarray):
|
|
158
|
+
res = numpy.array(res)
|
|
159
|
+
return res
|
|
157
160
|
|
|
158
161
|
@staticmethod
|
|
159
162
|
def range_over_dim(dim: Dim, *, dtype: Optional[str] = None, device: Optional[str] = None) -> Tensor[numpy.ndarray]:
|
|
@@ -211,3 +214,14 @@ class NumpyBackend(Backend[numpy.ndarray]):
|
|
|
211
214
|
sparse_dim=source.sparse_dim,
|
|
212
215
|
)
|
|
213
216
|
return res
|
|
217
|
+
|
|
218
|
+
@staticmethod
|
|
219
|
+
def activation_raw(raw_tensor: numpy.ndarray, func: str) -> numpy.ndarray:
|
|
220
|
+
"""
|
|
221
|
+
:param raw_tensor:
|
|
222
|
+
:param func: "tanh", "sigmoid", "relu", ...
|
|
223
|
+
:return: raw tensor with elementwise activation applied
|
|
224
|
+
"""
|
|
225
|
+
if func == "relu":
|
|
226
|
+
return numpy.array(numpy.maximum(raw_tensor, 0))
|
|
227
|
+
raise NotImplementedError("NumpyBackend: activation %r not implemented" % func)
|
|
@@ -4,8 +4,9 @@ or just rarely used attribs, such that we can save memory for the common case.
|
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
6
|
from __future__ import annotations
|
|
7
|
-
from typing import TYPE_CHECKING, Optional, Union, Any, Tuple, Sequence, Dict, List, Set, Callable
|
|
7
|
+
from typing import TYPE_CHECKING, Optional, Union, Any, Tuple, Sequence, MutableMapping, Dict, List, Set, Callable
|
|
8
8
|
import operator
|
|
9
|
+
import weakref
|
|
9
10
|
|
|
10
11
|
from returnn.util.basic import Entity
|
|
11
12
|
from returnn.util import basic as util
|
|
@@ -118,7 +119,7 @@ class _DimExtra:
|
|
|
118
119
|
self.same_for_batch_ctx = {} # type: Dict[Tuple[BatchInfo,Optional[ControlFlowContext]],_d.Dim]
|
|
119
120
|
self.cache_dyn_size_ext_dev = {} # type: Dict[str,_t.Tensor] # device -> dyn_size_ext
|
|
120
121
|
self.cache_seq_mask: Dict[Tuple[str, Optional[Tuple[Dim, ...]]], _t.Tensor] = {} # (dev,dim_order) -> seq_mask
|
|
121
|
-
self.cache_dim_math
|
|
122
|
+
self.cache_dim_math = _CacheDimMath() # op (add,sub,...), operand -> Dim
|
|
122
123
|
|
|
123
124
|
def __getstate__(self):
|
|
124
125
|
d = vars(self).copy()
|
|
@@ -389,6 +390,10 @@ class _DimMixin:
|
|
|
389
390
|
if dim_extra:
|
|
390
391
|
# Any dims via dim math could also contain raw tensors,
|
|
391
392
|
# so iterate through them.
|
|
393
|
+
if dim.dyn_size_ext is not None or dim.dimension is None:
|
|
394
|
+
dim_extra.cache_dim_math.clear()
|
|
395
|
+
else:
|
|
396
|
+
dim_extra.cache_dim_math.clear_dynamic()
|
|
392
397
|
queue += dim_extra.cache_dim_math.values()
|
|
393
398
|
if dim_extra.same_as:
|
|
394
399
|
queue.append(dim_extra.same_as)
|
|
@@ -2873,6 +2878,123 @@ def dim_cmp_value(obj):
|
|
|
2873
2878
|
return obj
|
|
2874
2879
|
|
|
2875
2880
|
|
|
2881
|
+
class _CacheDimMath:
|
|
2882
|
+
"""op (add,sub,...), operand -> Dim"""
|
|
2883
|
+
|
|
2884
|
+
class _OperandCache:
|
|
2885
|
+
def __init__(self):
|
|
2886
|
+
self.dims: MutableMapping[Dim, Dim] = weakref.WeakKeyDictionary()
|
|
2887
|
+
self.statics: Dict[int, Dim] = {}
|
|
2888
|
+
|
|
2889
|
+
def __init__(self):
|
|
2890
|
+
self._ops: Dict[str, _CacheDimMath._OperandCache] = {}
|
|
2891
|
+
|
|
2892
|
+
def __repr__(self):
|
|
2893
|
+
return "_CacheDimMath({%s})" % ", ".join("%r: %r" % (k, v) for k, v in self.items())
|
|
2894
|
+
|
|
2895
|
+
def _get_op_dict(self, __key: Tuple[str, Union[Dim, int]]) -> _OperandCache:
|
|
2896
|
+
if __key[0] in self._ops:
|
|
2897
|
+
return self._ops[__key[0]]
|
|
2898
|
+
else:
|
|
2899
|
+
op_dict = self._OperandCache()
|
|
2900
|
+
self._ops[__key[0]] = op_dict
|
|
2901
|
+
return op_dict
|
|
2902
|
+
|
|
2903
|
+
def __setitem__(self, __key: Tuple[str, Union[Dim, int]], __value: Dim):
|
|
2904
|
+
op_dict = self._get_op_dict(__key)
|
|
2905
|
+
if isinstance(__key[1], int):
|
|
2906
|
+
value_dict = op_dict.statics
|
|
2907
|
+
else:
|
|
2908
|
+
value_dict = op_dict.dims
|
|
2909
|
+
if __key[1] in value_dict:
|
|
2910
|
+
value_dict[__key[1]] = __value
|
|
2911
|
+
return
|
|
2912
|
+
if len(value_dict) >= 5:
|
|
2913
|
+
# Just to avoid memory leaks.
|
|
2914
|
+
value_dict.clear()
|
|
2915
|
+
value_dict[__key[1]] = __value
|
|
2916
|
+
|
|
2917
|
+
def __delitem__(self, __key: Tuple[str, Union[Dim, int]]):
|
|
2918
|
+
op_dict = self._ops[__key[0]]
|
|
2919
|
+
if isinstance(__key[1], int):
|
|
2920
|
+
del op_dict.statics[__key[1]]
|
|
2921
|
+
else:
|
|
2922
|
+
del op_dict.dims[__key[1]]
|
|
2923
|
+
|
|
2924
|
+
def __getitem__(self, __key: Tuple[str, Union[Dim, int]]) -> Dim:
|
|
2925
|
+
op_dict = self._ops[__key[0]]
|
|
2926
|
+
if isinstance(__key[1], int):
|
|
2927
|
+
return op_dict.statics[__key[1]]
|
|
2928
|
+
else:
|
|
2929
|
+
return op_dict.dims[__key[1]]
|
|
2930
|
+
|
|
2931
|
+
def __contains__(self, __key: Tuple[str, Union[Dim, int]]) -> bool:
|
|
2932
|
+
op_dict = self._ops.get(__key[0])
|
|
2933
|
+
if not op_dict:
|
|
2934
|
+
return False
|
|
2935
|
+
if isinstance(__key[1], int):
|
|
2936
|
+
return __key[1] in op_dict.statics
|
|
2937
|
+
else:
|
|
2938
|
+
return __key[1] in op_dict.dims
|
|
2939
|
+
|
|
2940
|
+
def get(self, __key: Tuple[str, Union[Dim, int]], default: Optional[Dim] = None) -> Optional[Dim]:
|
|
2941
|
+
"""get"""
|
|
2942
|
+
op_dict = self._ops.get(__key[0])
|
|
2943
|
+
if not op_dict:
|
|
2944
|
+
return default
|
|
2945
|
+
if isinstance(__key[1], int):
|
|
2946
|
+
return op_dict.statics.get(__key[1], default)
|
|
2947
|
+
else:
|
|
2948
|
+
return op_dict.dims.get(__key[1], default)
|
|
2949
|
+
|
|
2950
|
+
def setdefault(self, __key: Tuple[str, Union[Dim, int]], __value: Dim):
|
|
2951
|
+
"""setdefault"""
|
|
2952
|
+
existing = self.get(__key)
|
|
2953
|
+
if existing is not None:
|
|
2954
|
+
return existing
|
|
2955
|
+
self[__key] = __value
|
|
2956
|
+
return __value
|
|
2957
|
+
|
|
2958
|
+
def clear(self):
|
|
2959
|
+
"""clear"""
|
|
2960
|
+
self._ops.clear()
|
|
2961
|
+
|
|
2962
|
+
def clear_dynamic(self):
|
|
2963
|
+
"""clear dynamic part"""
|
|
2964
|
+
for op_dict in self._ops.values():
|
|
2965
|
+
for k, v in list(op_dict.dims.items()):
|
|
2966
|
+
if v.dyn_size_ext is not None or v.dimension is None:
|
|
2967
|
+
del op_dict.dims[k]
|
|
2968
|
+
|
|
2969
|
+
def __len__(self):
|
|
2970
|
+
count = 0
|
|
2971
|
+
for op_dict in self._ops.values():
|
|
2972
|
+
count += len(op_dict.statics)
|
|
2973
|
+
count += len(op_dict.dims)
|
|
2974
|
+
return count
|
|
2975
|
+
|
|
2976
|
+
def items(self):
|
|
2977
|
+
"""items"""
|
|
2978
|
+
for op_name, op_dict in self._ops.items():
|
|
2979
|
+
for key, value in op_dict.statics.items():
|
|
2980
|
+
yield (op_name, key), value
|
|
2981
|
+
for key, value in op_dict.dims.items():
|
|
2982
|
+
yield (op_name, key), value
|
|
2983
|
+
|
|
2984
|
+
def keys(self):
|
|
2985
|
+
"""keys"""
|
|
2986
|
+
for k, v in self.items():
|
|
2987
|
+
yield k
|
|
2988
|
+
|
|
2989
|
+
def values(self):
|
|
2990
|
+
"""values"""
|
|
2991
|
+
for k, v in self.items():
|
|
2992
|
+
yield v
|
|
2993
|
+
|
|
2994
|
+
def __iter__(self):
|
|
2995
|
+
yield from self.keys()
|
|
2996
|
+
|
|
2997
|
+
|
|
2876
2998
|
def _behavior_version_reset_callback():
|
|
2877
2999
|
# Reset things we did in _handle_new_min_version.
|
|
2878
3000
|
_DimMixin._SimpleEquality = False
|
|
@@ -118,6 +118,9 @@ def get_net_dict(
|
|
|
118
118
|
# but now the TF engine actually wants to have Tensor[tf.Tensor].
|
|
119
119
|
# Reset it now. The TF engine should redefine it again.
|
|
120
120
|
elem.reset_batch_and_raw()
|
|
121
|
+
elif isinstance(elem, set):
|
|
122
|
+
# map_structure does not recurse into sets.
|
|
123
|
+
nest.map_structure(_cleanup_net_dict_value, sorted(list(elem)))
|
|
121
124
|
return elem
|
|
122
125
|
|
|
123
126
|
# Do some cleanup.
|
|
@@ -339,7 +339,7 @@ class BatchInfo:
|
|
|
339
339
|
|
|
340
340
|
# Ok, need to extend.
|
|
341
341
|
global_batch_dims = [dim for dim in all_virtual_dims if isinstance(dim, BatchInfo.GlobalBatchDim)]
|
|
342
|
-
assert len(global_batch_dims) == 1
|
|
342
|
+
assert len(global_batch_dims) == 1, f"got global_batch_dims={global_batch_dims!r}"
|
|
343
343
|
global_batch_dim = global_batch_dims[0]
|
|
344
344
|
assert base.virtual_dims == [global_batch_dim]
|
|
345
345
|
beams = [dim for dim in all_virtual_dims if isinstance(dim, BatchInfo.BeamDim)]
|
|
@@ -3,13 +3,15 @@ Main engine for PyTorch
|
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
from __future__ import annotations
|
|
6
|
-
from typing import Optional, Any, Union, Callable, Dict
|
|
6
|
+
from typing import Optional, Any, Union, Callable, Dict, Set
|
|
7
7
|
from contextlib import nullcontext
|
|
8
8
|
|
|
9
9
|
import gc
|
|
10
10
|
import os
|
|
11
11
|
import time
|
|
12
12
|
import socket
|
|
13
|
+
import fnmatch
|
|
14
|
+
import re
|
|
13
15
|
|
|
14
16
|
import torch
|
|
15
17
|
import torch.distributed
|
|
@@ -41,6 +43,7 @@ from .data import extern_data as extern_data_util
|
|
|
41
43
|
from .data.queued_data_iter import QueuedDataIter
|
|
42
44
|
from .frontend.bridge import rf_module_to_pt_module
|
|
43
45
|
from .util import diagnose_gpu
|
|
46
|
+
from .util import module as util_module
|
|
44
47
|
from .distributed import DistributedContext, get_ctx as dist_get_ctx
|
|
45
48
|
|
|
46
49
|
|
|
@@ -83,6 +86,7 @@ class Engine(EngineBase):
|
|
|
83
86
|
self._forward_step_expected_outputs = TensorDict()
|
|
84
87
|
self._forward_step_expected_outputs.update(self.config.typed_value("model_outputs"), auto_convert=True)
|
|
85
88
|
self._save_model_epoch_interval = 1
|
|
89
|
+
self._ignore_param_set: Set[str] = set() # for the updater and for saving the model checkpoint
|
|
86
90
|
self._updater = None # type: Optional[Updater]
|
|
87
91
|
|
|
88
92
|
self._use_autocast = False
|
|
@@ -721,6 +725,7 @@ class Engine(EngineBase):
|
|
|
721
725
|
|
|
722
726
|
self._create_model(epoch=epoch, step=step)
|
|
723
727
|
|
|
728
|
+
self._ignore_param_set.clear()
|
|
724
729
|
loaded_state_keys = set()
|
|
725
730
|
missing_keys = set()
|
|
726
731
|
unexpected_keys = set()
|
|
@@ -745,6 +750,7 @@ class Engine(EngineBase):
|
|
|
745
750
|
|
|
746
751
|
preload_from_files = self.config.typed_value("preload_from_files", {})
|
|
747
752
|
if preload_from_files:
|
|
753
|
+
model_state_keys_set = set(self._pt_model.state_dict().keys())
|
|
748
754
|
# see `preload_from_files` in tf engine and `returnn.tf.network.CustomCheckpointLoader`
|
|
749
755
|
# We use the reversed sorted order here to achieve consistent behavior with the TF engine.
|
|
750
756
|
# There, the keys are used in sorted order but if a variable is loaded,
|
|
@@ -754,12 +760,39 @@ class Engine(EngineBase):
|
|
|
754
760
|
# In order to get consistent behavior, we use the reversed order.
|
|
755
761
|
for preload_key, opts in reversed(sorted(preload_from_files.items())):
|
|
756
762
|
assert isinstance(opts, dict) and "filename" in opts
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
763
|
+
init_for_train = opts.get("init_for_train", False)
|
|
764
|
+
if init_for_train:
|
|
765
|
+
if isinstance(init_for_train, str) and init_for_train == "always":
|
|
766
|
+
# No matter if this is the first train epoch
|
|
767
|
+
# or training with loading some prev epoch,
|
|
768
|
+
# those parameters will always be loaded via preload_from_files,
|
|
769
|
+
# and thus also not stored in our own checkpoint.
|
|
770
|
+
pass
|
|
771
|
+
elif isinstance(init_for_train, bool) and init_for_train:
|
|
772
|
+
if not is_first_train_epoch:
|
|
773
|
+
continue
|
|
774
|
+
else:
|
|
775
|
+
raise ValueError(
|
|
776
|
+
f"preload key {preload_key}:"
|
|
777
|
+
f" invalid init_for_train value {init_for_train!r} (type {type(init_for_train).__name__})"
|
|
778
|
+
)
|
|
760
779
|
else: # default: init for recog
|
|
761
780
|
if is_training:
|
|
762
781
|
continue
|
|
782
|
+
if opts["filename"] is None:
|
|
783
|
+
print(f"Pre-load (initialize) weights for key '{preload_key}'", file=log.v3)
|
|
784
|
+
pattern = opts["pattern"]
|
|
785
|
+
match = re.compile(fnmatch.translate(pattern)).match
|
|
786
|
+
remove = []
|
|
787
|
+
for name in self._pt_model.state_dict().keys():
|
|
788
|
+
if match(name) and name in missing_keys:
|
|
789
|
+
remove.append(name)
|
|
790
|
+
if remove:
|
|
791
|
+
print(f"Randomly initialize params: {remove}", file=log.v3)
|
|
792
|
+
missing_keys.difference_update(remove)
|
|
793
|
+
else:
|
|
794
|
+
print("(No relevant parameters matching.)", file=log.v3)
|
|
795
|
+
continue
|
|
763
796
|
print(f"Pre-load weights for key '{preload_key}' from {opts['filename']}", file=log.v3)
|
|
764
797
|
preload_model_state = torch.load(opts["filename"])
|
|
765
798
|
if opts.get("checkpoint_key", "model") is not None:
|
|
@@ -786,6 +819,8 @@ class Engine(EngineBase):
|
|
|
786
819
|
preload_model_state.pop(key)
|
|
787
820
|
for new_name, name_in_checkpoint in opts.get("var_name_mapping", {}).items():
|
|
788
821
|
preload_model_state[new_name] = preload_model_state.pop(name_in_checkpoint)
|
|
822
|
+
if init_for_train == "always":
|
|
823
|
+
self._ignore_param_set.update(set(preload_model_state.keys()).intersection(model_state_keys_set))
|
|
789
824
|
missing_keys_preload, unexpected_keys_preload = self._pt_model.load_state_dict(
|
|
790
825
|
preload_model_state, strict=False
|
|
791
826
|
)
|
|
@@ -797,7 +832,7 @@ class Engine(EngineBase):
|
|
|
797
832
|
if opts.get("prefix", ""):
|
|
798
833
|
prefix_keys = [key for key in self._pt_model.state_dict() if key.startswith(opts.get("prefix", ""))]
|
|
799
834
|
else:
|
|
800
|
-
prefix_keys =
|
|
835
|
+
prefix_keys = model_state_keys_set
|
|
801
836
|
missing_keys_preload = (
|
|
802
837
|
set(prefix_keys).intersection(set(missing_keys_preload)).difference(loaded_state_keys)
|
|
803
838
|
)
|
|
@@ -816,6 +851,9 @@ class Engine(EngineBase):
|
|
|
816
851
|
)
|
|
817
852
|
unexpected_keys.update(unexpected_keys_preload)
|
|
818
853
|
|
|
854
|
+
if self._ignore_param_set:
|
|
855
|
+
util_module.convert_parameters_to_buffers(self._pt_model, self._ignore_param_set, persistent=False)
|
|
856
|
+
|
|
819
857
|
if missing_keys:
|
|
820
858
|
raise Exception(
|
|
821
859
|
"\n".join(
|
|
@@ -913,9 +951,16 @@ class Engine(EngineBase):
|
|
|
913
951
|
tmp_filename = filename + ".tmp_write"
|
|
914
952
|
if os.path.exists(tmp_filename):
|
|
915
953
|
os.unlink(tmp_filename)
|
|
954
|
+
state_dict = self._pt_model.state_dict()
|
|
955
|
+
if self._ignore_param_set:
|
|
956
|
+
# Do some extra check that we don't save the ignored parameters.
|
|
957
|
+
# Should not be in the state_dict anymore because we should have converted them to buffers
|
|
958
|
+
# via util_module.convert_parameters_to_buffers before.
|
|
959
|
+
remaining = set(state_dict.keys()).intersection(self._ignore_param_set)
|
|
960
|
+
assert not remaining, f"_save_model: found remaining params in state_dict to ignore: {remaining}"
|
|
916
961
|
torch.save(
|
|
917
962
|
{
|
|
918
|
-
"model":
|
|
963
|
+
"model": state_dict,
|
|
919
964
|
"epoch": self.epoch,
|
|
920
965
|
"step": self.global_train_step,
|
|
921
966
|
"effective_learning_rate": self._updater.get_effective_learning_rate() if self._updater else None,
|
|
@@ -149,13 +149,16 @@ class RFModuleAsPTModule(torch.nn.Module):
|
|
|
149
149
|
for name, rf_param in self._rf_module.named_parameters(recurse=False):
|
|
150
150
|
pt_param = getattr(self, name)
|
|
151
151
|
if rf_param.auxiliary and self._aux_params_as_buffers:
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
152
|
+
if not isinstance(pt_param, torch.nn.Parameter):
|
|
153
|
+
assert isinstance(pt_param, torch.Tensor) # but not torch.nn.Parameter
|
|
154
|
+
# See similar logic in torch.nn.Module._apply.
|
|
155
|
+
pt_param = torch.nn.Parameter(pt_param, pt_param.requires_grad)
|
|
156
|
+
# Otherwise, we do not care whether it is a torch.nn.Parameter or not.
|
|
157
|
+
# Its type might have changed due to convert_parameters_to_buffers.
|
|
158
|
+
# Just make sure it is a tensor.
|
|
159
|
+
assert isinstance(pt_param, torch.Tensor)
|
|
160
|
+
# noinspection PyProtectedMember
|
|
161
|
+
rf_param.dtype = rf_param._raw_backend.get_dtype_name_raw(pt_param) # dtype might have changed
|
|
159
162
|
rf_param.raw_tensor = pt_param
|
|
160
163
|
|
|
161
164
|
def register_parameter(self, name: str, param: Optional[torch.nn.Parameter]) -> None:
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utils for modules
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
from typing import Collection
|
|
7
|
+
import torch
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def convert_parameters_to_buffers(
|
|
11
|
+
module: torch.nn.Module, parameter_names: Collection[str], *, deep: bool = True, persistent: bool
|
|
12
|
+
):
|
|
13
|
+
"""
|
|
14
|
+
:param module:
|
|
15
|
+
:param parameter_names:
|
|
16
|
+
:param deep: parameter_name can contain '.' to access submodules
|
|
17
|
+
:param persistent: whether the buffer is persistent. if True, the buffer will be saved to the state_dict.
|
|
18
|
+
passed to module.register_buffer.
|
|
19
|
+
"""
|
|
20
|
+
for parameter_name in parameter_names:
|
|
21
|
+
convert_parameter_to_buffer(module, parameter_name, deep=deep, persistent=persistent)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def convert_parameter_to_buffer(module: torch.nn.Module, parameter_name: str, *, deep: bool = True, persistent: bool):
|
|
25
|
+
"""
|
|
26
|
+
:param module:
|
|
27
|
+
:param parameter_name:
|
|
28
|
+
:param deep: parameter_name can contain '.' to access submodules
|
|
29
|
+
:param persistent: whether the buffer is persistent. if True, the buffer will be saved to the state_dict.
|
|
30
|
+
passed to module.register_buffer.
|
|
31
|
+
"""
|
|
32
|
+
if "." in parameter_name:
|
|
33
|
+
if not deep:
|
|
34
|
+
raise ValueError("parameter_name can't contain '.' when deep is False")
|
|
35
|
+
module_path, _, parameter_name = parameter_name.rpartition(".")
|
|
36
|
+
module = module.get_submodule(module_path)
|
|
37
|
+
|
|
38
|
+
parameter = getattr(module, parameter_name)
|
|
39
|
+
if not isinstance(parameter, torch.nn.Parameter):
|
|
40
|
+
raise ValueError(f"{parameter_name} is not a torch.nn.Parameter, got type {type(parameter).__name__}")
|
|
41
|
+
delattr(module, parameter_name)
|
|
42
|
+
parameter.requires_grad = False
|
|
43
|
+
module.register_buffer(parameter_name, parameter, persistent=persistent)
|
|
@@ -302,6 +302,7 @@ returnn/torch/util/__init__.py
|
|
|
302
302
|
returnn/torch/util/array_.py
|
|
303
303
|
returnn/torch/util/diagnose_gpu.py
|
|
304
304
|
returnn/torch/util/gradient_checkpoint.py
|
|
305
|
+
returnn/torch/util/module.py
|
|
305
306
|
returnn/torch/util/scaled_gradient.py
|
|
306
307
|
returnn/util/__init__.py
|
|
307
308
|
returnn/util/basic.py
|
|
@@ -185,6 +185,9 @@ def _run_model_net_dict_tf(
|
|
|
185
185
|
|
|
186
186
|
from returnn.tf.frontend_layers.config_entry_points import get_net_dict
|
|
187
187
|
|
|
188
|
+
# noinspection PyProtectedMember
|
|
189
|
+
from returnn.frontend import _backend
|
|
190
|
+
|
|
188
191
|
config = Config(
|
|
189
192
|
{
|
|
190
193
|
"debug_runtime_sanity_checks": True,
|
|
@@ -203,6 +206,7 @@ def _run_model_net_dict_tf(
|
|
|
203
206
|
outputs_layers = rf.get_run_ctx().outputs
|
|
204
207
|
print("*** outputs:", outputs_layers)
|
|
205
208
|
|
|
209
|
+
_backend.select_backend_tf()
|
|
206
210
|
net = TFNetwork(config=config, train_flag=False)
|
|
207
211
|
net.construct_from_dict(net_dict)
|
|
208
212
|
|
|
@@ -238,7 +238,7 @@ def test_pad_time_right():
|
|
|
238
238
|
assert data_.dims == (batch_dim, time_dim, in_dim)
|
|
239
239
|
new_time_dim = out_.dims[1]
|
|
240
240
|
assert out_.dims == (batch_dim, new_time_dim, in_dim) and new_time_dim != time_dim
|
|
241
|
-
assert new_time_dim == time_dim + 1 # math dim... not really necessary check here...
|
|
241
|
+
# assert new_time_dim == time_dim + 1 # math dim... not really necessary check here...
|
|
242
242
|
assert time_dim.dyn_size_ext.dims == new_time_dim.dyn_size_ext.dims == (batch_dim,)
|
|
243
243
|
batch_size = batch_dim.get_dim_value()
|
|
244
244
|
assert batch_size > 1
|