returnn 1.20250109.145311__tar.gz → 1.20250114.164134__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of returnn might be problematic.
- {returnn-1.20250109.145311/returnn.egg-info → returnn-1.20250114.164134}/PKG-INFO +1 -1
- returnn-1.20250114.164134/_setup_info_generated.py +2 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/requirements.txt +0 -1
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/__main__.py +1 -2
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/datasets/distrib_files.py +1 -2
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/datasets/postprocessing.py +4 -1
- returnn-1.20250114.164134/returnn/frontend/_cache.py +208 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/attention.py +12 -12
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/conversions/hf_llama.py +7 -4
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/util/debug.py +13 -0
- returnn-1.20250114.164134/returnn/util/lru_cache.py +309 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/util/py-to-pickle.cpp +1 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134/returnn.egg-info}/PKG-INFO +1 -1
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn.egg-info/SOURCES.txt +3 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_Dataset.py +8 -3
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_Log.py +4 -1
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_rf_attention.py +17 -9
- returnn-1.20250114.164134/tests/test_threading.py +88 -0
- returnn-1.20250109.145311/_setup_info_generated.py +0 -2
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/.editorconfig +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/.gitignore +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/.gitmodules +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/.kateconfig +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/CHANGELOG.md +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/CODEOWNERS +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/CONTRIBUTING.md +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/LICENSE +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/MANIFEST.in +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/README.rst +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/12AX.cluster_map +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/_setup_returnn_env.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-fwd.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-horovod-mpi.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-horovod-mpi.py.sh +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-horovod-mpi.sh +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-hyper-param-tuning.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-iter-dataset.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-list-devices.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-lua-torch-layer.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-pretrain.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-record-and-push-to-webserver.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-returnn-as-framework.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-rf-pt-benchmark.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-rf.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-rhn-enwik8.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-sprint-interface.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-tf-att-copy.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-tf-attention.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-tf-enc-dec.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-tf-hard-att-copy.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-tf-lstm-benchmark.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-tf-native-lstm.12ax.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-tf-native-lstm2.12ax.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-tf-neural-transducer.12ax.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-tf-rec-explicit-lstm.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-tf-rec-explicit-rnn.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-tf-rec-self-att.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-tf-search-compiled-graph.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-timit-lstm-ctc.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-torch.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/demo.sh +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/mdlstm/IAM/README.txt +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/mdlstm/IAM/chars.txt +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/mdlstm/IAM/config_demo +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/mdlstm/IAM/config_fwd +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/mdlstm/IAM/config_real +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/mdlstm/IAM/decode.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/mdlstm/IAM/go.sh +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/mdlstm/IAM/lines.txt +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/mdlstm/IAM/split/eval.txt +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/mdlstm/IAM/split/train.txt +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/mdlstm/IAM/split/valid.txt +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/mdlstm/README.md +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/mdlstm/artificial/create_test_h5.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/mdlstm/artificial/forwardconfig +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/mdlstm/artificial/go.sh +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/mdlstm/artificial/trainconfig +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/mdlstm/artificial_rgb/go.sh +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/pyproject.toml +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/__old_mod_loader__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/__setup__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/config.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/datasets/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/datasets/audio.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/datasets/basic.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/datasets/bundle_file.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/datasets/cached.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/datasets/cached2.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/datasets/generating.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/datasets/hdf.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/datasets/lm.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/datasets/map.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/datasets/meta.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/datasets/multi_proc.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/datasets/normalization_data.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/datasets/numpy_dump.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/datasets/raw_wav.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/datasets/sprint.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/datasets/stereo.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/datasets/util/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/datasets/util/feature_extraction.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/datasets/util/strings.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/datasets/util/vocabulary.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/engine/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/engine/base.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/engine/batch.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/__main__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/.git +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/graph_editor/README.md +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/graph_editor/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/graph_editor/edit.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/graph_editor/reroute.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/graph_editor/select.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/graph_editor/subgraph.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/graph_editor/transform.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/extern/graph_editor/util.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/forward_iface.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/_backend.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/_native/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/_native/backend.cpp +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/_native/backend.hpp +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/_native/module.cpp +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/_native/module.hpp +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/_native/py_utils.hpp +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/_native/tensor_ops.cpp +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/_native/tensor_ops.hpp +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/_numpy_backend.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/_random_journal.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/_utils.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/array_.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/audio/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/audio/mel.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/audio/specaugment.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/backend.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/build_from_dict.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/cond.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/const.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/container.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/control_flow_ctx.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/conv.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/conversions/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/conversions/espnet_e_branchformer.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/conversions/torch_nn.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/decoder/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/decoder/transformer.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/device.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/dims.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/dropout.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/dtype.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/encoder/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/encoder/base.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/encoder/conformer.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/encoder/conformer_v2.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/encoder/e_branchformer.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/encoder/transformer.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/gradient.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/graph.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/hooks.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/init.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/label_smoothing.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/linear.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/loop.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/loss.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/math_.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/matmul.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/module.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/normalization.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/parameter.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/parametrizations.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/parametrize.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/piecewise_linear.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/rand.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/rec.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/reduce.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/run_ctx.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/signal.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/state.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/stepwise_scheduler.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/tensor_array.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/types.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/import_/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/import_/common.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/import_/git.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/import_/import_.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/learning_rate_control.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/log.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/native_op.cpp +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/native_op.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/pretrain.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/sprint/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/sprint/cache.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/sprint/control.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/sprint/error_signals.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/sprint/extern_interface.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/sprint/interface.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tensor/README.md +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tensor/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tensor/_dim_extra.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tensor/_tensor_extra.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tensor/_tensor_mixin_base.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tensor/_tensor_op_overloads.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tensor/control_flow_ctx.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tensor/dim.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tensor/marked_dim.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tensor/tensor.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tensor/tensor_dict.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tensor/utils.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/compat.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/data_pipeline.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/distributed.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/engine.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/README.md +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/_backend.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/_utils.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/cond.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/dims.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/layer.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/loop.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/make_layer.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/masked_computation.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/frontend_low_level/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/frontend_low_level/_backend.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/horovod.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/hyper_param_tuning.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/layers/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/layers/base.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/layers/basic.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/layers/rec.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/layers/segmental_model.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/layers/signal_processing.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/layers/variable.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/native_op.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/network.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/sprint.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/updater.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/util/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/util/basic.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/util/data.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/util/gradient_checkpoint.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/util/ken_lm.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/tf/util/open_fst.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/README.md +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/data/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/data/extern_data.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/data/pipeline.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/data/queued_data_iter.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/data/tensor_utils.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/distributed.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/engine.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/frontend/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/frontend/_backend.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/frontend/_rand.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/frontend/bridge.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/frontend/raw_ops.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/optim/README.md +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/optim/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/optim/lion.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/updater.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/util/README.md +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/util/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/util/array_.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/util/debug_inf_nan.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/util/diagnose_gpu.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/util/exception_helper.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/util/gradient_checkpoint.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/util/module.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/torch/util/scaled_gradient.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/util/__init__.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/util/basic.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/util/better_exchook.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/util/bpe.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/util/debug_helpers.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/util/file_cache.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/util/fsa.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/util/literal_py_to_pickle.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/util/math.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/util/native_code_compiler.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/util/pprint.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/util/py_compat.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/util/py_ext_mod_compiler.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/util/result_with_reason.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/util/sig_proc.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/util/task_system.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/util/train_proc_manager.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/util/watch_memory.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn.egg-info/dependency_links.txt +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn.egg-info/top_level.txt +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/rnn.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/setup.cfg +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/setup.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/DummySprintExec.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/PyCharm-inspection-profile.xml +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/PyCharm.idea/.gitignore +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/PyCharm.idea/.name +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/PyCharm.idea/misc.xml +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/PyCharm.idea/modules.xml +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/PyCharm.idea/returnn.iml +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/_set_num_threads1.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/_setup_returnn_env.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/_setup_test_env.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/bpe-unicode-demo.codes +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/bpe-unicode-demo.vocab +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/lexicon_opt.fst +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/lexicon_opt.isyms +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/lexicon_opt.jpg +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/lexicon_opt.osyms +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/lint_common.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/pycharm-inspect.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/pylint.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/returnn-as-framework.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/rf_utils.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/spelling.dic +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_Config.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_Fsa.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_GeneratingDataset.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_HDFDataset.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_LearningRateControl.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_MultiProcDataset.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_Pretrain.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_ResNet.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_SprintDataset.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_SprintInterface.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_TFEngine.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_TFNativeOp.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_TFNetworkLayer.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_TFNetworkRecLayer.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_TFNetworkSigProcLayer.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_TFUpdater.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_TFUtil.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_TF_determinism.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_TaskSystem.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_TaskSystem_SharedMem.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_TranslationDataset.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_Util.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_demos.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_fork_exec.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_hdf_dump.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_rf_array.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_rf_base.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_rf_cond.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_rf_const.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_rf_container.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_rf_conv.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_rf_decoder_transformer.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_rf_encoder_conformer.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_rf_gradient.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_rf_label_smoothing.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_rf_loop.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_rf_math.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_rf_normalization.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_rf_piecewise_linear.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_rf_rec.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_rf_reduce.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_rf_signal.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_tensor.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_tools.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_torch_dataset.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_torch_engine.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_torch_frontend.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_torch_internal_frontend.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/test_torch_util.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tests/torch_utils.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/_setup_returnn_env.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/analyze-dataset-batches.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/bliss-collect-seq-lens.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/bliss-dump-text.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/bliss-get-segment-names.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/bliss-to-ogg-zip.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/bpe-create-lexicon.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/calculate-word-error-rate.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/cleanup-old-models.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/collect-orth-symbols.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/collect-words.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/compile_native_op.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/compile_tf_graph.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/debug-dump-search-scores.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/debug-plot-search-scores.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/dump-dataset-raw-strings.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/dump-dataset.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/dump-forward-stats.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/dump-forward.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/dump-network-json.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/dump-pickle.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/extract_state_tying_from_dataset.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/get-attention-weights.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/get-best-model-epoch.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/hdf_dump.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/hdf_dump_translation_dataset.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/import-blocks-mt-model.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/import-t2t-mt-model.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/lattice_rescorer/.gitignore +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/lattice_rescorer/Makefile +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/lattice_rescorer/README.md +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/lattice_rescorer/example/README.md +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/lattice_rescorer/example/libs_list +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/lattice_rescorer/example/state_vars_list +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/lattice_rescorer/example/tensor_names_list +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/lattice_rescorer/file.h +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/lattice_rescorer/main.cc +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/lattice_rescorer/rescorer.h +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/lattice_rescorer/vocabulary.cc +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/lattice_rescorer/vocabulary.h +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/tf_avg_checkpoints.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/tf_inspect_checkpoint.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/tf_inspect_summary_log.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/torch_avg_checkpoints.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/torch_export_to_onnx.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/torch_inspect_checkpoint.py +0 -0
- {returnn-1.20250109.145311 → returnn-1.20250114.164134}/tools/torch_inspect_checkpoint_and_opt.py +0 -0
{returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/__main__.py
RENAMED

@@ -513,10 +513,9 @@ def finalize(error_occurred=False):
         destroy_process_group()
 
 
-def need_data():
+def need_data() -> bool:
     """
     :return: whether we need to init the data (call :func:`init_data`) for the current task (:func:`execute_main_task`)
-    :rtype: bool
     """
     if config.has("need_data") and not config.bool("need_data", True):
         return False
{returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/datasets/distrib_files.py
RENAMED

@@ -364,8 +364,7 @@ class DistributeFilesDataset(CachedDataset2):
         Distributes the files from files_order into ``num_bins`` while attempting
         to make every bin as evenly sized (based on ``file_sizes``) as possible.
         """
-
-        total_size = sum(file_sizes.values())
+        total_size = sum(file_sizes[_get_key_for_file_tree(f_tree)] for f_tree in files_order)
         avg_size_per_sub_epoch = total_size / num_bins
         # Now evenly distribute the files over the bins.
         # Note that many one-pass variants of algorithms to evenly distribute
{returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/datasets/postprocessing.py
RENAMED

@@ -138,11 +138,13 @@ class PostprocessingDataset(CachedDataset2):
         self._in_tensor_dict_template = TensorDict(
             {name: self._make_tensor_template_from_input(name) for name in self._dataset.get_data_keys()}
         )
+        self.labels = {}
         if self._map_outputs is not None:
             self._out_tensor_dict_template = TensorDict()
             self._out_tensor_dict_template.update(self._map_outputs, auto_convert=True)
         else:
             self._out_tensor_dict_template = self._in_tensor_dict_template.copy_template()
+            self.labels = self._dataset.labels.copy()
         # update only after _out_tensor_dict_template has been created from _in_tensor_dict_template
         self._in_tensor_dict_template.update({"seq_tag": {"dims": (), "dtype": "string"}}, auto_convert=True)
         self.num_outputs = {
@@ -152,8 +154,9 @@ class PostprocessingDataset(CachedDataset2):
         self._default_input = "data" if "data" in self.num_outputs else next(iter(self.num_outputs.keys()))
         self.num_inputs = self.num_outputs[self._default_input][0]
 
-        self.labels = {}
         for k, t in self._out_tensor_dict_template.data.items():
+            if self.labels.get(k):
+                continue
             if t.vocab:
                 self.labels[k] = t.vocab.labels
             elif t.sparse_dim:  # sparse_dim but not vocab
returnn-1.20250114.164134/returnn/frontend/_cache.py

@@ -0,0 +1,208 @@
+"""
+Cache, to store some data.
+See :class:`Cache`.
+
+One use case example is :func:`sinusoidal_positional_encoding` and :func:`relative_positional_encoding`.
+"""
+
+from __future__ import annotations
+from typing import Optional, Union, Any, Type, Callable, Tuple, Dict
+from weakref import ref
+import tree
+from returnn.util.lru_cache import lru_cache
+from returnn.tensor import Tensor, Dim
+import returnn.frontend as rf
+from returnn.frontend._backend import global_backend, get_backend_by_raw_tensor_type, Backend
+
+
+class Cache:
+    """
+    Cache, intended for internal use of RF functions.
+
+    One use case example is :func:`sinusoidal_positional_encoding` and :func:`relative_positional_encoding`.
+
+    There are some specific properties we must take care of:
+
+    - Lifetime of values: For graph-based backends, it can only stay alive for the current run ctx.
+      (For eager-based backends, there is no such restriction.)
+    - Size: Put some limit, use LRU logic.
+    - Dims: Use only weakrefs. Some Dim should not stay alive just because of the cache.
+    - Scalar dynamic Dims in eager mode, or static dims: Instead of the Dim, use the dim value for the key
+      (and map the output to the Dim).
+    - Tensor as keys: Use weakrefs. Also don't check by value but by identity.
+    """
+
+    def __init__(self, max_size: int):
+        # Use lru_cache here, but not via a decorator,
+        # as we want custom set/get logic.
+        # Also, we want the lru_cache to be local to this Cache instance,
+        # not shared over all instances of this class.
+        self._lru_cache = lru_cache(max_size)(_lru_cache_dummy_func)
+
+    def get(self, key, default=None):
+        """
+        :param key:
+        :param default:
+        :return: entry in cache or default
+        """
+        key_transformed = _transform_key(key)
+        key_transformed_orig, value = self._lru_cache.cache_peek(key_transformed, fallback=(None, None))
+        if key_transformed_orig is None:
+            return default
+
+        assert len(key_transformed_orig) == len(key_transformed)
+        dim_map = {}  # orig -> new
+        for key_item_orig, key_item in zip(key_transformed_orig, key_transformed):
+            if isinstance(key_item_orig, DimWrapper):
+                assert isinstance(key_item, DimWrapper)
+                dim_orig = key_item_orig.dim_ref()
+                dim = key_item.dim_ref()
+                assert isinstance(dim_orig, Dim) and isinstance(dim, Dim)
+                dim_map[dim_orig] = dim
+
+        # noinspection PyShadowingNames
+        def _map_output(output):
+            if isinstance(output, Dim):
+                return dim_map.get(output, output)
+            if isinstance(output, Tensor):
+                if any(dim in dim_map for dim in output.dims):
+                    out_raw = output.raw_tensor
+                    for axis, dim in enumerate(output.dims):
+                        if dim in dim_map:
+                            output = output.copy_template_replace_dim_tag(axis=axis, new_dim_tag=dim_map[dim])
+                    output.raw_tensor = out_raw
+            return output
+
+        return tree.map_structure(_map_output, value)
+
+    def set(self, key, value):
+        """
+        :param key:
+        :param value:
+        """
+
+        def _finalize_callback(*_args):
+            self._lru_cache.cache_pop(key_transformed, fallback=None)
+
+        key_backend = _get_backend(key)
+        value_backend = _get_backend(value)
+        if key_backend != value_backend:
+            raise ValueError(f"key and value have different backends: {key_backend} != {value_backend}")
+        key_transformed = _transform_key(key, finalize_callback=_finalize_callback)
+        self._lru_cache.cache_set(key_transformed, result=(key_transformed, value))
+
+
+def _lru_cache_dummy_func(*_args, **_kwargs):
+    raise Exception("This should not be called.")
+
+
+def _transform_key(
+    key: Any, *, finalize_callback: Optional[Callable] = None, collected_dim_map: Optional[Dict[Dim, DimWrapper]] = None
+) -> Tuple[Union[Type[Backend], ref[rf.RunCtx], _KeyItemType], ...]:
+    backend = _get_backend(key)
+    keys_flat = [backend]
+    if not backend.executing_eagerly():
+        # See comment above: If graph-mode, the cached value becomes invalid
+        # when the current run ctx goes out of scope.
+        keys_flat.append(ref(rf.get_run_ctx(), finalize_callback))
+    if collected_dim_map is None:
+        collected_dim_map = {}
+    keys_flat += [
+        _transform_key_item(key, finalize_callback=finalize_callback, collected_dim_map=collected_dim_map)
+        for key in tree.flatten(key)
+    ]
+    return tuple(keys_flat)
+
+
+def _transform_key_item(
+    key: Any, *, finalize_callback: Optional[Callable] = None, collected_dim_map: Dict[Dim, DimWrapper]
+) -> _KeyItemType:
+    if isinstance(key, Tensor):
+        return TensorWrapper(key, finalize_callback=finalize_callback)
+    if isinstance(key, Dim):
+        if key in collected_dim_map:
+            return collected_dim_map[key]
+        dim_wrapper = DimWrapper(key, finalize_callback=finalize_callback)
+        collected_dim_map[key] = dim_wrapper
+        return dim_wrapper
+    if not isinstance(key, _RawTypes):
+        raise TypeError(f"unexpected type {type(key)}")
+    return key
+
+
+def _get_backend(*args) -> Type[Backend]:
+    args_flat = tree.flatten(args)
+    for arg in args_flat:
+        if isinstance(arg, Tensor) and arg.raw_tensor is not None:
+            return get_backend_by_raw_tensor_type(type(arg.raw_tensor))
+    return global_backend.__class__
+
+
+class TensorWrapper:
+    """
+    Wraps :class:`Tensor`.
+    Using weakref for the tensor, including also ``raw_tensor``.
+    Equality is given if the identity is the same, for the Tensor itself and the raw_tensor.
+    No value of the tensor is checked.
+    """
+
+    def __init__(self, value: Tensor, *, finalize_callback):
+        self.value_ref = ref(value, finalize_callback)
+        self.raw_value_ref = ref(value.raw_tensor, finalize_callback)
+        self._hash = id(value)
+
+    def __eq__(self, other):
+        if isinstance(other, TensorWrapper):
+            return self.value_ref() is other.value_ref() and self.raw_value_ref() is other.raw_value_ref()
+        return False
+
+    def __hash__(self):
+        return self._hash
+
+
+class DimWrapper:
+    """
+    Wraps :class:`Dim`.
+    Using weakref for the dim.
+    If the size is scalar and known, equality is given when the size is equal (and dim tag is ignored)
+    """
+
+    def __init__(self, dim: Dim, *, finalize_callback):
+        self.dim_value = _dim_value_for_key(dim)
+        # finalize_callback only needed when we don't use the dim value.
+        self.dim_ref = ref(dim, finalize_callback if self.dim_value is None else None)
+        self.dyn_size_ref = (
+            # E.g. consider the batch dim or data spatial dim which would be reset each step.
+            # We need some ref to the dyn size, and finalize this key when it goes out of scope.
+            # This is only needed when there is no info on the static size (or eager scalar dyn size).
+            ref(dim.dyn_size_ext.raw_tensor, finalize_callback)
+            if self.dim_value is None and dim.dyn_size_ext and dim.dyn_size_ext.raw_tensor is not None
+            else None
+        )
+        self._hash = hash(dim) if self.dim_value is None else hash(self.dim_value)
+
+    def __eq__(self, other):
+        if isinstance(other, DimWrapper):
+            if self.dim_value is not None:
+                return self.dim_value == other.dim_value
+            return self.dim_ref() == other.dim_ref() and self.dyn_size_ref() is other.dyn_size_ref()
+        return False
+
+    def __hash__(self):
+        return self._hash
+
+
+def _dim_value_for_key(dim: Dim) -> Optional[int]:
+    if dim.size is not None:
+        return dim.size
+    if dim.dyn_size_ext and not dim.dyn_size_ext.dims:
+        if dim.dyn_size_ext.raw_tensor is not None:
+            # noinspection PyProtectedMember
+            if dim.dyn_size_ext._raw_backend.executing_eagerly():
+                return int(dim.get_dim_value())
+    return None
+
+
+# For now... we might extend it by some more types.
+_KeyItemType = Union[None, str, bool, int, float, TensorWrapper, DimWrapper]
+_RawTypes = (type(None), str, bool, int, float)
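For orientation: the attention.py hunks below use this new Cache via a plain get/compute/set pattern. The following is a minimal sketch of that pattern outside RETURNN's internals; the names expensive_square/cached_square and the key layout are hypothetical illustrations, not part of this diff.

from returnn.frontend._cache import Cache

_cache = Cache(max_size=128)  # LRU-limited cache, added in this release

def expensive_square(n: int) -> int:
    """Stand-in for an expensive computation (hypothetical)."""
    return n * n

def cached_square(n: int) -> int:
    key = ("square", n)  # plain str/int key items are supported (see _transform_key_item above)
    value = _cache.get(key)
    if value is None:
        value = expensive_square(n)
        _cache.set(key, value)
    return value

print(cached_square(12))  # computed once; later calls with the same key hit the cache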
{returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/attention.py
RENAMED

@@ -4,10 +4,10 @@ Attention
 
 from __future__ import annotations
 from typing import Tuple, Union, Optional, Sequence
-import weakref
 import logging
 from returnn.tensor import Tensor, Dim, single_step_dim
 import returnn.frontend as rf
+from returnn.frontend._cache import Cache
 
 
 __all__ = [
@@ -330,7 +330,7 @@ class RotaryPosCausalSelfAttention(CausalSelfAttention):
         q = _apply_rope(
             q,
             (
-                rf.gather(pos_enc, axis=hist_dim, indices=
+                rf.gather(pos_enc, axis=hist_dim, indices=rf.last_frame_position_of_dim(hist_dim))
                 if axis == single_step_dim
                 else rf.replace_dim(pos_enc, in_dim=hist_dim, out_dim=axis)[0]
             ),
@@ -892,7 +892,7 @@ def _make_indices(
     return indices, out_spatial_dim
 
 
-_relative_positional_encoding_cache =
+_relative_positional_encoding_cache = Cache(128)
 
 
 def relative_positional_encoding(
@@ -924,10 +924,10 @@ def relative_positional_encoding(
     """
    if not dtype:
        dtype = rf.get_default_float_dtype()
-    cache = _relative_positional_encoding_cache.setdefault(rf.get_run_ctx(), {})
     cache_key = (query_spatial_dim, key_value_spatial_dim, feat_dim, query_offset, dtype)
-
-
+    cache_entry = _relative_positional_encoding_cache.get(cache_key)
+    if cache_entry is not None:
+        return cache_entry
     import math
 
     with rf.control_flow_ctx(None):
@@ -946,11 +946,11 @@ def relative_positional_encoding(
             allow_missing_implicit_dims=True,
         )
         emb.feature_dim = feat_dim
-
+        _relative_positional_encoding_cache.set(cache_key, (emb, out_spatial_dim))
         return emb, out_spatial_dim
 
 
-_sinusoidal_positional_encoding_cache =
+_sinusoidal_positional_encoding_cache = Cache(128)  # (spatial_dim, feat_dim) -> enc
 
 
 def sinusoidal_positional_encoding(
@@ -982,10 +982,10 @@ def sinusoidal_positional_encoding(
         dtype = rf.get_default_float_dtype()
     if not device:
         device = rf.get_default_device()
-    cache = _sinusoidal_positional_encoding_cache.setdefault(rf.get_run_ctx(), {})
     cache_key = (spatial_dim, feat_dim, offset, base, dtype, device)
-
-
+    cache_entry = _sinusoidal_positional_encoding_cache.get(cache_key)
+    if cache_entry is not None:
+        return cache_entry
     import math
 
     with rf.control_flow_ctx(None):
@@ -1012,7 +1012,7 @@ def sinusoidal_positional_encoding(
             {spatial_dim, feat_dim} if spatial_dim != single_step_dim else {feat_dim}, allow_missing_implicit_dims=True
         )
         emb.feature_dim = feat_dim
-
+        _sinusoidal_positional_encoding_cache.set(cache_key, emb)
         return emb
 
 
{returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/frontend/conversions/hf_llama.py
RENAMED

@@ -8,6 +8,7 @@ import returnn.frontend as rf
 from returnn.frontend.decoder.transformer import TransformerDecoder, TransformerDecoderLayer, FeedForwardGated
 
 if TYPE_CHECKING:
+    # noinspection PyUnresolvedReferences,PyPackageRequirements,PyProtectedMember
     from transformers.models.llama.modeling_llama import (
         LlamaModel,
         LlamaForCausalLM,
@@ -25,6 +26,8 @@ def import_params_hf_llama_to_rf_transformer_decoder(
     Import params from HF Llama model to RF :class:`TransformerDecoder`.
     """
     import torch
+
+    # noinspection PyUnresolvedReferences,PyPackageRequirements,PyProtectedMember
     from transformers.models.llama.modeling_llama import LlamaModel, LlamaForCausalLM, LlamaDecoderLayer
 
     print("HF Model:")
@@ -206,10 +209,10 @@ def import_params_hf_llama_att_to_rf_rotary_att(model_hf: LlamaAttention, model_
     """
     import torch
 
-    assert model_hf.
-    assert model_hf.hidden_size == model_rf.in_dim.dimension
-    dim = model_hf.hidden_size
-    nh = model_hf.
+    assert model_hf.config.num_attention_heads == model_rf.num_heads.dimension
+    assert model_hf.config.hidden_size == model_rf.in_dim.dimension
+    dim = model_hf.config.hidden_size
+    nh = model_hf.config.num_attention_heads
     hdim = dim // nh
 
     print("HF Model:")
{returnn-1.20250109.145311 → returnn-1.20250114.164134}/returnn/util/debug.py
RENAMED

@@ -182,6 +182,19 @@ def init_better_exchook():
 
     sys.excepthook = excepthook
 
+    def threading_excepthook(args, /):
+        """
+        Thread-specific excepthook to ensure the main thread is killed on unhandled exceptions in sub threads.
+        """
+        log_out = log.v1 or sys.stdout
+        print(
+            f"Unhandled exception in thread {threading.current_thread()}, going to interrupt main thread:", file=log_out
+        )
+        better_exchook(args.exc_type, args.exc_value, args.exc_traceback, autodebugshell=False, file=log_out)
+        thread.interrupt_main()
+
+    threading.excepthook = threading_excepthook
+
     from returnn.util.basic import to_bool
 
     if os.environ.get("DEBUG_WARN_WITH_TRACEBACK") and to_bool(os.environ.get("DEBUG_WARN_WITH_TRACEBACK")):