returnn 1.20250110.143435__tar.gz → 1.20250113.193158__tar.gz
This diff shows the contents of publicly available package versions as released to one of the supported registries; it is provided for informational purposes only.
Note: this release of returnn has been flagged as potentially problematic.
- {returnn-1.20250110.143435/returnn.egg-info → returnn-1.20250113.193158}/PKG-INFO +1 -1
- returnn-1.20250113.193158/_setup_info_generated.py +2 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/requirements.txt +0 -1
- returnn-1.20250113.193158/returnn/frontend/_cache.py +208 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/attention.py +12 -12
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/conversions/hf_llama.py +7 -4
- returnn-1.20250113.193158/returnn/util/lru_cache.py +309 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/util/py-to-pickle.cpp +1 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158/returnn.egg-info}/PKG-INFO +1 -1
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn.egg-info/SOURCES.txt +2 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_rf_attention.py +17 -9
- returnn-1.20250110.143435/_setup_info_generated.py +0 -2
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/.editorconfig +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/.gitignore +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/.gitmodules +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/.kateconfig +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/CHANGELOG.md +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/CODEOWNERS +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/CONTRIBUTING.md +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/LICENSE +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/MANIFEST.in +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/README.rst +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/12AX.cluster_map +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/_setup_returnn_env.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-fwd.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-horovod-mpi.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-horovod-mpi.py.sh +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-horovod-mpi.sh +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-hyper-param-tuning.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-iter-dataset.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-list-devices.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-lua-torch-layer.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-pretrain.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-record-and-push-to-webserver.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-returnn-as-framework.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-rf-pt-benchmark.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-rf.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-rhn-enwik8.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-sprint-interface.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-tf-att-copy.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-tf-attention.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-tf-enc-dec.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-tf-hard-att-copy.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-tf-lstm-benchmark.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-tf-native-lstm.12ax.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-tf-native-lstm2.12ax.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-tf-neural-transducer.12ax.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-tf-rec-explicit-lstm.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-tf-rec-explicit-rnn.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-tf-rec-self-att.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-tf-search-compiled-graph.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-timit-lstm-ctc.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-torch.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/demo.sh +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/mdlstm/IAM/README.txt +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/mdlstm/IAM/chars.txt +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/mdlstm/IAM/config_demo +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/mdlstm/IAM/config_fwd +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/mdlstm/IAM/config_real +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/mdlstm/IAM/decode.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/mdlstm/IAM/go.sh +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/mdlstm/IAM/lines.txt +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/mdlstm/IAM/split/eval.txt +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/mdlstm/IAM/split/train.txt +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/mdlstm/IAM/split/valid.txt +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/mdlstm/README.md +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/mdlstm/artificial/create_test_h5.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/mdlstm/artificial/forwardconfig +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/mdlstm/artificial/go.sh +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/mdlstm/artificial/trainconfig +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/mdlstm/artificial_rgb/go.sh +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/pyproject.toml +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/__main__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/__old_mod_loader__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/__setup__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/config.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/datasets/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/datasets/audio.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/datasets/basic.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/datasets/bundle_file.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/datasets/cached.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/datasets/cached2.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/datasets/distrib_files.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/datasets/generating.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/datasets/hdf.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/datasets/lm.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/datasets/map.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/datasets/meta.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/datasets/multi_proc.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/datasets/normalization_data.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/datasets/numpy_dump.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/datasets/postprocessing.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/datasets/raw_wav.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/datasets/sprint.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/datasets/stereo.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/datasets/util/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/datasets/util/feature_extraction.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/datasets/util/strings.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/datasets/util/vocabulary.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/engine/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/engine/base.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/engine/batch.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/__main__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/warp-rna/.git +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/graph_editor/README.md +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/graph_editor/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/graph_editor/edit.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/graph_editor/reroute.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/graph_editor/select.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/graph_editor/subgraph.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/graph_editor/transform.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/extern/graph_editor/util.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/forward_iface.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/_backend.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/_native/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/_native/backend.cpp +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/_native/backend.hpp +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/_native/module.cpp +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/_native/module.hpp +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/_native/py_utils.hpp +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/_native/tensor_ops.cpp +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/_native/tensor_ops.hpp +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/_numpy_backend.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/_random_journal.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/_utils.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/array_.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/audio/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/audio/mel.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/audio/specaugment.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/backend.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/build_from_dict.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/cond.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/const.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/container.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/control_flow_ctx.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/conv.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/conversions/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/conversions/espnet_e_branchformer.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/conversions/torch_nn.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/decoder/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/decoder/transformer.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/device.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/dims.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/dropout.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/dtype.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/encoder/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/encoder/base.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/encoder/conformer.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/encoder/conformer_v2.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/encoder/e_branchformer.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/encoder/transformer.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/gradient.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/graph.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/hooks.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/init.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/label_smoothing.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/linear.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/loop.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/loss.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/math_.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/matmul.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/module.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/normalization.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/parameter.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/parametrizations.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/parametrize.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/piecewise_linear.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/rand.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/rec.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/reduce.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/run_ctx.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/signal.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/state.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/stepwise_scheduler.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/tensor_array.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/types.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/import_/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/import_/common.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/import_/git.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/import_/import_.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/learning_rate_control.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/log.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/native_op.cpp +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/native_op.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/pretrain.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/sprint/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/sprint/cache.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/sprint/control.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/sprint/error_signals.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/sprint/extern_interface.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/sprint/interface.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tensor/README.md +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tensor/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tensor/_dim_extra.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tensor/_tensor_extra.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tensor/_tensor_mixin_base.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tensor/_tensor_op_overloads.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tensor/control_flow_ctx.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tensor/dim.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tensor/marked_dim.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tensor/tensor.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tensor/tensor_dict.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tensor/utils.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/compat.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/data_pipeline.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/distributed.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/engine.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/frontend_layers/README.md +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/frontend_layers/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/frontend_layers/_backend.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/frontend_layers/_utils.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/frontend_layers/cond.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/frontend_layers/dims.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/frontend_layers/layer.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/frontend_layers/loop.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/frontend_layers/make_layer.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/frontend_layers/masked_computation.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/frontend_low_level/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/frontend_low_level/_backend.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/horovod.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/hyper_param_tuning.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/layers/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/layers/base.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/layers/basic.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/layers/rec.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/layers/segmental_model.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/layers/signal_processing.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/layers/variable.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/native_op.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/network.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/sprint.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/updater.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/util/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/util/basic.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/util/data.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/util/gradient_checkpoint.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/util/ken_lm.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/tf/util/open_fst.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/README.md +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/data/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/data/extern_data.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/data/pipeline.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/data/queued_data_iter.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/data/tensor_utils.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/distributed.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/engine.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/frontend/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/frontend/_backend.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/frontend/_rand.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/frontend/bridge.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/frontend/raw_ops.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/optim/README.md +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/optim/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/optim/lion.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/updater.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/util/README.md +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/util/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/util/array_.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/util/debug_inf_nan.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/util/diagnose_gpu.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/util/exception_helper.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/util/gradient_checkpoint.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/util/module.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/torch/util/scaled_gradient.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/util/__init__.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/util/basic.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/util/better_exchook.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/util/bpe.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/util/debug.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/util/debug_helpers.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/util/file_cache.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/util/fsa.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/util/literal_py_to_pickle.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/util/math.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/util/native_code_compiler.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/util/pprint.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/util/py_compat.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/util/py_ext_mod_compiler.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/util/result_with_reason.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/util/sig_proc.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/util/task_system.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/util/train_proc_manager.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/util/watch_memory.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn.egg-info/dependency_links.txt +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn.egg-info/top_level.txt +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/rnn.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/setup.cfg +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/setup.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/DummySprintExec.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/PyCharm-inspection-profile.xml +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/PyCharm.idea/.gitignore +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/PyCharm.idea/.name +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/PyCharm.idea/misc.xml +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/PyCharm.idea/modules.xml +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/PyCharm.idea/returnn.iml +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/_set_num_threads1.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/_setup_returnn_env.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/_setup_test_env.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/bpe-unicode-demo.codes +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/bpe-unicode-demo.vocab +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/lexicon_opt.fst +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/lexicon_opt.isyms +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/lexicon_opt.jpg +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/lexicon_opt.osyms +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/lint_common.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/pycharm-inspect.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/pylint.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/returnn-as-framework.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/rf_utils.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/spelling.dic +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_Config.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_Dataset.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_Fsa.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_GeneratingDataset.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_HDFDataset.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_LearningRateControl.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_Log.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_MultiProcDataset.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_Pretrain.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_ResNet.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_SprintDataset.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_SprintInterface.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_TFEngine.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_TFNativeOp.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_TFNetworkLayer.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_TFNetworkRecLayer.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_TFNetworkSigProcLayer.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_TFUpdater.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_TFUtil.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_TF_determinism.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_TaskSystem.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_TaskSystem_SharedMem.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_TranslationDataset.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_Util.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_demos.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_fork_exec.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_hdf_dump.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_rf_array.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_rf_base.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_rf_cond.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_rf_const.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_rf_container.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_rf_conv.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_rf_decoder_transformer.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_rf_encoder_conformer.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_rf_gradient.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_rf_label_smoothing.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_rf_loop.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_rf_math.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_rf_normalization.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_rf_piecewise_linear.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_rf_rec.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_rf_reduce.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_rf_signal.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_tensor.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_threading.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_tools.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_torch_dataset.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_torch_engine.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_torch_frontend.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_torch_internal_frontend.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/test_torch_util.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tests/torch_utils.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/_setup_returnn_env.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/analyze-dataset-batches.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/bliss-collect-seq-lens.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/bliss-dump-text.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/bliss-get-segment-names.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/bliss-to-ogg-zip.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/bpe-create-lexicon.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/calculate-word-error-rate.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/cleanup-old-models.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/collect-orth-symbols.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/collect-words.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/compile_native_op.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/compile_tf_graph.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/debug-dump-search-scores.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/debug-plot-search-scores.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/dump-dataset-raw-strings.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/dump-dataset.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/dump-forward-stats.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/dump-forward.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/dump-network-json.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/dump-pickle.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/extract_state_tying_from_dataset.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/get-attention-weights.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/get-best-model-epoch.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/hdf_dump.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/hdf_dump_translation_dataset.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/import-blocks-mt-model.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/import-t2t-mt-model.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/lattice_rescorer/.gitignore +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/lattice_rescorer/Makefile +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/lattice_rescorer/README.md +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/lattice_rescorer/example/README.md +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/lattice_rescorer/example/libs_list +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/lattice_rescorer/example/state_vars_list +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/lattice_rescorer/example/tensor_names_list +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/lattice_rescorer/file.h +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/lattice_rescorer/main.cc +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/lattice_rescorer/rescorer.h +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/lattice_rescorer/vocabulary.cc +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/lattice_rescorer/vocabulary.h +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/tf_avg_checkpoints.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/tf_inspect_checkpoint.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/tf_inspect_summary_log.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/torch_avg_checkpoints.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/torch_export_to_onnx.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/torch_inspect_checkpoint.py +0 -0
- {returnn-1.20250110.143435 → returnn-1.20250113.193158}/tools/torch_inspect_checkpoint_and_opt.py +0 -0
returnn-1.20250113.193158/returnn/frontend/_cache.py
ADDED
@@ -0,0 +1,208 @@
+"""
+Cache, to store some data.
+See :class:`Cache`.
+
+One use case example is :func:`sinusoidal_positional_encoding` and :func:`relative_positional_encoding`.
+"""
+
+from __future__ import annotations
+from typing import Optional, Union, Any, Type, Callable, Tuple, Dict
+from weakref import ref
+import tree
+from returnn.util.lru_cache import lru_cache
+from returnn.tensor import Tensor, Dim
+import returnn.frontend as rf
+from returnn.frontend._backend import global_backend, get_backend_by_raw_tensor_type, Backend
+
+
+class Cache:
+    """
+    Cache, intended for internal use of RF functions.
+
+    One use case example is :func:`sinusoidal_positional_encoding` and :func:`relative_positional_encoding`.
+
+    There are some specific properties we must take care of:
+
+    - Lifetime of values: For graph-based backends, it can only stay alive for the current run ctx.
+      (For eager-based backends, there is no such restriction.)
+    - Size: Put some limit, use LRU logic.
+    - Dims: Use only weakrefs. Some Dim should not stay alive just because of the cache.
+    - Scalar dynamic Dims in eager mode, or static dims: Instead of the Dim, use the dim value for the key
+      (and map the output to the Dim).
+    - Tensor as keys: Use weakrefs. Also don't check by value but by identity.
+    """
+
+    def __init__(self, max_size: int):
+        # Use lru_cache here, but not via a decorator,
+        # as we want custom set/get logic.
+        # Also, we want the lru_cache to be local to this Cache instance,
+        # not shared over all instances of this class.
+        self._lru_cache = lru_cache(max_size)(_lru_cache_dummy_func)
+
+    def get(self, key, default=None):
+        """
+        :param key:
+        :param default:
+        :return: entry in cache or default
+        """
+        key_transformed = _transform_key(key)
+        key_transformed_orig, value = self._lru_cache.cache_peek(key_transformed, fallback=(None, None))
+        if key_transformed_orig is None:
+            return default
+
+        assert len(key_transformed_orig) == len(key_transformed)
+        dim_map = {}  # orig -> new
+        for key_item_orig, key_item in zip(key_transformed_orig, key_transformed):
+            if isinstance(key_item_orig, DimWrapper):
+                assert isinstance(key_item, DimWrapper)
+                dim_orig = key_item_orig.dim_ref()
+                dim = key_item.dim_ref()
+                assert isinstance(dim_orig, Dim) and isinstance(dim, Dim)
+                dim_map[dim_orig] = dim
+
+        # noinspection PyShadowingNames
+        def _map_output(output):
+            if isinstance(output, Dim):
+                return dim_map.get(output, output)
+            if isinstance(output, Tensor):
+                if any(dim in dim_map for dim in output.dims):
+                    out_raw = output.raw_tensor
+                    for axis, dim in enumerate(output.dims):
+                        if dim in dim_map:
+                            output = output.copy_template_replace_dim_tag(axis=axis, new_dim_tag=dim_map[dim])
+                    output.raw_tensor = out_raw
+            return output
+
+        return tree.map_structure(_map_output, value)
+
+    def set(self, key, value):
+        """
+        :param key:
+        :param value:
+        """
+
+        def _finalize_callback(*_args):
+            self._lru_cache.cache_pop(key_transformed, fallback=None)
+
+        key_backend = _get_backend(key)
+        value_backend = _get_backend(value)
+        if key_backend != value_backend:
+            raise ValueError(f"key and value have different backends: {key_backend} != {value_backend}")
+        key_transformed = _transform_key(key, finalize_callback=_finalize_callback)
+        self._lru_cache.cache_set(key_transformed, result=(key_transformed, value))
+
+
+def _lru_cache_dummy_func(*_args, **_kwargs):
+    raise Exception("This should not be called.")
+
+
+def _transform_key(
+    key: Any, *, finalize_callback: Optional[Callable] = None, collected_dim_map: Optional[Dict[Dim, DimWrapper]] = None
+) -> Tuple[Union[Type[Backend], ref[rf.RunCtx], _KeyItemType], ...]:
+    backend = _get_backend(key)
+    keys_flat = [backend]
+    if not backend.executing_eagerly():
+        # See comment above: If graph-mode, the cached value becomes invalid
+        # when the current run ctx goes out of scope.
+        keys_flat.append(ref(rf.get_run_ctx(), finalize_callback))
+    if collected_dim_map is None:
+        collected_dim_map = {}
+    keys_flat += [
+        _transform_key_item(key, finalize_callback=finalize_callback, collected_dim_map=collected_dim_map)
+        for key in tree.flatten(key)
+    ]
+    return tuple(keys_flat)
+
+
+def _transform_key_item(
+    key: Any, *, finalize_callback: Optional[Callable] = None, collected_dim_map: Dict[Dim, DimWrapper]
+) -> _KeyItemType:
+    if isinstance(key, Tensor):
+        return TensorWrapper(key, finalize_callback=finalize_callback)
+    if isinstance(key, Dim):
+        if key in collected_dim_map:
+            return collected_dim_map[key]
+        dim_wrapper = DimWrapper(key, finalize_callback=finalize_callback)
+        collected_dim_map[key] = dim_wrapper
+        return dim_wrapper
+    if not isinstance(key, _RawTypes):
+        raise TypeError(f"unexpected type {type(key)}")
+    return key
+
+
+def _get_backend(*args) -> Type[Backend]:
+    args_flat = tree.flatten(args)
+    for arg in args_flat:
+        if isinstance(arg, Tensor) and arg.raw_tensor is not None:
+            return get_backend_by_raw_tensor_type(type(arg.raw_tensor))
+    return global_backend.__class__
+
+
+class TensorWrapper:
+    """
+    Wraps :class:`Tensor`.
+    Using weakref for the tensor, including also ``raw_tensor``.
+    Equality is given if the identity is the same, for the Tensor itself and the raw_tensor.
+    No value of the tensor is checked.
+    """
+
+    def __init__(self, value: Tensor, *, finalize_callback):
+        self.value_ref = ref(value, finalize_callback)
+        self.raw_value_ref = ref(value.raw_tensor, finalize_callback)
+        self._hash = id(value)
+
+    def __eq__(self, other):
+        if isinstance(other, TensorWrapper):
+            return self.value_ref() is other.value_ref() and self.raw_value_ref() is other.raw_value_ref()
+        return False
+
+    def __hash__(self):
+        return self._hash
+
+
+class DimWrapper:
+    """
+    Wraps :class:`Dim`.
+    Using weakref for the dim.
+    If the size is scalar and known, equality is given when the size is equal (and dim tag is ignored).
+    """
+
+    def __init__(self, dim: Dim, *, finalize_callback):
+        self.dim_value = _dim_value_for_key(dim)
+        # finalize_callback only needed when we don't use the dim value.
+        self.dim_ref = ref(dim, finalize_callback if self.dim_value is None else None)
+        self.dyn_size_ref = (
+            # E.g. consider the batch dim or data spatial dim which would be reset each step.
+            # We need some ref to the dyn size, and finalize this key when it goes out of scope.
+            # This is only needed when there is no info on the static size (or eager scalar dyn size).
+            ref(dim.dyn_size_ext.raw_tensor, finalize_callback)
+            if self.dim_value is None and dim.dyn_size_ext and dim.dyn_size_ext.raw_tensor is not None
+            else None
+        )
+        self._hash = hash(dim) if self.dim_value is None else hash(self.dim_value)
+
+    def __eq__(self, other):
+        if isinstance(other, DimWrapper):
+            if self.dim_value is not None:
+                return self.dim_value == other.dim_value
+            return self.dim_ref() == other.dim_ref() and self.dyn_size_ref() is other.dyn_size_ref()
+        return False
+
+    def __hash__(self):
+        return self._hash
+
+
+def _dim_value_for_key(dim: Dim) -> Optional[int]:
+    if dim.size is not None:
+        return dim.size
+    if dim.dyn_size_ext and not dim.dyn_size_ext.dims:
+        if dim.dyn_size_ext.raw_tensor is not None:
+            # noinspection PyProtectedMember
+            if dim.dyn_size_ext._raw_backend.executing_eagerly():
+                return int(dim.get_dim_value())
+    return None
+
+
+# For now... we might extend it by some more types.
+_KeyItemType = Union[None, str, bool, int, float, TensorWrapper, DimWrapper]
+_RawTypes = (type(None), str, bool, int, float)
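For orientation, here is a minimal usage sketch of the new Cache (not part of the diff; the names and the backend selection are illustrative). Keys may mix plain values, Dims and Tensors: static or eager-scalar Dims are compared by value, everything else by identity via weakrefs, and get() remaps Dims inside the cached value onto the Dims of the query key:

    import returnn.frontend as rf
    from returnn.tensor import Dim
    from returnn.frontend._cache import Cache

    rf.select_backend_torch()  # any eager backend works for this sketch

    cache = Cache(max_size=16)
    feat_dim = Dim(32, name="feat")

    key = ("pos_enc", feat_dim, "float32")  # tuple of raw values and Dims
    value = rf.random_normal([feat_dim])

    cache.set(key, value)
    assert cache.get(key) is not None  # hit; Dims in the result get remapped via _map_output
    assert cache.get(("other",), default=None) is None  # miss falls back to the default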
@@ -4,10 +4,10 @@ Attention

 from __future__ import annotations
 from typing import Tuple, Union, Optional, Sequence
-import weakref
 import logging
 from returnn.tensor import Tensor, Dim, single_step_dim
 import returnn.frontend as rf
+from returnn.frontend._cache import Cache


 __all__ = [
@@ -330,7 +330,7 @@ class RotaryPosCausalSelfAttention(CausalSelfAttention):
         q = _apply_rope(
             q,
             (
-                rf.gather(pos_enc, axis=hist_dim, indices=
+                rf.gather(pos_enc, axis=hist_dim, indices=rf.last_frame_position_of_dim(hist_dim))
                 if axis == single_step_dim
                 else rf.replace_dim(pos_enc, in_dim=hist_dim, out_dim=axis)[0]
             ),
@@ -892,7 +892,7 @@ def _make_indices(
     return indices, out_spatial_dim


-_relative_positional_encoding_cache =
+_relative_positional_encoding_cache = Cache(128)


 def relative_positional_encoding(
@@ -924,10 +924,10 @@ def relative_positional_encoding(
     """
     if not dtype:
         dtype = rf.get_default_float_dtype()
-    cache = _relative_positional_encoding_cache.setdefault(rf.get_run_ctx(), {})
     cache_key = (query_spatial_dim, key_value_spatial_dim, feat_dim, query_offset, dtype)
-
-
+    cache_entry = _relative_positional_encoding_cache.get(cache_key)
+    if cache_entry is not None:
+        return cache_entry
     import math

     with rf.control_flow_ctx(None):
@@ -946,11 +946,11 @@ def relative_positional_encoding(
         allow_missing_implicit_dims=True,
     )
     emb.feature_dim = feat_dim
-
+    _relative_positional_encoding_cache.set(cache_key, (emb, out_spatial_dim))
     return emb, out_spatial_dim


-_sinusoidal_positional_encoding_cache =
+_sinusoidal_positional_encoding_cache = Cache(128)  # (spatial_dim, feat_dim) -> enc


 def sinusoidal_positional_encoding(
@@ -982,10 +982,10 @@ def sinusoidal_positional_encoding(
         dtype = rf.get_default_float_dtype()
     if not device:
         device = rf.get_default_device()
-    cache = _sinusoidal_positional_encoding_cache.setdefault(rf.get_run_ctx(), {})
     cache_key = (spatial_dim, feat_dim, offset, base, dtype, device)
-
-
+    cache_entry = _sinusoidal_positional_encoding_cache.get(cache_key)
+    if cache_entry is not None:
+        return cache_entry
     import math

     with rf.control_flow_ctx(None):
@@ -1012,7 +1012,7 @@ def sinusoidal_positional_encoding(
         {spatial_dim, feat_dim} if spatial_dim != single_step_dim else {feat_dim}, allow_missing_implicit_dims=True
     )
     emb.feature_dim = feat_dim
-
+    _sinusoidal_positional_encoding_cache.set(cache_key, emb)
     return emb


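Both positional-encoding caches previously lived in a plain dict keyed per run context (setdefault(rf.get_run_ctx(), {})); the diff replaces that with the bounded Cache from returnn.frontend._cache, whose get returns None on a miss and whose set stores the computed entry. A hedged sketch of the same get/set pattern around a user function (cached_encoding and expensive_encoding are hypothetical stand-ins, not RETURNN API):

    import returnn.frontend as rf
    from returnn.frontend._cache import Cache

    _enc_cache = Cache(128)  # bounded; keyed by a hashable tuple of dims/dtype

    def expensive_encoding(spatial_dim, feat_dim, dtype):  # hypothetical stand-in
        return rf.zeros([spatial_dim, feat_dim], dtype=dtype)

    def cached_encoding(spatial_dim, feat_dim, dtype: str):
        key = (spatial_dim, feat_dim, dtype)
        entry = _enc_cache.get(key)  # None on a cache miss
        if entry is not None:
            return entry
        enc = expensive_encoding(spatial_dim, feat_dim, dtype)
        _enc_cache.set(key, enc)
        return enc

Keying on the dims themselves (rather than on the run context) fits the DimWrapper code above: entries for dynamic dims are finalized once those dims go out of scope.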
{returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn/frontend/conversions/hf_llama.py
RENAMED
@@ -8,6 +8,7 @@ import returnn.frontend as rf
 from returnn.frontend.decoder.transformer import TransformerDecoder, TransformerDecoderLayer, FeedForwardGated

 if TYPE_CHECKING:
+    # noinspection PyUnresolvedReferences,PyPackageRequirements,PyProtectedMember
     from transformers.models.llama.modeling_llama import (
         LlamaModel,
         LlamaForCausalLM,
@@ -25,6 +26,8 @@ def import_params_hf_llama_to_rf_transformer_decoder(
     Import params from HF Llama model to RF :class:`TransformerDecoder`.
     """
     import torch
+
+    # noinspection PyUnresolvedReferences,PyPackageRequirements,PyProtectedMember
     from transformers.models.llama.modeling_llama import LlamaModel, LlamaForCausalLM, LlamaDecoderLayer

     print("HF Model:")
@@ -206,10 +209,10 @@ def import_params_hf_llama_att_to_rf_rotary_att(model_hf: LlamaAttention, model_
     """
     import torch

-    assert model_hf.
-    assert model_hf.hidden_size == model_rf.in_dim.dimension
-    dim = model_hf.hidden_size
-    nh = model_hf.
+    assert model_hf.config.num_attention_heads == model_rf.num_heads.dimension
+    assert model_hf.config.hidden_size == model_rf.in_dim.dimension
+    dim = model_hf.config.hidden_size
+    nh = model_hf.config.num_attention_heads
     hdim = dim // nh

     print("HF Model:")
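The last hunk reads the head count and hidden size from model_hf.config instead of from attributes on the attention module itself, matching newer transformers releases where LlamaAttention keeps these values only on its config. A small sketch of that access pattern (the constructor signature and the removal of the instance attributes are assumptions about recent transformers versions):

    from transformers.models.llama.configuration_llama import LlamaConfig
    from transformers.models.llama.modeling_llama import LlamaAttention

    config = LlamaConfig(hidden_size=512, num_attention_heads=8)
    att = LlamaAttention(config, layer_idx=0)

    dim = att.config.hidden_size  # 512; reading via .config works across versions
    nh = att.config.num_attention_heads  # 8; the bare instance attribute may be gone
    hdim = dim // nh  # per-head dimension: 64, as in the importer above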
returnn-1.20250113.193158/returnn/util/lru_cache.py
ADDED
@@ -0,0 +1,309 @@
+"""
+:func:`lru_cache`, copied from Python functools, slightly adapted,
+and extended by functions to check whether some key is cached or not.
+"""
+
+from __future__ import annotations
+from typing import Dict, Any
+from functools import update_wrapper
+from threading import RLock
+from collections import namedtuple
+
+
+def lru_cache(maxsize: int = 128, typed: bool = False):
+    """Least-recently-used cache decorator.
+
+    If *maxsize* is set to None, the LRU features are disabled and the cache
+    can grow without bound.
+
+    If *typed* is True, arguments of different types will be cached separately.
+    For example, f(3.0) and f(3) will be treated as distinct calls with
+    distinct results.
+
+    Arguments to the cached function must be hashable.
+
+    Use f.cache_len() to see the current size of the cache.
+    Use f.cache_set(*args, result, **kwargs) to set a value in the cache directly.
+    Use f.cache_peek(*args, update_statistics=False, fallback=None, **kwargs)
+    to peek the cache, without ever calling the user function.
+    View the cache statistics named tuple (hits, misses, maxsize, currsize)
+    with f.cache_info().
+    Clear the cache and statistics with f.cache_clear().
+    Remove the oldest entry from the cache with f.cache_pop_oldest().
+    Take out some entry from the cache with f.cache_pop(*args, fallback=not_specified, **kwargs).
+    Set the maximum cache size to a new value with f.cache_set_maxsize(new_maxsize).
+    Access the underlying function with f.__wrapped__.
+
+    See: https://en.wikipedia.org/wiki/Cache_replacement_policies#Least_recently_used_(LRU)
+
+    """
+
+    # Users should only access the lru_cache through its public API:
+    # cache_info, cache_clear, and f.__wrapped__
+    # The internals of the lru_cache are encapsulated for thread safety and
+    # to allow the implementation to change (including a possible C version).
+
+    if isinstance(maxsize, int):
+        assert maxsize >= 0
+    elif callable(maxsize) and isinstance(typed, bool):
+        # The user_function was passed in directly via the maxsize argument
+        user_function, maxsize = maxsize, 128
+        return _lru_cache_wrapper(user_function, maxsize, typed)
+    elif maxsize is not None:
+        raise TypeError("Expected first argument to be an integer, a callable, or None")
+
+    # noinspection PyShadowingNames
+    def _decorating_function(user_function):
+        return _lru_cache_wrapper(user_function, maxsize, typed)
+
+    return _decorating_function
+
+
+_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])
+
+
+def _lru_cache_wrapper(user_function, maxsize: int, typed: bool):
+    # Constants shared by all lru cache instances:
+    make_key = _make_key  # build a key from the function arguments
+    # noinspection PyPep8Naming
+    PREV, NEXT, KEY, RESULT = 0, 1, 2, 3  # names for the link fields
+
+    cache: Dict[Any, list] = {}
+    hits = misses = 0
+    full = False
+    cache_get = cache.get  # bound method to lookup a key or return None
+    cache_len = cache.__len__  # get cache size without calling len()
+    lock = RLock()  # because linkedlist updates aren't threadsafe
+    root = []  # root of the circular doubly linked list
+    root[:] = [root, root, None, None]  # initialize by pointing to self
+
+    assert maxsize >= 0
+
+    def wrapper(*args, **kwds):
+        """
+        User-facing wrapper function.
+        """
+        # Size limited caching that tracks accesses by recency
+        nonlocal root, hits, misses, full
+        key = make_key(args, kwds, typed)
+        with lock:
+            link = cache_get(key)
+            if link is not None:
+                # Move the link to the front of the circular queue
+                link_prev, link_next, _key, result = link
+                link_prev[NEXT] = link_next
+                link_next[PREV] = link_prev
+                last = root[PREV]
+                last[NEXT] = root[PREV] = link
+                link[PREV] = last
+                link[NEXT] = root
+                hits += 1
+                return result
+            misses += 1
+        result = user_function(*args, **kwds)
+        if maxsize > 0:
+            _cache_insert(key, result)
+        return result
+
+    def _cache_insert(key, result):
+        nonlocal root, full
+        with lock:
+            if key in cache:
+                # Getting here means that this same key was added to the
+                # cache while the lock was released. Since the link
+                # update is already done, we need only return the
+                # computed result and update the count of misses.
+                pass
+            elif full:
+                # Use the old root to store the new key and result.
+                oldroot = root
+                oldroot[KEY] = key
+                oldroot[RESULT] = result
+                # Empty the oldest link and make it the new root.
+                # Keep a reference to the old key and old result to
+                # prevent their ref counts from going to zero during the
+                # update. That will prevent potentially arbitrary object
+                # clean-up code (i.e. __del__) from running while we're
+                # still adjusting the links.
+                root = oldroot[NEXT]
+                oldkey = root[KEY]
+                root[KEY] = root[RESULT] = None
+                # Now update the cache dictionary.
+                del cache[oldkey]
+                # Save the potentially reentrant cache[key] assignment
+                # for last, after the root and links have been put in
+                # a consistent state.
+                cache[key] = oldroot
+            else:
+                # Put result in a new link at the front of the queue.
+                last = root[PREV]
+                link = [last, root, key, result]
+                last[NEXT] = root[PREV] = cache[key] = link
+                # Use the cache_len bound method instead of the len() function
+                # which could potentially be wrapped in an lru_cache itself.
+                full = cache_len() >= maxsize
+
+    def cache_info():
+        """Report cache statistics"""
+        with lock:
+            return _CacheInfo(hits, misses, maxsize, cache_len())
+
+    def cache_clear():
+        """Clear the cache and cache statistics"""
+        nonlocal hits, misses, full
+        with lock:
+            for link in cache.values():
+                link.clear()  # make GC happy
+            cache.clear()
+            root[:] = [root, root, None, None]
+            hits = misses = 0
+            full = False
+
+    def cache_parameters():
+        """
+        :return: parameters (maxsize, typed) of the cache as dict
+        """
+        return {"maxsize": maxsize, "typed": typed}
+
+    def cache_set(*args, result, **kwargs):
+        """
+        Sets a value in the cache directly.
+        """
+        nonlocal root, full
+        if maxsize > 0:
+            key = make_key(args, kwargs, typed)
+            _cache_insert(key, result)
+
+    def cache_peek(*args, update_statistics: bool = True, fallback=None, **kwargs):
+        """
+        Peeks the cache without ever calling the user function.
+        """
+        nonlocal hits, misses
+        key = make_key(args, kwargs, typed)
+        with lock:
+            link = cache_get(key)
+            if link is not None:
+                if update_statistics:
+                    hits += 1
+                return link[RESULT]
+        if update_statistics:
+            misses += 1
+        return fallback
+
+    not_specified = object()
+
+    def cache_pop(*args, fallback=not_specified, **kwargs):
+        """
+        Removes the entry from the cache.
+        """
+        nonlocal hits, misses
+        key = make_key(args, kwargs, typed)
+        with lock:
+            link = cache_get(key)
+            if link is not None:
+                # Take out link.
+                link[PREV][NEXT] = link[NEXT]
+                link[NEXT][PREV] = link[PREV]
+                oldkey = link[KEY]
+                oldvalue = link[RESULT]
+                link.clear()
+                del cache[oldkey]
+                return oldvalue
+        if fallback is not_specified:
+            raise KeyError("key not found")
+        return fallback
+
+    def cache_pop_oldest(*, fallback=not_specified):
+        """
+        Removes the oldest entry from the cache.
+        """
+        nonlocal root, full
+        with lock:
+            if not cache:
+                if fallback is not_specified:
+                    raise KeyError("cache is empty")
+                return fallback
+            assert cache
+            # Take out oldest link.
+            link: list = root[NEXT]
+            link[NEXT][PREV] = root
+            root[NEXT] = link[NEXT]
+            oldkey = link[KEY]
+            oldvalue = link[RESULT]
+            link.clear()
+            del cache[oldkey]
+            full = cache_len() >= maxsize
+            return oldvalue
+
+    def cache_set_maxsize(new_maxsize: int):
+        """
+        Resets the maxsize.
+        If the new maxsize is smaller than the current cache size, the oldest entries are removed.
+        """
+        nonlocal maxsize, full
+        assert new_maxsize >= 0
+        with lock:
+            maxsize = new_maxsize
+            while cache_len() > maxsize:
+                cache_pop_oldest()
+            full = cache_len() >= maxsize
+
+    wrapper.cache_info = cache_info
+    wrapper.cache_clear = cache_clear
+    wrapper.cache_parameters = cache_parameters
+    wrapper.cache_set = cache_set
+    wrapper.cache_peek = cache_peek
+    wrapper.cache_pop = cache_pop
+    wrapper.cache_len = cache_len
+    wrapper.cache_pop_oldest = cache_pop_oldest
+    wrapper.cache_set_maxsize = cache_set_maxsize
+
+    update_wrapper(wrapper, user_function)
+
+    return wrapper
+
+
+def _make_key(args, kwds, typed, *, _kwd_mark=(object(),), _fasttypes=(int, str), _tuple=tuple, _type=type, _len=len):
+    """Make a cache key from optionally typed positional and keyword arguments
+
+    The key is constructed in a way that is flat as possible rather than
+    as a nested structure that would take more memory.
+
+    If there is only a single argument and its data type is known to cache
+    its hash value, then that argument is returned without a wrapper. This
+    saves space and improves lookup speed.
+
+    """
+    # All of code below relies on kwds preserving the order input by the user.
+    # Formerly, we sorted() the kwds before looping. The new way is *much*
+    # faster; however, it means that f(x=1, y=2) will now be treated as a
+    # distinct call from f(y=2, x=1) which will be cached separately.
+    key = args
+    if kwds:
+        key += _kwd_mark
+        for item in kwds.items():
+            key += item
+    if typed:
+        key += _tuple(_type(v) for v in args)  # noqa
+        if kwds:
+            key += _tuple(_type(v) for v in kwds.values())  # noqa
+    elif _len(key) == 1 and _type(key[0]) in _fasttypes:
+        return key[0]
+    return _HashedSeq(key)
+
+
+class _HashedSeq(list):
+    """
+    This class guarantees that hash() will be called no more than once
+    per element. This is important because the lru_cache() will hash
+    the key multiple times on a cache miss.
+    """
+
+    __slots__ = "hashvalue"
+
+    def __init__(self, tup, *, _hash=hash):
+        super().__init__(tup)
+        self.hashvalue = _hash(tup)
+
+    def __hash__(self):
+        return self.hashvalue
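The docstring above lists the extensions over functools.lru_cache (cache_len, cache_set, cache_peek, cache_pop, cache_pop_oldest, cache_set_maxsize); a short usage sketch of that documented API:

    from returnn.util.lru_cache import lru_cache

    @lru_cache(maxsize=2)
    def square(x):
        return x * x

    assert square(2) == 4 and square(3) == 9  # two misses fill the cache
    assert square(2) == 4  # hit: key 2 becomes the most recently used
    assert square(4) == 16  # miss: evicts the least recently used key 3
    assert square.cache_peek(3) is None  # never calls square; returns the fallback
    square.cache_set(5, result=25)  # direct insert; evicts the oldest key 2
    assert square.cache_len() == 2  # keys 4 and 5 remain
    assert square.cache_pop_oldest() == 16  # removes key 4, returns its value
    print(square.cache_info())  # CacheInfo(hits=..., misses=..., maxsize=2, currsize=1)
    square.cache_set_maxsize(1)  # shrinking drops oldest entries if needed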
{returnn-1.20250110.143435 → returnn-1.20250113.193158}/returnn.egg-info/SOURCES.txt
RENAMED
@@ -157,6 +157,7 @@ returnn/extern/graph_editor/transform.py
 returnn/extern/graph_editor/util.py
 returnn/frontend/__init__.py
 returnn/frontend/_backend.py
+returnn/frontend/_cache.py
 returnn/frontend/_numpy_backend.py
 returnn/frontend/_random_journal.py
 returnn/frontend/_utils.py
@@ -320,6 +321,7 @@ returnn/util/debug_helpers.py
 returnn/util/file_cache.py
 returnn/util/fsa.py
 returnn/util/literal_py_to_pickle.py
+returnn/util/lru_cache.py
 returnn/util/math.py
 returnn/util/multi_proc_non_daemonic_spawn.py
 returnn/util/native_code_compiler.py