returnn 1.20240727.10001__tar.gz → 1.20240730.153730__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of returnn might be problematic.
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/PKG-INFO +1 -1
- returnn-1.20240730.153730/_setup_info_generated.py +2 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/audio.py +40 -11
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/basic.py +1 -0
- returnn-1.20240730.153730/returnn/datasets/postprocessing.py +210 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/attention.py +69 -5
- returnn-1.20240730.153730/returnn/frontend/conversions/__init__.py +3 -0
- returnn-1.20240730.153730/returnn/frontend/conversions/hf_llama.py +56 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/decoder/transformer.py +104 -11
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/linear.py +1 -1
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/normalization.py +41 -5
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/debug.py +188 -1
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn.egg-info/PKG-INFO +1 -1
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn.egg-info/SOURCES.txt +3 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/PyCharm-inspection-profile.xml +2 -1
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +2 -1
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_Dataset.py +52 -1
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_attention.py +239 -0
- returnn-1.20240727.10001/_setup_info_generated.py +0 -2
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/.editorconfig +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/.gitignore +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/.gitmodules +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/.kateconfig +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/CHANGELOG.md +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/CODEOWNERS +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/CONTRIBUTING.md +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/LICENSE +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/MANIFEST.in +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/README.rst +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/12AX.cluster_map +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/_setup_returnn_env.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-fwd.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-horovod-mpi.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-horovod-mpi.py.sh +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-horovod-mpi.sh +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-hyper-param-tuning.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-iter-dataset.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-list-devices.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-lua-torch-layer.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-pretrain.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-record-and-push-to-webserver.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-returnn-as-framework.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-rf-pt-benchmark.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-rf.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-rhn-enwik8.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-sprint-interface.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-att-copy.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-attention.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-enc-dec.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-hard-att-copy.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-lstm-benchmark.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-native-lstm.12ax.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-native-lstm2.12ax.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-neural-transducer.12ax.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-rec-explicit-lstm.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-rec-explicit-rnn.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-rec-self-att.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-search-compiled-graph.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-timit-lstm-ctc.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-torch.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/demo.sh +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/README.txt +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/chars.txt +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/config_demo +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/config_fwd +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/config_real +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/decode.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/go.sh +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/lines.txt +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/split/eval.txt +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/split/train.txt +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/IAM/split/valid.txt +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/README.md +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/artificial/create_test_h5.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/artificial/forwardconfig +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/artificial/go.sh +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/artificial/trainconfig +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/artificial_rgb/go.sh +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/pyproject.toml +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/requirements.txt +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/__main__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/__old_mod_loader__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/__setup__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/config.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/bundle_file.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/cached.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/cached2.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/distrib_files.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/generating.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/hdf.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/lm.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/map.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/meta.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/multi_proc.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/normalization_data.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/numpy_dump.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/raw_wav.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/sprint.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/stereo.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/util/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/util/feature_extraction.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/util/strings.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/datasets/util/vocabulary.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/engine/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/engine/base.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/engine/batch.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/__main__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/.git +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/graph_editor/README.md +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/graph_editor/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/graph_editor/edit.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/graph_editor/reroute.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/graph_editor/select.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/graph_editor/subgraph.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/graph_editor/transform.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/extern/graph_editor/util.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/forward_iface.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/_backend.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/_native/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/_native/backend.cpp +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/_native/backend.hpp +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/_native/module.cpp +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/_native/module.hpp +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/_native/py_utils.hpp +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/_native/tensor_ops.cpp +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/_native/tensor_ops.hpp +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/_numpy_backend.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/_random_journal.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/_utils.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/array_.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/audio/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/audio/mel.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/audio/specaugment.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/backend.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/build_from_dict.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/cond.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/const.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/container.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/control_flow_ctx.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/conv.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/decoder/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/device.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/dims.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/dropout.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/dtype.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/encoder/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/encoder/base.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/encoder/conformer.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/gradient.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/graph.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/hooks.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/init.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/label_smoothing.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/loop.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/loss.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/math_.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/matmul.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/module.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/parameter.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/parametrizations.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/parametrize.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/piecewise_linear.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/rand.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/rec.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/reduce.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/run_ctx.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/signal.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/state.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/stepwise_scheduler.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/tensor_array.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/frontend/types.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/import_/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/import_/common.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/import_/git.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/import_/import_.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/learning_rate_control.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/log.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/native_op.cpp +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/native_op.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/pretrain.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/sprint/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/sprint/cache.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/sprint/control.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/sprint/error_signals.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/sprint/extern_interface.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/sprint/interface.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tensor/README.md +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tensor/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tensor/_dim_extra.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tensor/_tensor_extra.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tensor/_tensor_mixin_base.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tensor/_tensor_op_overloads.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tensor/control_flow_ctx.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tensor/dim.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tensor/marked_dim.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tensor/tensor.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tensor/tensor_dict.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tensor/utils.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/compat.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/data_pipeline.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/distributed.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/engine.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/README.md +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/_backend.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/_utils.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/cond.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/dims.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/layer.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/loop.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/make_layer.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/masked_computation.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_low_level/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/frontend_low_level/_backend.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/horovod.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/hyper_param_tuning.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/layers/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/layers/base.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/layers/basic.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/layers/rec.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/layers/segmental_model.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/layers/signal_processing.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/layers/variable.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/native_op.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/network.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/sprint.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/updater.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/util/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/util/basic.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/util/data.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/util/gradient_checkpoint.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/util/ken_lm.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/tf/util/open_fst.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/README.md +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/data/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/data/extern_data.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/data/pipeline.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/data/queued_data_iter.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/data/tensor_utils.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/distributed.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/engine.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/frontend/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/frontend/_backend.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/frontend/_rand.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/frontend/bridge.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/frontend/raw_ops.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/updater.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/util/README.md +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/util/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/util/array_.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/util/diagnose_gpu.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/util/gradient_checkpoint.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/torch/util/scaled_gradient.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/__init__.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/basic.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/better_exchook.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/bpe.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/debug_helpers.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/file_cache.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/fsa.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/literal_py_to_pickle.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/math.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/native_code_compiler.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/pprint.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/py-to-pickle.cpp +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/py_compat.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/py_ext_mod_compiler.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/result_with_reason.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/sig_proc.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/task_system.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/train_proc_manager.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn/util/watch_memory.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn.egg-info/dependency_links.txt +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/returnn.egg-info/top_level.txt +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/rnn.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/setup.cfg +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/setup.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/DummySprintExec.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/PyCharm.idea/.gitignore +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/PyCharm.idea/.name +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/PyCharm.idea/misc.xml +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/PyCharm.idea/modules.xml +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/PyCharm.idea/returnn.iml +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/_set_num_threads1.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/_setup_returnn_env.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/_setup_test_env.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/bpe-unicode-demo.codes +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/bpe-unicode-demo.vocab +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/lexicon_opt.fst +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/lexicon_opt.isyms +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/lexicon_opt.jpg +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/lexicon_opt.osyms +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/lint_common.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/pycharm-inspect.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/pylint.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/returnn-as-framework.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/rf_utils.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/spelling.dic +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_Config.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_Fsa.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_GeneratingDataset.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_HDFDataset.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_LearningRateControl.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_Log.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_MultiProcDataset.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_Pretrain.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_ResNet.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_SprintDataset.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_SprintInterface.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_TFEngine.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_TFNativeOp.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_TFNetworkLayer.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_TFNetworkRecLayer.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_TFNetworkSigProcLayer.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_TFUpdater.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_TFUtil.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_TF_determinism.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_TaskSystem.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_TaskSystem_SharedMem.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_TranslationDataset.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_Util.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_demos.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_fork_exec.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_hdf_dump.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_array.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_base.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_cond.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_const.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_container.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_conv.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_encoder_conformer.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_gradient.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_label_smoothing.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_loop.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_math.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_normalization.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_piecewise_linear.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_rec.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_reduce.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_rf_signal.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_tensor.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_tools.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_torch_dataset.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_torch_engine.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_torch_frontend.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_torch_internal_frontend.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/test_torch_util.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tests/torch_utils.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/_setup_returnn_env.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/analyze-dataset-batches.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/bliss-collect-seq-lens.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/bliss-dump-text.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/bliss-get-segment-names.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/bliss-to-ogg-zip.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/bpe-create-lexicon.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/calculate-word-error-rate.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/cleanup-old-models.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/collect-orth-symbols.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/collect-words.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/compile_native_op.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/compile_tf_graph.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/debug-dump-search-scores.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/debug-plot-search-scores.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/dump-dataset-raw-strings.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/dump-dataset.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/dump-forward-stats.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/dump-forward.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/dump-network-json.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/dump-pickle.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/extract_state_tying_from_dataset.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/get-attention-weights.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/get-best-model-epoch.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/hdf_dump.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/hdf_dump_translation_dataset.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/import-blocks-mt-model.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/import-t2t-mt-model.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/.gitignore +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/Makefile +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/README.md +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/example/README.md +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/example/libs_list +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/example/state_vars_list +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/example/tensor_names_list +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/file.h +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/main.cc +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/rescorer.h +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/vocabulary.cc +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/lattice_rescorer/vocabulary.h +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/tf_avg_checkpoints.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/tf_inspect_checkpoint.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/tf_inspect_summary_log.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/torch_avg_checkpoints.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/torch_export_to_onnx.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/torch_inspect_checkpoint.py +0 -0
- {returnn-1.20240727.10001 → returnn-1.20240730.153730}/tools/torch_inspect_checkpoint_and_opt.py +0 -0

returnn/datasets/audio.py

@@ -151,8 +151,6 @@ class OggZipDataset(CachedDataset2):
             self.num_outputs["classes"] = [self.targets.num_labels, 1]
         if self.feature_extractor:
             self.num_outputs["data"] = [self.num_inputs, 2]
-        else:
-            self.num_outputs["data"] = [0, 2]
         self._data: Optional[List[Dict[str, Any]]] = None  # lazily loaded
         self._fixed_random_subset = fixed_random_subset
         self._fixed_random_subset_seed = fixed_random_subset_seed

@@ -402,15 +400,46 @@ class OggZipDataset(CachedDataset2):
         self._lazy_init()
         return len(self._data)
 
-    def get_data_shape(self, key):
+    def get_data_dtype(self, key: str) -> str:
+        """:return: dtype of data entry with `key`"""
+        if key == "data":
+            return "float32"
+        elif key == "classes":
+            return "int32"
+        elif key == "raw":
+            return "string"
+        elif key == "orth":
+            return "uint8"
+        else:
+            raise ValueError(f"{self}: unknown data key: {key}")
+
+    def get_data_keys(self) -> List[str]:
+        """:return: available data keys"""
+        keys = []
+        if self.feature_extractor is not None:
+            keys.append("data")
+        if self.targets is not None:
+            keys.append("classes")
+        return [*keys, "orth", "raw"]
+
+    def get_data_shape(self, key: str):
         """
         :returns get_data(*, key).shape[1:], i.e. num-frames excluded
         :rtype: list[int]
         """
-        if key == "data" and self.feature_extractor:
+        if key == "data":
+            assert self.feature_extractor is not None
             if self.feature_extractor.num_channels is not None:
                 return [self.feature_extractor.num_channels, self.feature_extractor.get_feature_dimension()]
-
+            return [self.feature_extractor.get_feature_dimension()]
+        elif key in ["classes", "orth", "raw"]:
+            return []
+        else:
+            raise ValueError(f"{self}: unknown data key {key}")
+
+    def is_data_sparse(self, key: str) -> bool:
+        """:return: whether data entry with `key` is sparse"""
+        return key == "classes"
 
     def _get_transcription(self, corpus_seq_idx: int):
         """

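The methods added above give OggZipDataset an explicit metadata API: dtype, available data keys, per-frame shape, and sparsity per key. As a minimal sketch (not part of this diff, assuming an already-initialized RETURNN dataset instance ds), downstream code can query exactly these four methods:

def describe_dataset(ds):
    """Print dtype, per-frame shape and sparsity for every data key the dataset exposes."""
    for key in ds.get_data_keys():
        print(
            f"{key}: dtype={ds.get_data_dtype(key)},"
            f" shape={ds.get_data_shape(key)},"
            f" sparse={ds.is_data_sparse(key)}"
        )

The new PostprocessingDataset further below relies on these same methods to build its tensor templates from the wrapped dataset.
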
@@ -467,13 +496,14 @@ class OggZipDataset(CachedDataset2):
         """
         self._lazy_init()
         seq_tag = self._get_tag_from_info_dict(self._data[corpus_seq_idx])
+        features = {}
         if self.feature_extractor:
             with self._open_audio_file(corpus_seq_idx) as audio_file:
-                features = self.feature_extractor.get_audio_features_from_raw_bytes(audio_file, seq_name=seq_tag)
-        else:
-            features = numpy.zeros((), dtype=numpy.float32)  # currently the API requires some dummy values...
+                data = self.feature_extractor.get_audio_features_from_raw_bytes(audio_file, seq_name=seq_tag)
+                features["data"] = data
         targets, txt = self._get_transcription(corpus_seq_idx)
-        targets = numpy.array(targets, dtype="int32")
+        if self.targets is not None:
+            features["classes"] = numpy.array(targets, dtype="int32")
         raw_txt = str_to_numpy_array(txt)
         orth = txt.encode("utf8")
         if PY3:

@@ -483,8 +513,7 @@ class OggZipDataset(CachedDataset2):
             orth = list(map(ord, orth))
         orth = numpy.array(orth, dtype="uint8")
         return DatasetSeq(
-            features=features,
-            targets={"classes": targets, "raw": raw_txt, "orth": orth},
+            features={**features, "raw": raw_txt, "orth": orth},
             seq_idx=corpus_seq_idx,
             seq_tag=seq_tag,
         )

returnn/datasets/basic.py

@@ -1388,6 +1388,7 @@ def get_dataset_class(name: Union[str, Type[Dataset]]) -> Optional[Type[Dataset]]
         "map",
         "multi_proc",
         "distrib_files",
+        "postprocessing",
     ]
     for mod_name in mod_names:
         mod = import_module("returnn.datasets.%s" % mod_name)

@@ -0,0 +1,210 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Provides :class:`PostprocessingDataset`.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Union
|
|
8
|
+
|
|
9
|
+
from returnn.datasets.basic import DatasetSeq
|
|
10
|
+
from returnn.datasets.util.vocabulary import Vocabulary
|
|
11
|
+
from returnn.tensor import Tensor, TensorDict
|
|
12
|
+
from returnn.tensor.dim import Dim
|
|
13
|
+
from .basic import init_dataset
|
|
14
|
+
from .cached2 import CachedDataset2
|
|
15
|
+
|
|
16
|
+
__all__ = ["PostprocessingDataset"]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class PostprocessingDataset(CachedDataset2):
|
|
20
|
+
"""
|
|
21
|
+
A dataset that allows for generic post-processing of data from another dataset
|
|
22
|
+
using a function on the segment level and on the level of multiple segments via
|
|
23
|
+
an iterator.
|
|
24
|
+
|
|
25
|
+
This allows integrating various data augmentation techniques like e.g. Mixup,
|
|
26
|
+
SpecAugment or speed perturbation into the data loading pipeline.
|
|
27
|
+
|
|
28
|
+
The integration into the data loading pipeline makes it easy to distribute the
|
|
29
|
+
data processing work across multiple CPU cores using `MultiProcDataset` and in
|
|
30
|
+
turn frees the GPU from data preprocessing tasks.
|
|
31
|
+
|
|
32
|
+
Example usage::
|
|
33
|
+
|
|
34
|
+
from returnn.tensor.dim import Dim, DimTypes
|
|
35
|
+
|
|
36
|
+
time_dim = Dim(None, kind=DimTypes.Spatial)
|
|
37
|
+
new_data_dim = Dim(128)
|
|
38
|
+
|
|
39
|
+
train = {
|
|
40
|
+
"class": "PostprocessingDataset",
|
|
41
|
+
"dataset": {
|
|
42
|
+
"class": "HDFDataset",
|
|
43
|
+
"files": ["/path/to/data.hdf"],
|
|
44
|
+
},
|
|
45
|
+
# one of them, but not both:
|
|
46
|
+
"map_seq": map_seq, # (data: TensorDict) -> TensorDict
|
|
47
|
+
"map_seq_stream": map_seqs, # (iter: Iterator[TensorDict]) -> Iterator[TensorDict]
|
|
48
|
+
# only required when data shapes change wrt. the wrapped dataset:
|
|
49
|
+
"map_outputs": {
|
|
50
|
+
"data": {"dims": [time_dim, new_data_dim]},
|
|
51
|
+
},
|
|
52
|
+
}
|
|
53
|
+
"""
|
|
54
|
+
|
|
55
|
+
def __init__(
|
|
56
|
+
self,
|
|
57
|
+
dataset: Dict[str, Any],
|
|
58
|
+
map_seq: Optional[Union[Callable[[TensorDict], TensorDict]]] = None,
|
|
59
|
+
map_seq_stream: Optional[Callable[[Iterator[TensorDict]], Iterator[TensorDict]]] = None,
|
|
60
|
+
map_outputs: Optional[Dict[str, Any]] = None,
|
|
61
|
+
**kwargs,
|
|
62
|
+
):
|
|
63
|
+
"""
|
|
64
|
+
:param dataset: inner dataset to be post-processed
|
|
65
|
+
:param map_seq: post processor function operating on the single-segment level.
|
|
66
|
+
To avoid confusion on the order of how the processing functions are applied to the data, only one of
|
|
67
|
+
`map_seq` and `map_seq_stream` can be specified at a time.
|
|
68
|
+
:param map_seq_stream: post processor function operating on the multiple segment level via an iterator.
|
|
69
|
+
Allows merging multiple segments into one, or generating multiple output segments from one input segment.
|
|
70
|
+
To avoid confusion on the order of how the processing functions are applied to the data, only one of
|
|
71
|
+
`map_seq` and `map_seq_stream` can be specified at a time.
|
|
72
|
+
:param map_outputs: Type and axis specification of the outputs of the mapping functions,
|
|
73
|
+
like extern_data and model_outputs.
|
|
74
|
+
To simplify the common case when no shapes change, this value can be left unspecified. The dataset then
|
|
75
|
+
assumes the same data layout as returned by the wrapped dataset.
|
|
76
|
+
Example: `map_outputs={"data": {"dim": 42}}`
|
|
77
|
+
:param kwargs: see :class:`CachedDataset2`, :class:`Dataset`
|
|
78
|
+
"""
|
|
79
|
+
super().__init__(**kwargs)
|
|
80
|
+
|
|
81
|
+
if self.seq_ordering != "default":
|
|
82
|
+
raise ValueError(f"{self}: specify seq_ordering in wrapped dataset, not in {self.__class__.__name__}")
|
|
83
|
+
if map_seq is None and map_seq_stream is None:
|
|
84
|
+
raise ValueError(f"{self}: need to either set map_seq or map_seq_stream")
|
|
85
|
+
if map_seq and map_seq_stream:
|
|
86
|
+
raise ValueError(f"{self}: cannot set both map_seq and map_seq_stream")
|
|
87
|
+
|
|
88
|
+
self._dataset_def = dataset
|
|
89
|
+
self._map_seq = map_seq
|
|
90
|
+
self._map_seq_stream = map_seq_stream
|
|
91
|
+
self._map_outputs = map_outputs
|
|
92
|
+
|
|
93
|
+
self._dataset = init_dataset(self._dataset_def, parent_dataset=self)
|
|
94
|
+
if self._map_seq_stream is None:
|
|
95
|
+
# if the stream mapper is set, the num_seqs may change and the estimation is less accurate
|
|
96
|
+
self._estimated_num_seqs = self._dataset.estimated_num_seqs
|
|
97
|
+
self._data_iter: Optional[Iterator[Tuple[int, TensorDict]]] = None
|
|
98
|
+
|
|
99
|
+
self._in_tensor_dict_template = TensorDict(
|
|
100
|
+
{name: self._make_tensor_template_from_input(name) for name in self._dataset.get_data_keys()}
|
|
101
|
+
)
|
|
102
|
+
self._out_tensor_dict_template = (
|
|
103
|
+
TensorDict(self._map_outputs) if self._map_outputs is not None else self._in_tensor_dict_template
|
|
104
|
+
)
|
|
105
|
+
self.num_outputs = {
|
|
106
|
+
k: (t.sparse_dim.size if t.sparse_dim else t.shape[-1] if len(t.shape) > 0 else 1, t.ndim)
|
|
107
|
+
for k, t in self._out_tensor_dict_template.data.items()
|
|
108
|
+
}
|
|
109
|
+
self._default_input = "data" if "data" in self.num_outputs else next(iter(self.num_outputs.keys()))
|
|
110
|
+
self.num_inputs = self.num_outputs[self._default_input][0]
|
|
111
|
+
|
|
112
|
+
self.labels = {}
|
|
113
|
+
for k, t in self._out_tensor_dict_template.data.items():
|
|
114
|
+
if t.vocab:
|
|
115
|
+
self.labels[k] = t.vocab.labels
|
|
116
|
+
elif t.sparse_dim: # sparse_dim but not vocab
|
|
117
|
+
self.labels[k] = list(map(str, range(t.sparse_dim.dimension))) # dummy labels
|
|
118
|
+
|
|
119
|
+
def init_seq_order(
|
|
120
|
+
self, epoch: Optional[int] = None, seq_list: Optional[List[str]] = None, seq_order: Optional[List[int]] = None
|
|
121
|
+
):
|
|
122
|
+
"""
|
|
123
|
+
:param epoch:
|
|
124
|
+
:param seq_list:
|
|
125
|
+
:param seq_order:
|
|
126
|
+
:return: whether the order changed (True is always safe to return)
|
|
127
|
+
"""
|
|
128
|
+
super().init_seq_order(epoch=epoch, seq_list=seq_list, seq_order=seq_order)
|
|
129
|
+
|
|
130
|
+
if epoch is None and seq_list is None and seq_order is None:
|
|
131
|
+
self._num_seqs = 0
|
|
132
|
+
return True
|
|
133
|
+
|
|
134
|
+
assert self._dataset is not None
|
|
135
|
+
self._dataset.init_seq_order(epoch=epoch, seq_list=seq_list, seq_order=seq_order)
|
|
136
|
+
self._data_iter = enumerate(self._build_mapping_iter())
|
|
137
|
+
return True
|
|
138
|
+
|
|
139
|
+
def _collect_single_seq(self, seq_idx: int) -> Optional[DatasetSeq]:
|
|
140
|
+
while True:
|
|
141
|
+
try:
|
|
142
|
+
loaded_seq_idx, tensor_dict = next(self._data_iter)
|
|
143
|
+
except StopIteration:
|
|
144
|
+
return None
|
|
145
|
+
assert loaded_seq_idx <= seq_idx, "_collect_single_seq must be done monotonically"
|
|
146
|
+
if loaded_seq_idx != seq_idx:
|
|
147
|
+
continue
|
|
148
|
+
seq = DatasetSeq(features={k: t.raw_tensor for k, t in tensor_dict.data.items()}, seq_idx=seq_idx)
|
|
149
|
+
return seq
|
|
150
|
+
|
|
151
|
+
def _build_mapping_iter(self) -> Iterator[TensorDict]:
|
|
152
|
+
"""
|
|
153
|
+
:return: an iterator applying both the segment level and across-segment transformations on the given dataset
|
|
154
|
+
"""
|
|
155
|
+
|
|
156
|
+
def _validate_tensor_dict_iter(inner: Iterator[TensorDict]) -> Iterator[TensorDict]:
|
|
157
|
+
for t_dict in inner:
|
|
158
|
+
for data_key, out_t in self._out_tensor_dict_template.data.items():
|
|
159
|
+
in_t = t_dict.data[data_key]
|
|
160
|
+
assert (
|
|
161
|
+
in_t.ndim == out_t.batch_ndim
|
|
162
|
+
and in_t.dtype == out_t.dtype
|
|
163
|
+
and all(d.dimension in (d_, None) for (d, d_) in zip(in_t.dims, out_t.shape))
|
|
164
|
+
)
|
|
165
|
+
yield t_dict
|
|
166
|
+
|
|
167
|
+
data_iter = self._iterate_dataset()
|
|
168
|
+
if self._map_seq_stream is not None:
|
|
169
|
+
data_iter = self._map_seq_stream(data_iter)
|
|
170
|
+
assert isinstance(
|
|
171
|
+
data_iter, Iterator
|
|
172
|
+
), f"map_seq_stream must produce an {Iterator.__name__}, but produced {type(data_iter).__name__}"
|
|
173
|
+
return _validate_tensor_dict_iter(data_iter)
|
|
174
|
+
|
|
175
|
+
def _iterate_dataset(self) -> Iterator[TensorDict]:
|
|
176
|
+
"""
|
|
177
|
+
:return: generator providing data samples in the form of a TensorDict
|
|
178
|
+
"""
|
|
179
|
+
data_keys = self._dataset.get_data_keys()
|
|
180
|
+
|
|
181
|
+
seq_index = 0
|
|
182
|
+
while self._dataset.is_less_than_num_seqs(seq_index):
|
|
183
|
+
self._dataset.load_seqs(seq_index, seq_index + 1)
|
|
184
|
+
tensor_dict = self._in_tensor_dict_template.copy_template()
|
|
185
|
+
for data_key in data_keys:
|
|
186
|
+
tensor_dict.data[data_key].raw_tensor = self._dataset.get_data(seq_index, data_key)
|
|
187
|
+
if self._map_seq is not None:
|
|
188
|
+
tensor_dict = self._map_seq(tensor_dict)
|
|
189
|
+
assert isinstance(
|
|
190
|
+
tensor_dict, TensorDict
|
|
191
|
+
), f"map_seq must produce a {TensorDict.__name__}, but produced {type(tensor_dict).__name__}"
|
|
192
|
+
yield tensor_dict
|
|
193
|
+
seq_index += 1
|
|
194
|
+
|
|
195
|
+
def _make_tensor_template_from_input(self, data_key: str) -> Tensor:
|
|
196
|
+
dtype = self._dataset.get_data_dtype(data_key)
|
|
197
|
+
if dtype == "string":
|
|
198
|
+
dims = []
|
|
199
|
+
else:
|
|
200
|
+
feature_dims = [
|
|
201
|
+
Dim(dimension=dim, name=f"{data_key}_dim{i + 1}")
|
|
202
|
+
for i, dim in enumerate(self._dataset.get_data_shape(data_key))
|
|
203
|
+
]
|
|
204
|
+
dims = [Dim(dimension=None, name=f"{data_key}_frame"), *feature_dims]
|
|
205
|
+
sparse_dim = None
|
|
206
|
+
if self._dataset.is_data_sparse(data_key):
|
|
207
|
+
sparse_dim = Dim(dimension=self._dataset.get_data_dim(data_key), name=f"{data_key}_sparse")
|
|
208
|
+
if data_key in self._dataset.labels:
|
|
209
|
+
sparse_dim.vocab = Vocabulary.create_vocab_from_labels(self._dataset.labels[data_key])
|
|
210
|
+
return Tensor(data_key, dims=dims, dtype=dtype, sparse_dim=sparse_dim)
|
|
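The new dataset code above wraps another dataset and applies user-provided callbacks: a per-segment map (must return a TensorDict, checked in _iterate_dataset) and/or an across-segment map over the whole iterator (must return an Iterator, checked in _build_mapping_iter). Below is a minimal configuration sketch of how such a wrapper might be used; the class name "PostprocessingDataset" and the config keys "dataset", "map_seq" and "map_seq_stream" are assumptions inferred from this diff, not confirmed by it, and the example assumes dense float features under the "data" key.

import numpy

from returnn.tensor import TensorDict


def _add_noise(tensor_dict: TensorDict) -> TensorDict:
    """map_seq-style callback: per-segment transform, must return a TensorDict"""
    data = tensor_dict.data["data"]
    noise = numpy.random.normal(scale=0.1, size=data.raw_tensor.shape).astype(data.raw_tensor.dtype)
    data.raw_tensor = data.raw_tensor + noise
    return tensor_dict


def _drop_short_seqs(seqs):
    """map_seq_stream-style callback: across-segment transform, must return an Iterator"""
    for tensor_dict in seqs:
        if tensor_dict.data["data"].raw_tensor.shape[0] >= 10:  # drop segments shorter than 10 frames
            yield tensor_dict


train = {
    "class": "PostprocessingDataset",  # assumed class name for the new wrapper
    "dataset": {"class": "HDFDataset", "files": ["train.hdf"]},  # any wrapped RETURNN dataset
    "map_seq": _add_noise,
    # alternatively, operate on the whole segment stream:
    # "map_seq_stream": _drop_short_seqs,
}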
@@ -2,7 +2,6 @@
 Attention
 """

-
 from __future__ import annotations
 from typing import Tuple, Union, Optional, Sequence
 import weakref
@@ -17,6 +16,7 @@ __all__ = [
     "SelfAttention",
     "CausalSelfAttention",
     "CausalSelfAttentionState",
+    "RotaryPosCausalSelfAttention",
     "RelPosSelfAttention",
     "RelPosCausalSelfAttention",
     "CrossAttention",
@@ -264,6 +264,69 @@ class CausalSelfAttentionState(rf.State):
         self.accum_axis = accum_axis


+class RotaryPosCausalSelfAttention(CausalSelfAttention):
+    """
+    Rotary positional encoding (RoPE)-based causal self attention
+    """
+
+    def __call__(
+        self,
+        source: Tensor,
+        axis: Dim,
+        *,
+        state: Optional[CausalSelfAttentionState] = None,
+    ) -> Tuple[Tensor, CausalSelfAttentionState]:
+        """forward"""
+        q, k, v = self.forward_qkv(source)
+        k, v, hist_dim, new_state = _causal_self_att_step(k, v, axis=axis, state=state, self=self)
+
+        # Apply RoPE using sinusoidal positional encoding.
+        # Note: base is a bit different in rf.sinusoidal_positional_encoding (like the original)
+        # vs how it's commonly used for RoPE.
+        # log(base) / (dim / 2 - 1) = log(10_000) * 2 / dim
+        # <=> log(base) = log(10_000) * (dim / 2 - 1) * 2 / dim = log(10_000) * (1 - 2 / dim)
+        # <=> base = 10_000 ** (1 - 2 / dim)
+        pos_enc = rf.sinusoidal_positional_encoding(
+            spatial_dim=hist_dim,
+            feat_dim=self.key_dim_per_head,
+            base=10_000 ** (1 - 2 / self.key_dim_per_head.dimension),
+        )  # [T,D]
+        q = _apply_rope(
+            q,
+            (
+                rf.gather(pos_enc, axis=hist_dim, indices=hist_dim.dyn_size_ext - 1)
+                if axis == single_step_dim
+                else rf.replace_dim(pos_enc, in_dim=hist_dim, out_dim=axis)[0]
+            ),
+            self.key_dim_per_head,
+        )
+        k = _apply_rope(k, pos_enc, self.key_dim_per_head)
+
+        output = self.attention(q, k, v, kv_axis=hist_dim)
+        return output, new_state
+
+
+def _apply_rope(x: Tensor, pos_enc: Tensor, feat_dim: Dim) -> Tensor:
+    """
+    :param x: [...,T,D] or [...,D]
+    :param pos_enc: [T,D] or [D]
+    :param feat_dim: D
+    :return: [...,T,D] or [...,D]
+    """
+    feat_half_dim = feat_dim.div_left(2)
+    pe_imag, pe_real = rf.split(pos_enc, axis=feat_dim, out_dims=[feat_half_dim] * 2)  # [T,D/2]
+    # pe_imag = sin, pe_real = cos
+    d2 = Dim(2, name="complex")
+    x = rf.split_dims(x, axis=feat_dim, dims=(feat_half_dim, d2))  # [...,T,D/2,2]
+    x_real = rf.gather(x, indices=0, axis=d2)
+    x_imag = rf.gather(x, indices=1, axis=d2)
+    x_real_ = x_real * pe_real - x_imag * pe_imag
+    x_imag_ = x_real * pe_imag + x_imag * pe_real
+    x_, _ = rf.stack((x_real_, x_imag_), out_dim=d2)  # [...,T,D/2,2]
+    x_, _ = rf.merge_dims(x_, dims=(feat_half_dim, d2), out_dim=feat_dim)  # [...,T,D]
+    return x_
+
+
 class RelPosSelfAttention(SelfAttentionBase):
     """
     Self-attention with relative positional encoding.
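_apply_rope above treats consecutive feature pairs as complex numbers and rotates each pair by the position-dependent angle, using the sine half of the sinusoidal encoding as the imaginary part and the cosine half as the real part. For reference, here is a standalone NumPy sketch of the same rotation; this is an illustrative re-derivation, not code from this diff, and it assumes the layout noted in the comments.

import numpy as np


def apply_rope_reference(x: np.ndarray, sin: np.ndarray, cos: np.ndarray) -> np.ndarray:
    """
    :param x: [T, D] query or key vectors for one head; feature pairs (2i, 2i+1) are taken as (real, imag),
        matching the split_dims(..., dims=(feat_half_dim, d2)) reshape in the RF code
    :param sin: [T, D/2] sine half of the positional encoding (pe_imag in the RF code)
    :param cos: [T, D/2] cosine half of the positional encoding (pe_real in the RF code)
    :return: [T, D] rotated vectors
    """
    x_pairs = x.reshape(x.shape[0], -1, 2)  # [T, D/2, 2]
    x_real, x_imag = x_pairs[..., 0], x_pairs[..., 1]
    # complex multiplication (x_real + i*x_imag) * (cos + i*sin)
    out_real = x_real * cos - x_imag * sin
    out_imag = x_real * sin + x_imag * cos
    return np.stack([out_real, out_imag], axis=-1).reshape(x.shape)

The reshape(T, D/2, 2) here corresponds to the split_dims into (feat_half_dim, d2) in the RF code, i.e. features are paired as (2i, 2i+1) rather than split into halves.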
@@ -836,7 +899,7 @@ def relative_positional_encoding(
     return emb, out_spatial_dim


-
+_sinusoidal_positional_encoding_cache = weakref.WeakKeyDictionary()  # run ctx -> (spatial_dim, feat_dim) -> enc


 def sinusoidal_positional_encoding(
@@ -844,6 +907,7 @@ def sinusoidal_positional_encoding(
     spatial_dim: Dim,
     feat_dim: Dim,
     offset: Optional[Union[int, Tensor]] = None,
+    base: Union[int, float] = 1e4,
     dtype: Optional[str] = None,
     device: Optional[str] = None,
 ) -> Tensor:
@@ -867,8 +931,8 @@ def sinusoidal_positional_encoding(
         dtype = rf.get_default_float_dtype()
     if not device:
         device = rf.get_default_device()
-    cache =
-    cache_key = (spatial_dim, feat_dim, offset, dtype, device)
+    cache = _sinusoidal_positional_encoding_cache.setdefault(rf.get_run_ctx(), {})
+    cache_key = (spatial_dim, feat_dim, offset, base, dtype, device)
     if cache_key in cache:
         return cache[cache_key]
     import math
@@ -886,7 +950,7 @@ def sinusoidal_positional_encoding(

     feat2_dim = feat_dim.div_left(2)
     div_term = rf.exp(
-        rf.range_over_dim(feat2_dim, dtype=dtype, device=device) * -(math.log(
+        rf.range_over_dim(feat2_dim, dtype=dtype, device=device) * -(math.log(base) / (feat2_dim.dimension - 1))
     )
     arg_sin = rf.combine_bc(rf.cast(indices, dtype), "*", div_term)
     arg_cos = arg_sin + math.pi / 2.0
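The base argument added above defaults to 1e4, so existing callers keep the previous frequencies, while RotaryPosCausalSelfAttention passes base = 10_000 ** (1 - 2 / dim) so that the resulting frequencies match the convention commonly used for RoPE. A quick standalone check of the equivalence derived in the code comment (illustration only, not part of the diff):

import math

D = 64  # key_dim_per_head.dimension, arbitrary even example value
base = 10_000 ** (1 - 2 / D)
for i in range(D // 2):
    # frequency as computed by sinusoidal_positional_encoding: exp(-i * log(base) / (D/2 - 1))
    f_sinusoidal = math.exp(-i * math.log(base) / (D // 2 - 1))
    # frequency in the common RoPE convention: 10_000 ** (-2 * i / D)
    f_rope = 10_000.0 ** (-2 * i / D)
    assert math.isclose(f_sinusoidal, f_rope, rel_tol=1e-12)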
@@ -0,0 +1,56 @@
+"""
+Import the parameters from the HuggingFace Llama model.
+"""
+
+from __future__ import annotations
+from typing import TYPE_CHECKING
+import returnn.frontend as rf
+
+if TYPE_CHECKING:
+    from transformers.models.llama.modeling_llama import LlamaAttention
+
+
+def import_params_hf_llama_att_to_rf_rotary_att(model_hf: LlamaAttention, model_rf: rf.RotaryPosCausalSelfAttention):
+    """
+    Import the parameters from the HF attention module.
+    """
+    import torch
+
+    assert model_hf.num_heads == model_rf.num_heads.dimension
+    assert model_hf.hidden_size == model_rf.in_dim.dimension
+    dim = model_hf.hidden_size
+    nh = model_hf.num_heads
+    hdim = dim // nh
+
+    print("HF Model:")
+    print(model_hf)
+    print("Parameters:")
+    num_params_hf = 0
+    for k, v in model_hf.named_parameters():
+        print(f"{k}: {list(v.shape)} {v.dtype}")
+        num_params_hf += v.numel()
+    print("Total number of parameters:", num_params_hf)
+
+    print("RF Model:")
+    print(model_rf)
+    print("Parameters:")
+    num_params_rf = 0
+    for k, v in model_rf.named_parameters():
+        print(f"{k}: {list(v.dims)} {v.dtype}")
+        assert isinstance(v.raw_tensor, torch.nn.Parameter)
+        num_params_rf += v.num_elements()
+    print("Total number of parameters:", num_params_rf)
+    assert num_params_rf == num_params_hf
+
+    # Torch Linear: (out,in), but RF has (in,out).
+    q = model_hf.q_proj.weight.T.reshape(dim, nh, hdim)  # (in,h,out/h)
+    k = model_hf.k_proj.weight.T.reshape(dim, nh, hdim)  # (in,h,out/h)
+    v = model_hf.v_proj.weight.T.reshape(dim, nh, hdim)  # (in,h,out/h)
+    q = q.reshape(dim, nh, 2, hdim // 2).transpose(-1, -2).flatten(-2)  # reorder complex numbers
+    k = k.reshape(dim, nh, 2, hdim // 2).transpose(-1, -2).flatten(-2)  # reorder complex numbers
+    qkv = torch.cat([q, k, v], dim=2)  # (in,h,out/h*3)
+    qkv = qkv.reshape(dim, 3 * dim)
+    assert model_hf.q_proj.bias is None  # not implemented
+    with torch.no_grad():
+        model_rf.qkv.weight.raw_tensor.copy_(qkv)
+        model_rf.proj.weight.raw_tensor.copy_(model_hf.o_proj.weight.T)
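A hypothetical end-to-end usage of the new importer: build a small HuggingFace LlamaAttention and a matching rf.RotaryPosCausalSelfAttention, then copy the parameters across. The RF constructor arguments, the HF config fields, and the import path of the new module are assumptions (they depend on the respective library versions), not something this diff specifies.

import returnn.frontend as rf
from returnn.tensor import Dim
from returnn.frontend.conversions.hf_llama import import_params_hf_llama_att_to_rf_rotary_att  # assumed module path
from transformers.models.llama.configuration_llama import LlamaConfig
from transformers.models.llama.modeling_llama import LlamaAttention

# Small Llama-style attention block: 4 heads, 64 dims per head (head dim must be even for the RoPE pairing).
# Keys/values use the full number of heads, since the importer assumes plain multi-head attention.
hf_config = LlamaConfig(hidden_size=256, num_attention_heads=4, num_key_value_heads=4)
model_hf = LlamaAttention(hf_config, layer_idx=0)  # constructor signature depends on the transformers version

in_dim = Dim(256, name="model")
model_rf = rf.RotaryPosCausalSelfAttention(
    in_dim,  # assumed constructor arguments, inherited from the RF self-attention base module
    proj_dim=in_dim,
    key_dim_total=in_dim,
    value_dim_total=in_dim,
    num_heads=4,
    with_bias=False,  # the importer asserts that the HF projections have no bias
)

import_params_hf_llama_att_to_rf_rotary_att(model_hf, model_rf)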