returnn 1.20240830.140746.tar.gz → 1.20240903.205823.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
This release of returnn was flagged as potentially problematic by the registry.
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/PKG-INFO +1 -1
- returnn-1.20240903.205823/_setup_info_generated.py +2 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/_numpy_backend.py +15 -1
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tensor/_dim_extra.py +124 -2
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/config_entry_points.py +3 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/util/data.py +1 -1
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn.egg-info/PKG-INFO +1 -1
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/rf_utils.py +4 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_array.py +1 -1
- returnn-1.20240903.205823/tests/test_rf_decoder_transformer.py +324 -0
- returnn-1.20240830.140746/_setup_info_generated.py +0 -2
- returnn-1.20240830.140746/tests/test_rf_decoder_transformer.py +0 -163
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/.editorconfig +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/.gitignore +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/.gitmodules +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/.kateconfig +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/CHANGELOG.md +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/CODEOWNERS +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/CONTRIBUTING.md +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/LICENSE +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/MANIFEST.in +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/README.rst +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/12AX.cluster_map +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/_setup_returnn_env.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-fwd.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-horovod-mpi.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-horovod-mpi.py.sh +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-horovod-mpi.sh +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-hyper-param-tuning.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-iter-dataset.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-list-devices.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-lua-torch-layer.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-pretrain.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-record-and-push-to-webserver.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-returnn-as-framework.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-rf-pt-benchmark.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-rf.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-rhn-enwik8.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-sprint-interface.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-att-copy.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-attention.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-enc-dec.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-hard-att-copy.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-lstm-benchmark.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-native-lstm.12ax.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-native-lstm2.12ax.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-neural-transducer.12ax.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-rec-explicit-lstm.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-rec-explicit-rnn.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-rec-self-att.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-search-compiled-graph.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-timit-lstm-ctc.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-torch.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/demo.sh +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/README.txt +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/chars.txt +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/config_demo +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/config_fwd +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/config_real +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/decode.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/go.sh +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/lines.txt +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/split/eval.txt +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/split/train.txt +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/IAM/split/valid.txt +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/README.md +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/artificial/create_test_h5.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/artificial/forwardconfig +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/artificial/go.sh +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/artificial/trainconfig +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/artificial_rgb/go.sh +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/pyproject.toml +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/requirements.txt +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/__main__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/__old_mod_loader__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/__setup__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/config.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/audio.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/basic.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/bundle_file.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/cached.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/cached2.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/distrib_files.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/generating.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/hdf.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/lm.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/map.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/meta.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/multi_proc.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/normalization_data.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/numpy_dump.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/postprocessing.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/raw_wav.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/sprint.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/stereo.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/util/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/util/feature_extraction.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/util/strings.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/datasets/util/vocabulary.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/engine/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/engine/base.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/engine/batch.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/__main__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/.git +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/graph_editor/README.md +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/graph_editor/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/graph_editor/edit.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/graph_editor/reroute.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/graph_editor/select.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/graph_editor/subgraph.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/graph_editor/transform.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/extern/graph_editor/util.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/forward_iface.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/_backend.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/_native/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/_native/backend.cpp +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/_native/backend.hpp +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/_native/module.cpp +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/_native/module.hpp +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/_native/py_utils.hpp +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/_native/tensor_ops.cpp +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/_native/tensor_ops.hpp +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/_random_journal.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/_utils.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/array_.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/attention.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/audio/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/audio/mel.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/audio/specaugment.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/backend.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/build_from_dict.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/cond.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/const.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/container.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/control_flow_ctx.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/conv.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/conversions/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/conversions/espnet_e_branchformer.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/conversions/hf_llama.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/conversions/torch_nn.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/decoder/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/decoder/transformer.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/device.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/dims.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/dropout.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/dtype.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/encoder/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/encoder/base.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/encoder/conformer.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/encoder/e_branchformer.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/gradient.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/graph.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/hooks.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/init.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/label_smoothing.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/linear.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/loop.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/loss.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/math_.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/matmul.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/module.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/normalization.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/parameter.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/parametrizations.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/parametrize.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/piecewise_linear.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/rand.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/rec.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/reduce.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/run_ctx.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/signal.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/state.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/stepwise_scheduler.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/tensor_array.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/frontend/types.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/import_/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/import_/common.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/import_/git.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/import_/import_.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/learning_rate_control.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/log.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/native_op.cpp +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/native_op.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/pretrain.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/sprint/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/sprint/cache.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/sprint/control.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/sprint/error_signals.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/sprint/extern_interface.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/sprint/interface.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tensor/README.md +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tensor/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tensor/_tensor_extra.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tensor/_tensor_mixin_base.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tensor/_tensor_op_overloads.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tensor/control_flow_ctx.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tensor/dim.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tensor/marked_dim.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tensor/tensor.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tensor/tensor_dict.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tensor/utils.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/compat.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/data_pipeline.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/distributed.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/engine.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/README.md +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/_backend.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/_utils.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/cond.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/dims.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/layer.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/loop.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/make_layer.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/masked_computation.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_low_level/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/frontend_low_level/_backend.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/horovod.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/hyper_param_tuning.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/layers/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/layers/base.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/layers/basic.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/layers/rec.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/layers/segmental_model.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/layers/signal_processing.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/layers/variable.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/native_op.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/network.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/sprint.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/updater.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/util/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/util/basic.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/util/gradient_checkpoint.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/util/ken_lm.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/tf/util/open_fst.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/README.md +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/data/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/data/extern_data.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/data/pipeline.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/data/queued_data_iter.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/data/tensor_utils.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/distributed.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/engine.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/frontend/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/frontend/_backend.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/frontend/_rand.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/frontend/bridge.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/frontend/raw_ops.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/updater.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/util/README.md +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/util/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/util/array_.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/util/diagnose_gpu.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/util/gradient_checkpoint.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/torch/util/scaled_gradient.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/__init__.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/basic.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/better_exchook.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/bpe.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/debug.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/debug_helpers.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/file_cache.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/fsa.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/literal_py_to_pickle.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/math.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/native_code_compiler.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/pprint.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/py-to-pickle.cpp +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/py_compat.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/py_ext_mod_compiler.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/result_with_reason.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/sig_proc.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/task_system.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/train_proc_manager.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn/util/watch_memory.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn.egg-info/SOURCES.txt +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn.egg-info/dependency_links.txt +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/returnn.egg-info/top_level.txt +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/rnn.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/setup.cfg +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/setup.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/DummySprintExec.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/PyCharm-inspection-profile.xml +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/PyCharm.idea/.gitignore +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/PyCharm.idea/.name +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/PyCharm.idea/misc.xml +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/PyCharm.idea/modules.xml +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/PyCharm.idea/returnn.iml +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/_set_num_threads1.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/_setup_returnn_env.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/_setup_test_env.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/bpe-unicode-demo.codes +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/bpe-unicode-demo.vocab +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/lexicon_opt.fst +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/lexicon_opt.isyms +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/lexicon_opt.jpg +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/lexicon_opt.osyms +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/lint_common.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/pycharm-inspect.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/pylint.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/returnn-as-framework.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/spelling.dic +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_Config.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_Dataset.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_Fsa.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_GeneratingDataset.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_HDFDataset.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_LearningRateControl.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_Log.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_MultiProcDataset.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_Pretrain.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_ResNet.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_SprintDataset.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_SprintInterface.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_TFEngine.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_TFNativeOp.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_TFNetworkLayer.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_TFNetworkRecLayer.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_TFNetworkSigProcLayer.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_TFUpdater.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_TFUtil.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_TF_determinism.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_TaskSystem.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_TaskSystem_SharedMem.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_TranslationDataset.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_Util.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_demos.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_fork_exec.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_hdf_dump.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_attention.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_base.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_cond.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_const.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_container.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_conv.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_encoder_conformer.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_gradient.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_label_smoothing.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_loop.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_math.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_normalization.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_piecewise_linear.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_rec.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_reduce.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_rf_signal.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_tensor.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_tools.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_torch_dataset.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_torch_engine.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_torch_frontend.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_torch_internal_frontend.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/test_torch_util.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tests/torch_utils.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/_setup_returnn_env.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/analyze-dataset-batches.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/bliss-collect-seq-lens.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/bliss-dump-text.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/bliss-get-segment-names.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/bliss-to-ogg-zip.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/bpe-create-lexicon.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/calculate-word-error-rate.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/cleanup-old-models.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/collect-orth-symbols.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/collect-words.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/compile_native_op.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/compile_tf_graph.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/debug-dump-search-scores.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/debug-plot-search-scores.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/dump-dataset-raw-strings.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/dump-dataset.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/dump-forward-stats.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/dump-forward.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/dump-network-json.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/dump-pickle.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/extract_state_tying_from_dataset.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/get-attention-weights.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/get-best-model-epoch.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/hdf_dump.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/hdf_dump_translation_dataset.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/import-blocks-mt-model.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/import-t2t-mt-model.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/.gitignore +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/Makefile +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/README.md +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/example/README.md +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/example/libs_list +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/example/state_vars_list +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/example/tensor_names_list +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/file.h +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/main.cc +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/rescorer.h +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/vocabulary.cc +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/lattice_rescorer/vocabulary.h +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/tf_avg_checkpoints.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/tf_inspect_checkpoint.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/tf_inspect_summary_log.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/torch_avg_checkpoints.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/torch_export_to_onnx.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/torch_inspect_checkpoint.py +0 -0
- {returnn-1.20240830.140746 → returnn-1.20240903.205823}/tools/torch_inspect_checkpoint_and_opt.py +0 -0
returnn/frontend/_numpy_backend.py

```diff
@@ -153,7 +153,10 @@ class NumpyBackend(Backend[numpy.ndarray]):
         op = NumpyBackend._CombineKindMap.get(kind)
         if not op:
             raise ValueError(f"RF NumpyBackend: combine kind {kind!r} not supported")
-        return op(a, b)
+        res = op(a, b)
+        if not isinstance(res, numpy.ndarray):
+            res = numpy.array(res)
+        return res
 
     @staticmethod
     def range_over_dim(dim: Dim, *, dtype: Optional[str] = None, device: Optional[str] = None) -> Tensor[numpy.ndarray]:
@@ -211,3 +214,14 @@ class NumpyBackend(Backend[numpy.ndarray]):
             sparse_dim=source.sparse_dim,
         )
         return res
+
+    @staticmethod
+    def activation_raw(raw_tensor: numpy.ndarray, func: str) -> numpy.ndarray:
+        """
+        :param raw_tensor:
+        :param func: "tanh", "sigmoid", "relu", ...
+        :return: raw tensor with elementwise activation applied
+        """
+        if func == "relu":
+            return numpy.array(numpy.maximum(raw_tensor, 0))
+        raise NotImplementedError("NumpyBackend: activation %r not implemented" % func)
```
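Both the `combine_raw` change and the `numpy.array(...)` wrapper in the new `activation_raw` guard against NumPy returning plain scalars rather than arrays. A minimal sketch of the behavior being guarded against (illustrative only, not part of this release):

```python
import numpy

# NumPy ufuncs return numpy scalars, not ndarrays, for scalar inputs:
res = numpy.maximum(numpy.float32(1.0), numpy.float32(2.0))
print(isinstance(res, numpy.ndarray))  # False -- it is a numpy.float32 scalar

# Wrapping normalizes the result to a 0-d array, as the patched code does:
res = numpy.array(res)
print(isinstance(res, numpy.ndarray), res.shape)  # True ()
```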
returnn/tensor/_dim_extra.py

```diff
@@ -4,8 +4,9 @@ or just rarely used attribs, such that we can save memory for the common case.
 """
 
 from __future__ import annotations
-from typing import TYPE_CHECKING, Optional, Union, Any, Tuple, Sequence, Dict, List, Set, Callable
+from typing import TYPE_CHECKING, Optional, Union, Any, Tuple, Sequence, MutableMapping, Dict, List, Set, Callable
 import operator
+import weakref
 
 from returnn.util.basic import Entity
 from returnn.util import basic as util
@@ -118,7 +119,7 @@ class _DimExtra:
         self.same_for_batch_ctx = {}  # type: Dict[Tuple[BatchInfo,Optional[ControlFlowContext]],_d.Dim]
         self.cache_dyn_size_ext_dev = {}  # type: Dict[str,_t.Tensor]  # device -> dyn_size_ext
         self.cache_seq_mask: Dict[Tuple[str, Optional[Tuple[Dim, ...]]], _t.Tensor] = {}  # (dev,dim_order) -> seq_mask
-        self.cache_dim_math = {}  # op (add,sub,...), operand -> Dim
+        self.cache_dim_math = _CacheDimMath()  # op (add,sub,...), operand -> Dim
 
     def __getstate__(self):
         d = vars(self).copy()
@@ -389,6 +390,10 @@ class _DimMixin:
         if dim_extra:
             # Any dims via dim math could also contain raw tensors,
             # so iterate through them.
+            if dim.dyn_size_ext is not None or dim.dimension is None:
+                dim_extra.cache_dim_math.clear()
+            else:
+                dim_extra.cache_dim_math.clear_dynamic()
             queue += dim_extra.cache_dim_math.values()
             if dim_extra.same_as:
                 queue.append(dim_extra.same_as)
@@ -2873,6 +2878,123 @@ def dim_cmp_value(obj):
     return obj
 
 
+class _CacheDimMath:
+    """op (add,sub,...), operand -> Dim"""
+
+    class _OperandCache:
+        def __init__(self):
+            self.dims: MutableMapping[Dim, Dim] = weakref.WeakKeyDictionary()
+            self.statics: Dict[int, Dim] = {}
+
+    def __init__(self):
+        self._ops: Dict[str, _CacheDimMath._OperandCache] = {}
+
+    def __repr__(self):
+        return "_CacheDimMath({%s})" % ", ".join("%r: %r" % (k, v) for k, v in self.items())
+
+    def _get_op_dict(self, __key: Tuple[str, Union[Dim, int]]) -> _OperandCache:
+        if __key[0] in self._ops:
+            return self._ops[__key[0]]
+        else:
+            op_dict = self._OperandCache()
+            self._ops[__key[0]] = op_dict
+            return op_dict
+
+    def __setitem__(self, __key: Tuple[str, Union[Dim, int]], __value: Dim):
+        op_dict = self._get_op_dict(__key)
+        if isinstance(__key[1], int):
+            value_dict = op_dict.statics
+        else:
+            value_dict = op_dict.dims
+        if __key[1] in value_dict:
+            value_dict[__key[1]] = __value
+            return
+        if len(value_dict) >= 5:
+            # Just to avoid memory leaks.
+            value_dict.clear()
+        value_dict[__key[1]] = __value
+
+    def __delitem__(self, __key: Tuple[str, Union[Dim, int]]):
+        op_dict = self._ops[__key[0]]
+        if isinstance(__key[1], int):
+            del op_dict.statics[__key[1]]
+        else:
+            del op_dict.dims[__key[1]]
+
+    def __getitem__(self, __key: Tuple[str, Union[Dim, int]]) -> Dim:
+        op_dict = self._ops[__key[0]]
+        if isinstance(__key[1], int):
+            return op_dict.statics[__key[1]]
+        else:
+            return op_dict.dims[__key[1]]
+
+    def __contains__(self, __key: Tuple[str, Union[Dim, int]]) -> bool:
+        op_dict = self._ops.get(__key[0])
+        if not op_dict:
+            return False
+        if isinstance(__key[1], int):
+            return __key[1] in op_dict.statics
+        else:
+            return __key[1] in op_dict.dims
+
+    def get(self, __key: Tuple[str, Union[Dim, int]], default: Optional[Dim] = None) -> Optional[Dim]:
+        """get"""
+        op_dict = self._ops.get(__key[0])
+        if not op_dict:
+            return default
+        if isinstance(__key[1], int):
+            return op_dict.statics.get(__key[1], default)
+        else:
+            return op_dict.dims.get(__key[1], default)
+
+    def setdefault(self, __key: Tuple[str, Union[Dim, int]], __value: Dim):
+        """setdefault"""
+        existing = self.get(__key)
+        if existing is not None:
+            return existing
+        self[__key] = __value
+        return __value
+
+    def clear(self):
+        """clear"""
+        self._ops.clear()
+
+    def clear_dynamic(self):
+        """clear dynamic part"""
+        for op_dict in self._ops.values():
+            for k, v in list(op_dict.dims.items()):
+                if v.dyn_size_ext is not None or v.dimension is None:
+                    del op_dict.dims[k]
+
+    def __len__(self):
+        count = 0
+        for op_dict in self._ops.values():
+            count += len(op_dict.statics)
+            count += len(op_dict.dims)
+        return count
+
+    def items(self):
+        """items"""
+        for op_name, op_dict in self._ops.items():
+            for key, value in op_dict.statics.items():
+                yield (op_name, key), value
+            for key, value in op_dict.dims.items():
+                yield (op_name, key), value
+
+    def keys(self):
+        """keys"""
+        for k, v in self.items():
+            yield k
+
+    def values(self):
+        """values"""
+        for k, v in self.items():
+            yield v
+
+    def __iter__(self):
+        yield from self.keys()
+
+
 def _behavior_version_reset_callback():
     # Reset things we did in _handle_new_min_version.
     _DimMixin._SimpleEquality = False
```
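Two details of the new `_CacheDimMath` are worth noting: `Dim` operands live in a `weakref.WeakKeyDictionary`, so caching e.g. `dim + 1` does not keep the operand `Dim` alive, and each per-op table is cleared once it reaches 5 entries ("Just to avoid memory leaks"). A standalone sketch of the weak-key behavior this relies on (illustrative, using a stand-in class rather than `Dim`):

```python
import gc
import weakref


class Operand:
    """Stand-in for a Dim used as a cache key."""


cache = weakref.WeakKeyDictionary()
key = Operand()
cache[key] = "derived dim"
print(len(cache))  # 1

del key  # drop the last strong reference to the key
gc.collect()
print(len(cache))  # 0 -- the entry vanished together with its key
```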
returnn/tf/frontend_layers/config_entry_points.py

```diff
@@ -118,6 +118,9 @@ def get_net_dict(
             # but now the TF engine actually wants to have Tensor[tf.Tensor].
             # Reset it now. The TF engine should redefine it again.
             elem.reset_batch_and_raw()
+        elif isinstance(elem, set):
+            # map_structure does not recurse into sets.
+            nest.map_structure(_cleanup_net_dict_value, sorted(list(elem)))
         return elem
 
     # Do some cleanup.
```
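For background on the new `elif` branch: `tf.nest` does not treat Python sets as nested structures, so `map_structure` would pass a set through as a single leaf; the patch therefore recurses into sets manually, sorting the elements first for a deterministic order. A small sketch of the assumed `tf.nest` behavior:

```python
import tensorflow as tf

nested = {"deps": ({"b", "a"}, [1, 2])}
# The set comes back as one leaf; its elements are not flattened:
print(tf.nest.flatten(nested))  # expected: [{'b', 'a'}, 1, 2]

# Recursing into the set manually, as the patch does:
tf.nest.map_structure(print, sorted({"b", "a"}))  # prints "a", then "b"
```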
returnn/tf/util/data.py

```diff
@@ -339,7 +339,7 @@ class BatchInfo:
 
         # Ok, need to extend.
         global_batch_dims = [dim for dim in all_virtual_dims if isinstance(dim, BatchInfo.GlobalBatchDim)]
-        assert len(global_batch_dims) == 1
+        assert len(global_batch_dims) == 1, f"got global_batch_dims={global_batch_dims!r}"
         global_batch_dim = global_batch_dims[0]
         assert base.virtual_dims == [global_batch_dim]
         beams = [dim for dim in all_virtual_dims if isinstance(dim, BatchInfo.BeamDim)]
```
tests/rf_utils.py

```diff
@@ -185,6 +185,9 @@ def _run_model_net_dict_tf(
 
     from returnn.tf.frontend_layers.config_entry_points import get_net_dict
 
+    # noinspection PyProtectedMember
+    from returnn.frontend import _backend
+
     config = Config(
         {
             "debug_runtime_sanity_checks": True,
@@ -203,6 +206,7 @@ def _run_model_net_dict_tf(
     outputs_layers = rf.get_run_ctx().outputs
     print("*** outputs:", outputs_layers)
 
+    _backend.select_backend_tf()
     net = TFNetwork(config=config, train_flag=False)
     net.construct_from_dict(net_dict)
 
```
tests/test_rf_array.py

```diff
@@ -238,7 +238,7 @@ def test_pad_time_right():
     assert data_.dims == (batch_dim, time_dim, in_dim)
     new_time_dim = out_.dims[1]
     assert out_.dims == (batch_dim, new_time_dim, in_dim) and new_time_dim != time_dim
-    assert new_time_dim == time_dim + 1  # math dim... not really necessary check here...
+    # assert new_time_dim == time_dim + 1  # math dim... not really necessary check here...
     assert time_dim.dyn_size_ext.dims == new_time_dim.dyn_size_ext.dims == (batch_dim,)
     batch_size = batch_dim.get_dim_value()
     assert batch_size > 1
```
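The disabled assertion compared the dim returned by padding against the dim-math expression `time_dim + 1`. A minimal sketch of what such an expression means in RETURNN (illustrative; whether a padded output dim compares equal to it is precisely the check the test no longer insists on):

```python
from returnn.tensor import Dim

time_dim = Dim(None, name="time")  # dynamic dim; sizes get attached later
d1 = time_dim + 1  # derived "math" dim
d2 = time_dim + 1
# Expected True: the same expression yields equal derived dims (and, with the
# new _CacheDimMath, possibly the very same cached object):
print(d1 == d2)
```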
@@ -0,0 +1,324 @@
+"""
+Testing returnn.frontend.decoder.transformer.
+"""
+
+from __future__ import annotations
+
+import _setup_test_env  # noqa
+import sys
+import unittest
+import torch
+from returnn.util import better_exchook
+from returnn.util.debug import PyTracer, check_py_traces_rf_to_pt_equal
+from returnn.tensor import Tensor, Dim
+import returnn.frontend as rf
+
+
+def _setup():
+    try:
+        import lovely_tensors
+
+        lovely_tensors.monkey_patch()
+    except ImportError:
+        pass
+
+
+_setup()
+
+
+def test_llama():
+    """
+    Test that we can reproduce the Llama model.
+
+    This is the final complete test.
+    There are several other sub-tests:
+
+    - :func:`test_rotary_embedding`
+    - :func:`test_rope_causal_self_att`
+
+    Some references for the whole Llama model:
+    https://github.com/meta-llama/llama/blob/main/llama/model.py
+    https://github.com/meta-llama/llama3/blob/main/llama/model.py
+    https://github.com/karpathy/llama2.c/blob/master/model.py
+    https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py
+    https://github.com/hkproj/pytorch-llama/blob/main/model.py
+    https://github.com/likejazz/llama3.np/blob/main/llama3.py
+    """
+    from returnn.frontend.decoder.transformer import TransformerDecoder, TransformerDecoderLayer, FeedForwardGated
+    from returnn.frontend.conversions.hf_llama import import_params_hf_llama_to_rf_transformer_decoder
+    from transformers.models.llama.modeling_llama import LlamaDecoderLayer, LlamaForCausalLM, LlamaModel, LlamaConfig
+
+    config = LlamaConfig(
+        vocab_size=11,
+        hidden_size=64,
+        intermediate_size=64 * 4,
+        num_hidden_layers=2,
+        num_attention_heads=2,
+        max_position_embeddings=128,
+    )
+
+    model_hf = LlamaForCausalLM(config)
+    print("HF Model:")
+    print(model_hf)
+    print("Parameters:")
+    num_params = 0
+    for k, v in model_hf.named_parameters():
+        print(f"{k}: {list(v.shape)} {v.dtype}")
+        num_params += v.numel()
+    print("Total number of parameters:", num_params)
+
+    rf.select_backend_torch()
+
+    model_dim = Dim(config.hidden_size, name="model")
+    model_rf = TransformerDecoder(
+        encoder_dim=None,
+        vocab_dim=Dim(config.vocab_size, name="vocab"),
+        model_dim=model_dim,
+        num_layers=config.num_hidden_layers,
+        pos_enc=None,
+        norm=rf.RMSNorm,
+        ff=FeedForwardGated,
+        share_embedding=False,
+        input_embedding_scale=1.0,
+        decoder_layer_opts=dict(self_att=rf.RotaryPosCausalSelfAttention, self_att_opts=dict(with_bias=False)),
+        num_heads=config.num_attention_heads,
+        dropout=0,
+        att_dropout=0,
+    )
+    print("RF Model:")
+    print(model_rf)
+    print("Parameters:")
+    num_params = 0
+    for k, v in model_rf.named_parameters():
+        print(f"{k}: {list(v.dims)} {v.dtype}")
+        num_params += v.num_elements()
+    print("Total number of parameters:", num_params)
+
+    import_params_hf_llama_to_rf_transformer_decoder(model_hf, model_rf)
+
+    batch_dim = Dim(3, name="batch")
+    seq_dim = Dim(rf.random_uniform([batch_dim], minval=7, maxval=13, dtype="int32"), name="seq")
+    in_ = rf.random_uniform([batch_dim, seq_dim], sparse_dim=model_rf.vocab_dim)
+    in_.name = "input_labels"
+
+    with PyTracer([TransformerDecoder.__call__, TransformerDecoderLayer.__call__], Tensor) as trace_rf:
+        out_rf, _ = model_rf(in_, spatial_dim=seq_dim, state=model_rf.default_initial_state(batch_dims=[batch_dim]))
+
+    mask = rf.sequence_mask([batch_dim, seq_dim])
+    with PyTracer([LlamaForCausalLM.forward, LlamaModel.forward, LlamaDecoderLayer.forward], torch.Tensor) as trace_hf:
+        out_hf = model_hf(in_.raw_tensor, attention_mask=mask.raw_tensor)
+
+    check_py_traces_rf_to_pt_equal(
+        trace_rf.captured_locals,
+        trace_hf.captured_locals,
+        [
+            (
+                (TransformerDecoder.__call__, 0, "decoded", 0),
+                (LlamaModel.forward, 0, "inputs_embeds", 0),
+                (batch_dim, seq_dim, model_dim),
+            ),
+        ],
+    )
+
+    print("Check...")
+    assert out_rf.raw_tensor.shape == out_hf.logits.shape
+    torch.testing.assert_allclose(out_rf.raw_tensor, out_hf.logits)
+    print(" all matched!")
+
+
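In `test_llama` above, `PyTracer` records the local variables of the listed functions while they execute, and `check_py_traces_rf_to_pt_equal` compares traced RF tensors against raw torch tensors. Each checkpoint apparently names `(function, call index, local variable name, occurrence index)` on both sides, plus the dims used to map the RF tensor onto the torch layout. A hypothetical extra checkpoint could look like the following; the local variable names here are assumptions for illustration, not taken from the actual code:

    (
        (TransformerDecoderLayer.__call__, 0, "x", -1),       # hypothetical RF-side local
        (LlamaDecoderLayer.forward, 0, "hidden_states", -1),  # hypothetical HF-side local
        (batch_dim, seq_dim, model_dim),
    ),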
+def test_feed_forward_gated():
+    from returnn.frontend.decoder.transformer import FeedForwardGated
+    from returnn.frontend.conversions.hf_llama import import_params_hf_llama_mlp_to_rf_feed_forward_gated
+    from transformers.models.llama.modeling_llama import LlamaMLP, LlamaConfig
+
+    config = LlamaConfig(
+        vocab_size=11,
+        hidden_size=64,
+        intermediate_size=64 * 4,
+        num_hidden_layers=2,
+        num_attention_heads=2,
+        max_position_embeddings=128,
+    )
+
+    model_hf = LlamaMLP(config)
+
+    rf.select_backend_torch()
+    rf.set_random_seed(42)
+
+    model_dim = Dim(config.hidden_size, name="model")
+    model_rf = FeedForwardGated(out_dim=model_dim, ff_dim=Dim(config.intermediate_size, name="inter"), dropout=0.0)
+
+    import_params_hf_llama_mlp_to_rf_feed_forward_gated(model_hf, model_rf)
+
+    batch_dim = Dim(3, name="batch")
+    seq_dim = Dim(rf.random_uniform([batch_dim], minval=7, maxval=13, dtype="int32"), name="seq")
+    in_ = rf.random_uniform([batch_dim, seq_dim, model_dim])
+    in_.name = "input"
+
+    out_rf = model_rf(in_)
+    out_rf = out_rf.copy_transpose((batch_dim, seq_dim, model_dim))
+
+    out_hf = model_hf(in_.raw_tensor)
+
+    print("Check...")
+    assert out_rf.raw_tensor.shape == out_hf.shape
+    torch.testing.assert_allclose(out_rf.raw_tensor, out_hf)
+    print(" all matched!")
+
+
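`FeedForwardGated` is checked here against HuggingFace's `LlamaMLP`, i.e. a SwiGLU-style gated feed-forward block. Conceptually, the recipe in plain PyTorch looks as follows; this is a sketch of the computation being compared, not the RF implementation:

    import torch

    def gated_ffn(x: torch.Tensor, w_gate: torch.Tensor, w_up: torch.Tensor, w_down: torch.Tensor) -> torch.Tensor:
        # LlamaMLP recipe: down_proj(silu(gate_proj(x)) * up_proj(x))
        return (torch.nn.functional.silu(x @ w_gate) * (x @ w_up)) @ w_down

    x = torch.randn(3, 10, 64)     # (batch, seq, model), matching hidden_size=64 above
    w_gate = torch.randn(64, 256)  # model -> intermediate (64 * 4)
    w_up = torch.randn(64, 256)
    w_down = torch.randn(256, 64)  # intermediate -> model
    print(gated_ffn(x, w_gate, w_up, w_down).shape)  # torch.Size([3, 10, 64])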
+def test_transformer_rel_pos_att():
+    """
+    This tests that TransformerDecoder together with RelPosCausalSelfAttention
+    and FeedForwardGated works in a reasonable standard setup.
+    Works = does not cause exceptions.
+
+    Additionally, we test an issue where dim tags seem to be leaking.
+    """
+    from returnn.tensor import TensorDict, batch_dim
+    from returnn.frontend.decoder.transformer import TransformerDecoder, FeedForwardGated
+    from returnn.datasets.util.vocabulary import Vocabulary
+    from returnn.torch.data.extern_data import raw_dict_to_extern_data
+
+    rf.select_backend_torch()
+
+    vocab = Vocabulary.create_vocab_from_labels(
+        [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"], eos_label=0, bos_label=0
+    )
+    assert vocab.bos_label_id is not None and vocab.eos_label_id is not None
+    vocab_dim = Dim(vocab.num_labels, name="vocab", vocab=vocab)
+
+    model_def = rf.build_dict(
+        TransformerDecoder,
+        encoder_dim=None,
+        num_layers=2,  # with a higher number of layers, the problem occurs more, but also with 2
+        model_dim=20,
+        num_heads=2,
+        pos_enc=None,
+        norm=rf.build_dict(rf.RMSNorm),
+        ff=rf.build_dict(FeedForwardGated),
+        decoder_layer_opts=dict(self_att=rf.build_dict(rf.RelPosCausalSelfAttention, with_bias=False)),
+        dropout=0.0,
+        att_dropout=0.0,
+    )
+    model = rf.build_from_dict(model_def, vocab_dim=vocab_dim)
+    assert isinstance(model, TransformerDecoder)
+
+    leakages = []
+
+    # Adapted from Dim reset_raw.
+    def _num_referenced_dim_tags(self: Dim) -> int:
+        visited = set()  # ids
+        queue = [self]
+        while queue:
+            # noinspection PyShadowingNames
+            dim: Dim = queue.pop()
+            if id(dim) in visited:
+                continue
+            visited.add(id(dim))
+            # noinspection PyProtectedMember
+            dim_extra = dim._extra
+            if dim_extra:
+                # Any dims via dim math could also contain raw tensors,
+                # so iterate through them.
+                print("Dim:", dim)
+                print(" cache_dim_math:", dim_extra.cache_dim_math)
+                print(" same_as:", dim_extra.same_as)
+                print(" copy_same_as:", dim_extra.copy_same_as)
+                print(" same_for_batch_ctx:", dim_extra.same_for_batch_ctx)
+                queue += dim_extra.cache_dim_math.values()
+                if dim_extra.same_as:
+                    queue.append(dim_extra.same_as)
+                if dim_extra.copy_same_as:
+                    queue.append(dim_extra.copy_same_as)
+                queue += dim_extra.same_for_batch_ctx.values()
+        print(f"{self} _num_referenced_dim_tags (reset_raw), visited {len(visited)}")
+        return len(visited)
+
+    time_dim = Dim(None, name="time")
+    extern_data_template = TensorDict([Tensor("data", (batch_dim, time_dim), "int32", sparse_dim=vocab_dim)])
+
+    prev_step_num_tags = 0
+    for step in range(10):
+        print("Step:", step)
+        rf.init_train_step_run_ctx(train_flag=False, step=step)
+
+        # Check that we don't have any dim tags leaking.
+        # Do that right after init_train_step_run_ctx, because that might clean some previous caches.
+        step_num_tags = _num_referenced_dim_tags(time_dim)
+        if step > 1 and step_num_tags > prev_step_num_tags:
+            leakages.append(step_num_tags - prev_step_num_tags)
+        prev_step_num_tags = step_num_tags
+
+        seq_lens = torch.randint(5, 11, (3,), dtype=torch.int32)
+        extern_data = raw_dict_to_extern_data(
+            {"data": torch.randint(0, vocab_dim.dimension, (3, seq_lens.max())), "data:seq_len": seq_lens},
+            extern_data_template=extern_data_template,
+            device="cpu",
+        )
+
+        targets = extern_data["data"]
+        targets_spatial_dim = time_dim
+        input_labels, (targets_w_eos_spatial_dim,) = rf.pad(
+            targets, axes=[targets_spatial_dim], padding=[(1, 0)], value=vocab.bos_label_id
+        )
+        targets_w_eos, _ = rf.pad(
+            targets,
+            axes=[targets_spatial_dim],
+            padding=[(0, 1)],
+            value=vocab.eos_label_id,
+            out_dims=[targets_w_eos_spatial_dim],
+        )
+
+        batch_dims = [batch_dim]
+
+        # Gradients not relevant for this test.
+        with torch.no_grad():
+            logits, _ = model(
+                input_labels,
+                spatial_dim=targets_w_eos_spatial_dim,
+                encoder=None,
+                state=model.default_initial_state(batch_dims=batch_dims),
+            )
+
+            logits_packed, pack_dim = rf.pack_padded(
+                logits, dims=batch_dims + [targets_w_eos_spatial_dim], enforce_sorted=False
+            )
+            targets_packed, _ = rf.pack_padded(
+                targets_w_eos, dims=batch_dims + [targets_w_eos_spatial_dim], enforce_sorted=False, out_dim=pack_dim
+            )
+
+            log_prob = rf.log_softmax(logits_packed, axis=model.vocab_dim)
+            # log_prob = rf.label_smoothed_log_prob_gradient(log_prob, 0.1, axis=model.target_dim)
+            loss = rf.cross_entropy(
+                target=targets_packed, estimated=log_prob, estimated_type="log-probs", axis=model.vocab_dim
+            )
+            loss.mark_as_loss("ce", use_normalized_loss=True)
+
+            best = rf.reduce_argmax(logits_packed, axis=model.vocab_dim)
+            frame_error = best != targets_packed
+            frame_error.mark_as_loss(name="fer", as_error=True)
+
+    assert not leakages, f"Leakages: {leakages}"
+
+
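The leak check in `test_transformer_rel_pos_att` is a reachability count over the dim-tag reference graph: if the count keeps growing across steps even though each step does the same computation, tags are being retained somewhere. The same pattern in generic form (a sketch; `get_refs` stands in for the `_extra` traversal above):

    def count_reachable(root, get_refs):
        """Count objects reachable from root, following get_refs(obj) edges."""
        visited = set()  # object ids
        queue = [root]
        while queue:
            obj = queue.pop()
            if id(obj) in visited:
                continue
            visited.add(id(obj))
            queue.extend(get_refs(obj))
        return len(visited)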
+if __name__ == "__main__":
+    better_exchook.install()
+    if len(sys.argv) <= 1:
+        for k, v in sorted(globals().items()):
+            if k.startswith("test_"):
+                print("-" * 40)
+                print("Executing: %s" % k)
+                try:
+                    v()
+                except unittest.SkipTest as exc:
+                    print("SkipTest:", exc)
+                print("-" * 40)
+        print("Finished all tests.")
+    else:
+        assert len(sys.argv) >= 2
+        for arg in sys.argv[1:]:
+            print("Executing: %s" % arg)
+            if arg in globals():
+                globals()[arg]()  # assume function and execute
+            else:
+                eval(arg)  # assume Python code and execute
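With the `__main__` block above, the new test file also runs standalone, e.g. (assuming the usual RETURNN checkout layout):

    python tests/test_rf_decoder_transformer.py              # run all test_* functions
    python tests/test_rf_decoder_transformer.py test_llama   # run a single test by name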