returnn 1.20251013.131953__tar.gz → 1.20251106.185107__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of returnn has been flagged as potentially problematic. Click here for more details.
- {returnn-1.20251013.131953/returnn.egg-info → returnn-1.20251106.185107}/PKG-INFO +1 -1
- returnn-1.20251106.185107/_setup_info_generated.py +2 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/config.py +1 -1
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/datasets/distrib_files.py +53 -1
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/datasets/generating.py +3 -5
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/datasets/lm.py +20 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/datasets/meta.py +92 -23
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/datasets/util/vocabulary.py +90 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/attention.py +1 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/encoder/conformer.py +1 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/module.py +8 -1
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/nested.py +5 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tensor/_dim_extra.py +39 -24
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/engine.py +37 -3
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/util/basic.py +3 -1
- returnn-1.20251106.185107/returnn/util/collect_outputs_dict.py +79 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/util/file_cache.py +15 -1
- {returnn-1.20251013.131953 → returnn-1.20251106.185107/returnn.egg-info}/PKG-INFO +1 -1
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn.egg-info/SOURCES.txt +1 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_Dataset.py +83 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/torch_scale_tuning.py +1 -1
- returnn-1.20251013.131953/_setup_info_generated.py +0 -2
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/.editorconfig +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/.gitignore +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/.gitmodules +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/.kateconfig +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/CHANGELOG.md +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/CODEOWNERS +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/CONTRIBUTING.md +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/LICENSE +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/MANIFEST.in +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/README.rst +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/12AX.cluster_map +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/_setup_returnn_env.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-fwd.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-horovod-mpi.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-horovod-mpi.py.sh +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-horovod-mpi.sh +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-hyper-param-tuning.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-iter-dataset.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-list-devices.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-lua-torch-layer.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-pretrain.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-record-and-push-to-webserver.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-returnn-as-framework.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-rf-pt-benchmark.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-rf.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-rhn-enwik8.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-sprint-interface.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-tf-att-copy.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-tf-attention.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-tf-enc-dec.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-tf-hard-att-copy.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-tf-lstm-benchmark.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-tf-native-lstm.12ax.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-tf-native-lstm2.12ax.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-tf-neural-transducer.12ax.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-tf-rec-explicit-lstm.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-tf-rec-explicit-rnn.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-tf-rec-self-att.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-tf-search-compiled-graph.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-timit-lstm-ctc.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-torch.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/demo.sh +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/mdlstm/IAM/README.txt +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/mdlstm/IAM/chars.txt +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/mdlstm/IAM/config_demo +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/mdlstm/IAM/config_fwd +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/mdlstm/IAM/config_real +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/mdlstm/IAM/decode.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/mdlstm/IAM/go.sh +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/mdlstm/IAM/lines.txt +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/mdlstm/IAM/split/eval.txt +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/mdlstm/IAM/split/train.txt +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/mdlstm/IAM/split/valid.txt +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/mdlstm/README.md +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/mdlstm/artificial/create_test_h5.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/mdlstm/artificial/forwardconfig +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/mdlstm/artificial/go.sh +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/mdlstm/artificial/trainconfig +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/mdlstm/artificial_rgb/go.sh +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/pyproject.toml +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/requirements.txt +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/__main__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/__old_mod_loader__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/__setup__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/datasets/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/datasets/audio.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/datasets/basic.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/datasets/bundle_file.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/datasets/cached.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/datasets/cached2.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/datasets/hdf.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/datasets/huggingface.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/datasets/map.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/datasets/multi_proc.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/datasets/normalization_data.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/datasets/numpy_dump.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/datasets/postprocessing.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/datasets/raw_wav.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/datasets/sprint.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/datasets/stereo.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/datasets/text_dict.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/datasets/util/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/datasets/util/feature_extraction.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/datasets/util/strings.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/engine/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/engine/base.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/engine/batch.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/__main__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/warp-rna/.git +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/graph_editor/README.md +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/graph_editor/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/graph_editor/edit.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/graph_editor/reroute.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/graph_editor/select.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/graph_editor/subgraph.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/graph_editor/transform.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/extern/graph_editor/util.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/forward_iface.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/_backend.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/_cache.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/_native/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/_native/backend.cpp +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/_native/backend.hpp +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/_native/module.cpp +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/_native/module.hpp +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/_native/py_utils.hpp +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/_native/tensor_ops.cpp +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/_native/tensor_ops.hpp +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/_numpy_backend.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/_random_journal.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/_utils.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/array_.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/audio/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/audio/mel.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/audio/specaugment.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/backend.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/build_from_dict.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/cond.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/const.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/container.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/control_flow_ctx.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/conv.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/conversions/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/conversions/espnet_e_branchformer.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/conversions/hf_llama.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/conversions/torch_nn.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/decoder/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/decoder/transformer.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/device.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/dims.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/dropout.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/dtype.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/encoder/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/encoder/base.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/encoder/conformer_v2.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/encoder/e_branchformer.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/encoder/transformer.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/gradient.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/graph.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/hooks.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/init.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/label_smoothing.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/linear.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/loop.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/loss.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/math_.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/matmul.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/normalization.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/parameter.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/parametrizations.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/parametrize.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/piecewise_linear.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/rand.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/rec.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/reduce.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/run_ctx.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/signal.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/state.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/stepwise_scheduler.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/tensor_array.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/types.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/import_/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/import_/common.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/import_/git.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/import_/import_.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/learning_rate_control.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/log.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/native_op.cpp +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/native_op.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/pretrain.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/sprint/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/sprint/cache.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/sprint/control.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/sprint/error_signals.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/sprint/extern_interface.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/sprint/interface.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tensor/README.md +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tensor/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tensor/_tensor_extra.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tensor/_tensor_mixin_base.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tensor/_tensor_op_overloads.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tensor/control_flow_ctx.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tensor/dim.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tensor/marked_dim.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tensor/tensor.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tensor/tensor_dict.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tensor/utils.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/compat.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/data_pipeline.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/distributed.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/engine.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/frontend_layers/README.md +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/frontend_layers/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/frontend_layers/_backend.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/frontend_layers/_utils.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/frontend_layers/cond.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/frontend_layers/dims.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/frontend_layers/layer.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/frontend_layers/loop.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/frontend_layers/make_layer.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/frontend_layers/masked_computation.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/frontend_low_level/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/frontend_low_level/_backend.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/horovod.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/hyper_param_tuning.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/layers/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/layers/base.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/layers/basic.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/layers/rec.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/layers/segmental_model.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/layers/signal_processing.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/layers/variable.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/native_op.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/network.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/sprint.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/updater.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/util/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/util/basic.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/util/data.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/util/gradient_checkpoint.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/util/ken_lm.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/tf/util/open_fst.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/README.md +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/data/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/data/extern_data.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/data/pipeline.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/data/queued_data_iter.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/data/tensor_utils.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/distributed.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/frontend/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/frontend/_backend.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/frontend/_rand.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/frontend/bridge.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/frontend/raw_ops.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/optim/README.md +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/optim/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/optim/lion.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/updater.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/util/README.md +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/util/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/util/array_.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/util/debug_inf_nan.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/util/diagnose_gpu.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/util/exception_helper.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/util/gradient_checkpoint.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/util/module.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/torch/util/scaled_gradient.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/util/__init__.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/util/better_exchook.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/util/bpe.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/util/debug.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/util/debug_helpers.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/util/fsa.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/util/literal_py_to_pickle.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/util/lru_cache.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/util/math.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/util/native_code_compiler.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/util/pprint.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/util/py-to-pickle.cpp +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/util/py_ext_mod_compiler.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/util/result_with_reason.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/util/sig_proc.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/util/task_system.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/util/train_proc_manager.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/util/watch_memory.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn.egg-info/dependency_links.txt +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn.egg-info/requires.txt +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn.egg-info/top_level.txt +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/rnn.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/setup.cfg +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/setup.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/DummySprintExec.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/PyCharm-inspection-profile.xml +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/PyCharm.idea/.gitignore +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/PyCharm.idea/.name +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/PyCharm.idea/misc.xml +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/PyCharm.idea/modules.xml +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/PyCharm.idea/returnn.iml +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/_set_num_threads1.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/_setup_returnn_env.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/_setup_test_env.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/bpe-unicode-demo.codes +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/bpe-unicode-demo.vocab +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/lexicon_opt.fst +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/lexicon_opt.isyms +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/lexicon_opt.jpg +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/lexicon_opt.osyms +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/lint_common.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/pycharm-inspect.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/pylint.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/returnn-as-framework.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/rf_utils.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/spelling.dic +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_Config.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_Fsa.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_GeneratingDataset.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_HDFDataset.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_LearningRateControl.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_Log.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_MultiProcDataset.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_Pretrain.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_ResNet.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_SprintDataset.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_SprintInterface.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_TFEngine.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_TFNativeOp.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_TFNetworkLayer.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_TFNetworkRecLayer.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_TFNetworkSigProcLayer.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_TFUpdater.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_TFUtil.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_TF_determinism.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_TaskSystem.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_TaskSystem_SharedMem.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_TranslationDataset.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_Util.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_datasets_huggingface.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_demos.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_fork_exec.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_hdf_dump.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_rf_array.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_rf_attention.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_rf_base.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_rf_cond.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_rf_const.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_rf_container.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_rf_conv.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_rf_decoder_transformer.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_rf_encoder_conformer.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_rf_gradient.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_rf_label_smoothing.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_rf_loop.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_rf_math.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_rf_normalization.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_rf_piecewise_linear.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_rf_rec.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_rf_reduce.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_rf_signal.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_tensor.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_threading.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_tools.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_torch_dataset.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_torch_engine.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_torch_frontend.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_torch_internal_frontend.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/test_torch_util.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tests/torch_utils.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/_setup_returnn_env.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/analyze-dataset-batches.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/bliss-collect-seq-lens.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/bliss-dump-text.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/bliss-get-segment-names.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/bliss-to-ogg-zip.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/bpe-create-lexicon.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/calculate-word-error-rate.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/cleanup-old-models.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/collect-orth-symbols.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/collect-words.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/compile_native_op.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/compile_tf_graph.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/debug-dump-search-scores.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/debug-plot-search-scores.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/dump-dataset-raw-strings.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/dump-dataset.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/dump-forward-stats.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/dump-forward.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/dump-network-json.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/dump-pickle.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/extract_state_tying_from_dataset.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/file-cache.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/get-attention-weights.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/get-best-model-epoch.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/hdf_dump.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/hdf_dump_translation_dataset.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/import-blocks-mt-model.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/import-t2t-mt-model.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/lattice_rescorer/.gitignore +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/lattice_rescorer/Makefile +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/lattice_rescorer/README.md +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/lattice_rescorer/example/README.md +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/lattice_rescorer/example/libs_list +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/lattice_rescorer/example/state_vars_list +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/lattice_rescorer/example/tensor_names_list +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/lattice_rescorer/file.h +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/lattice_rescorer/main.cc +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/lattice_rescorer/rescorer.h +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/lattice_rescorer/vocabulary.cc +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/lattice_rescorer/vocabulary.h +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/tf_avg_checkpoints.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/tf_inspect_checkpoint.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/tf_inspect_summary_log.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/torch_avg_checkpoints.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/torch_export_to_onnx.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/torch_inspect_checkpoint.py +0 -0
- {returnn-1.20251013.131953 → returnn-1.20251106.185107}/tools/torch_inspect_checkpoint_and_opt.py +0 -0
|
@@ -801,7 +801,7 @@ class SubProcCopyGlobalConfigPreInitFunc:
|
|
|
801
801
|
from returnn.log import log
|
|
802
802
|
from returnn import __old_mod_loader__
|
|
803
803
|
|
|
804
|
-
better_exchook.
|
|
804
|
+
better_exchook.setup_all()
|
|
805
805
|
__old_mod_loader__.disable_lazy_mod_loads()
|
|
806
806
|
|
|
807
807
|
if self.global_config:
|
|
@@ -13,7 +13,7 @@ import sys
|
|
|
13
13
|
import numpy
|
|
14
14
|
from returnn.log import log
|
|
15
15
|
from returnn.util import better_exchook
|
|
16
|
-
from returnn.util.basic import override_env_var, try_run
|
|
16
|
+
from returnn.util.basic import override_env_var, try_run, OptionalNotImplementedError
|
|
17
17
|
from returnn.util.literal_py_to_pickle import literal_eval
|
|
18
18
|
from returnn.util.multi_proc_non_daemonic_spawn import NonDaemonicSpawnContext
|
|
19
19
|
from returnn.config import SubProcCopyGlobalConfigPreInitFunc
|
|
@@ -505,6 +505,24 @@ class DistributeFilesDataset(CachedDataset2):
|
|
|
505
505
|
self._lazy_init_num_outputs()
|
|
506
506
|
return self._data_keys
|
|
507
507
|
|
|
508
|
+
def get_all_tags(self) -> List[str]:
|
|
509
|
+
"""get all tags"""
|
|
510
|
+
if self.partition_epoch > 1:
|
|
511
|
+
raise OptionalNotImplementedError(f"{self} get_all_tags not supported for partition_epoch > 1")
|
|
512
|
+
if self.epoch is None:
|
|
513
|
+
# Need to init the worker.
|
|
514
|
+
self.init_seq_order(epoch=1)
|
|
515
|
+
return self._workers[self.epoch].get_all_tags()
|
|
516
|
+
|
|
517
|
+
def get_total_num_seqs(self, *, fast: bool = False) -> int:
|
|
518
|
+
"""get total num seqs"""
|
|
519
|
+
if self.partition_epoch > 1:
|
|
520
|
+
raise OptionalNotImplementedError(f"{self} get_total_num_seqs not supported for partition_epoch > 1")
|
|
521
|
+
if self.epoch is None:
|
|
522
|
+
# Need to init the worker.
|
|
523
|
+
self.init_seq_order(epoch=1)
|
|
524
|
+
return self._workers[self.epoch].get_total_num_seqs(fast=fast)
|
|
525
|
+
|
|
508
526
|
|
|
509
527
|
def _get_key_for_file_tree(t: FileTree) -> str:
|
|
510
528
|
"""generates a deterministic key given a file tree"""
|
|
@@ -608,6 +626,26 @@ class _WorkerProcParent:
|
|
|
608
626
|
assert msg == "data_seq"
|
|
609
627
|
return data
|
|
610
628
|
|
|
629
|
+
def get_all_tags(self) -> List[str]:
|
|
630
|
+
"""get all tags"""
|
|
631
|
+
self._lazy_wait_for_init_seq_order()
|
|
632
|
+
self.parent_conn.send(("get_all_tags", {}))
|
|
633
|
+
msg, data = self.parent_conn.recv()
|
|
634
|
+
assert msg == "all_tags"
|
|
635
|
+
if isinstance(data, Exception):
|
|
636
|
+
raise data
|
|
637
|
+
return data
|
|
638
|
+
|
|
639
|
+
def get_total_num_seqs(self, **kwargs) -> int:
|
|
640
|
+
"""get total num seqs"""
|
|
641
|
+
self._lazy_wait_for_init_seq_order()
|
|
642
|
+
self.parent_conn.send(("get_total_num_seqs", kwargs))
|
|
643
|
+
msg, data = self.parent_conn.recv()
|
|
644
|
+
assert msg == "total_num_seqs"
|
|
645
|
+
if isinstance(data, Exception):
|
|
646
|
+
raise data
|
|
647
|
+
return data
|
|
648
|
+
|
|
611
649
|
def exit(self, *, join: bool = True):
|
|
612
650
|
"""exit"""
|
|
613
651
|
self._lazy_wait_for_init_seq_order()
|
|
@@ -722,6 +760,20 @@ def _worker_proc_loop(
|
|
|
722
760
|
got_init_seq_order = True
|
|
723
761
|
next_seq_idx = 0
|
|
724
762
|
cache.clear()
|
|
763
|
+
elif msg == "get_all_tags":
|
|
764
|
+
try:
|
|
765
|
+
tags = dataset.get_all_tags()
|
|
766
|
+
except Exception as exc:
|
|
767
|
+
parent_conn.send(("all_tags", exc))
|
|
768
|
+
else:
|
|
769
|
+
parent_conn.send(("all_tags", tags))
|
|
770
|
+
elif msg == "get_total_num_seqs":
|
|
771
|
+
try:
|
|
772
|
+
total_num_seqs = dataset.get_total_num_seqs(**kwargs)
|
|
773
|
+
except Exception as exc:
|
|
774
|
+
parent_conn.send(("total_num_seqs", exc))
|
|
775
|
+
else:
|
|
776
|
+
parent_conn.send(("total_num_seqs", total_num_seqs))
|
|
725
777
|
else:
|
|
726
778
|
raise Exception(f"unknown msg {msg!r}")
|
|
727
779
|
except KeyboardInterrupt: # when parent dies
|
|
@@ -1164,11 +1164,9 @@ class StaticDataset(CachedDataset2):
|
|
|
1164
1164
|
"""supports sorting"""
|
|
1165
1165
|
return True
|
|
1166
1166
|
|
|
1167
|
-
def _collect_single_seq(self, seq_idx):
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
:rtype: DatasetSeq
|
|
1171
|
-
"""
|
|
1167
|
+
def _collect_single_seq(self, seq_idx: int) -> Optional[DatasetSeq]:
|
|
1168
|
+
if seq_idx >= len(self._seq_order):
|
|
1169
|
+
return None
|
|
1172
1170
|
corpus_seq_idx = self._seq_order[seq_idx]
|
|
1173
1171
|
data = self.data[corpus_seq_idx]
|
|
1174
1172
|
return DatasetSeq(
|
|
@@ -694,6 +694,26 @@ class LmDataset(CachedDataset2):
|
|
|
694
694
|
self.next_seq_idx = seq_idx + 1
|
|
695
695
|
return DatasetSeq(seq_idx=seq_idx, features=data, targets=targets, seq_tag=seq_tag)
|
|
696
696
|
|
|
697
|
+
def finish_epoch(self, *, free_resources: bool = False):
|
|
698
|
+
"""finish epoch"""
|
|
699
|
+
super().finish_epoch(free_resources=free_resources)
|
|
700
|
+
|
|
701
|
+
if free_resources:
|
|
702
|
+
self._orths_offsets_and_lens = None
|
|
703
|
+
if self._orth_mmaps is not None:
|
|
704
|
+
for m in self._orth_mmaps:
|
|
705
|
+
if m is not None:
|
|
706
|
+
m.close()
|
|
707
|
+
self._orth_mmaps = None
|
|
708
|
+
if self._orth_files is not None:
|
|
709
|
+
for f in self._orth_files:
|
|
710
|
+
if f is not None:
|
|
711
|
+
f.close()
|
|
712
|
+
self._orth_files = None
|
|
713
|
+
|
|
714
|
+
self._seq_list = None
|
|
715
|
+
self._seq_index_by_tag = None
|
|
716
|
+
|
|
697
717
|
|
|
698
718
|
def _is_bliss(filename):
|
|
699
719
|
"""
|
|
@@ -964,7 +964,6 @@ class CombinedDataset(CachedDataset2):
|
|
|
964
964
|
self.dataset_keys = set([m[0] for m in data_map.keys()]) # type: typing.Set[str]
|
|
965
965
|
self.dataset_idx2key_map = dict(enumerate(sorted(self.dataset_keys))) # idx -> dataset-key
|
|
966
966
|
self.data_keys = set(data_map.values()) # type: typing.Set[str]
|
|
967
|
-
assert "data" in self.data_keys
|
|
968
967
|
self.target_list = sorted(self.data_keys - {"data"})
|
|
969
968
|
|
|
970
969
|
# Build target lookup table that maps from dataset_key and data_key (data key used by CombinedDataset)
|
|
@@ -994,8 +993,7 @@ class CombinedDataset(CachedDataset2):
|
|
|
994
993
|
if data_dims:
|
|
995
994
|
data_dims = convert_data_dims(data_dims)
|
|
996
995
|
self.data_dims = data_dims
|
|
997
|
-
|
|
998
|
-
for key in self.target_list:
|
|
996
|
+
for key in self.data_keys:
|
|
999
997
|
assert key in data_dims
|
|
1000
998
|
else:
|
|
1001
999
|
self.data_dims = {}
|
|
@@ -1009,7 +1007,7 @@ class CombinedDataset(CachedDataset2):
|
|
|
1009
1007
|
if dataset_data_key in dataset.labels:
|
|
1010
1008
|
self.labels[data_key] = dataset.labels[dataset_data_key]
|
|
1011
1009
|
|
|
1012
|
-
self.num_inputs = self.data_dims["data"][0]
|
|
1010
|
+
self.num_inputs = self.data_dims["data"][0] if "data" in self.data_dims else 0
|
|
1013
1011
|
self.num_outputs = self.data_dims
|
|
1014
1012
|
|
|
1015
1013
|
self.data_dtypes = {
|
|
@@ -1019,6 +1017,9 @@ class CombinedDataset(CachedDataset2):
|
|
|
1019
1017
|
|
|
1020
1018
|
self.dataset_seq_idx_boundaries: Optional[List[int]] = None
|
|
1021
1019
|
self.dataset_sorted_seq_idx_list: Optional[List[Tuple[int, int]]] = None
|
|
1020
|
+
self._sub_dataset_cur_loaded_seq_range: Optional[List[Tuple[int, int]]] = None
|
|
1021
|
+
# The usage is about the seqs already covered in dataset_sorted_seq_idx_list,
|
|
1022
|
+
# in case we dynamically build up this list.
|
|
1022
1023
|
self.used_num_seqs_per_subset: Optional[List[int]] = None
|
|
1023
1024
|
|
|
1024
1025
|
def init_seq_order(self, epoch=None, seq_list=None, seq_order=None):
|
|
@@ -1030,7 +1031,7 @@ class CombinedDataset(CachedDataset2):
|
|
|
1030
1031
|
"""
|
|
1031
1032
|
|
|
1032
1033
|
assert seq_list is None and seq_order is None, "seq_list and seq_order not supported for %s" % self.__class__
|
|
1033
|
-
need_reinit = self.epoch is None or self.epoch != epoch
|
|
1034
|
+
need_reinit = self.epoch is None or self.epoch != epoch or self.expected_load_seq_start > 0
|
|
1034
1035
|
num_seqs_saved = self._num_seqs
|
|
1035
1036
|
super(CombinedDataset, self).init_seq_order(
|
|
1036
1037
|
epoch=epoch, seq_list=seq_list, seq_order=seq_order
|
|
@@ -1047,13 +1048,15 @@ class CombinedDataset(CachedDataset2):
|
|
|
1047
1048
|
for dataset in self.datasets.values():
|
|
1048
1049
|
dataset.init_seq_order(epoch=epoch)
|
|
1049
1050
|
|
|
1051
|
+
self._sub_dataset_cur_loaded_seq_range = [(0, 0)] * len(self.datasets)
|
|
1052
|
+
|
|
1050
1053
|
# noinspection PyBroadException
|
|
1051
1054
|
try:
|
|
1052
1055
|
total_num_seqs = sum([self.datasets[k].num_seqs for k in sorted(self.datasets.keys())])
|
|
1053
1056
|
except Exception:
|
|
1054
1057
|
total_num_seqs = None
|
|
1055
1058
|
|
|
1056
|
-
if total_num_seqs is not None:
|
|
1059
|
+
if total_num_seqs is not None and self.seq_ordering != "interleave":
|
|
1057
1060
|
self.dataset_seq_idx_boundaries = self._create_dataset_seq_idx_boundaries()
|
|
1058
1061
|
|
|
1059
1062
|
if self.sampling_sizes:
|
|
@@ -1090,7 +1093,7 @@ class CombinedDataset(CachedDataset2):
|
|
|
1090
1093
|
|
|
1091
1094
|
# Re-initialize sequence orders of sub-datasets with created sequence list.
|
|
1092
1095
|
self.used_num_seqs_per_subset = []
|
|
1093
|
-
for dataset_idx, dataset_key in self.dataset_idx2key_map.items():
|
|
1096
|
+
for dataset_idx, dataset_key in sorted(self.dataset_idx2key_map.items()):
|
|
1094
1097
|
assert self.datasets[dataset_key].have_corpus_seq_idx()
|
|
1095
1098
|
self.datasets[dataset_key].init_seq_order(epoch=epoch, seq_order=seq_order_subdatasets[dataset_idx])
|
|
1096
1099
|
self.used_num_seqs_per_subset.append(len(seq_order_subdatasets[dataset_idx]))
|
|
@@ -1098,6 +1101,11 @@ class CombinedDataset(CachedDataset2):
|
|
|
1098
1101
|
else:
|
|
1099
1102
|
self.dataset_sorted_seq_idx_list = [] # We will fill this as we go
|
|
1100
1103
|
self.used_num_seqs_per_subset = [0] * len(self.datasets)
|
|
1104
|
+
self._num_seqs = total_num_seqs
|
|
1105
|
+
|
|
1106
|
+
# These are currently not supported/implemented.
|
|
1107
|
+
# All of these should just be done in the sub-datasets directly.
|
|
1108
|
+
assert self.partition_epoch == 1 and self.repeat_epoch == 1 and self._num_shards == 1
|
|
1101
1109
|
|
|
1102
1110
|
return True
|
|
1103
1111
|
|
|
@@ -1236,13 +1244,30 @@ class CombinedDataset(CachedDataset2):
|
|
|
1236
1244
|
|
|
1237
1245
|
return dataset.get_estimated_seq_length(dataset_seq_idx)
|
|
1238
1246
|
|
|
1239
|
-
def
|
|
1247
|
+
def _sub_dataset_make_cur_loaded(self, dataset_idx: int) -> bool:
|
|
1248
|
+
# Cur meaning for the next sequence to be added to dataset_sorted_seq_idx_list.
|
|
1249
|
+
seq_idx = self.used_num_seqs_per_subset[dataset_idx]
|
|
1250
|
+
cur_start, cur_end = self._sub_dataset_cur_loaded_seq_range[dataset_idx]
|
|
1251
|
+
if seq_idx >= cur_end:
|
|
1252
|
+
self._sub_dataset_load_seqs(dataset_idx, cur_start, seq_idx + 1)
|
|
1253
|
+
return True
|
|
1254
|
+
elif seq_idx < cur_start:
|
|
1255
|
+
return False
|
|
1256
|
+
else:
|
|
1257
|
+
return True
|
|
1258
|
+
|
|
1259
|
+
def _expand_dataset_seq_idxs(self, num_values: int) -> bool:
|
|
1240
1260
|
"""
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1261
|
+
Try to extend dataset_sorted_seq_idx_list.
|
|
1262
|
+
We expect that we have reached the end of it.
|
|
1263
|
+
|
|
1264
|
+
:param num_values: Add num_values entries to the dataset-segment-idx mapping table
|
|
1265
|
+
:return: whether we added num_values entries
|
|
1244
1266
|
"""
|
|
1245
|
-
for
|
|
1267
|
+
for _ in range(num_values):
|
|
1268
|
+
for j in range(len(self.datasets)):
|
|
1269
|
+
self._sub_dataset_make_cur_loaded(j)
|
|
1270
|
+
|
|
1246
1271
|
if self.seq_ordering == "default": # i.e. in order
|
|
1247
1272
|
dataset_idx = 0
|
|
1248
1273
|
while dataset_idx < len(self.datasets):
|
|
@@ -1265,6 +1290,32 @@ class CombinedDataset(CachedDataset2):
|
|
|
1265
1290
|
else:
|
|
1266
1291
|
return False # No dataset has remaining data
|
|
1267
1292
|
|
|
1293
|
+
elif self.seq_ordering == "interleave":
|
|
1294
|
+
complete_fracs_and_ds_idx = [
|
|
1295
|
+
(
|
|
1296
|
+
self.datasets[self.dataset_idx2key_map[j]].get_complete_frac(
|
|
1297
|
+
self.used_num_seqs_per_subset[j] - 1, allow_only_lr_suitable=True
|
|
1298
|
+
)
|
|
1299
|
+
if self.used_num_seqs_per_subset[j] > 0
|
|
1300
|
+
else 0.0,
|
|
1301
|
+
j,
|
|
1302
|
+
)
|
|
1303
|
+
for j in range(len(self.datasets))
|
|
1304
|
+
]
|
|
1305
|
+
assert all(frac is not None for frac, _ in complete_fracs_and_ds_idx), (
|
|
1306
|
+
f"{self}: Datasets must provide complete frac for interleave,"
|
|
1307
|
+
f" got {complete_fracs_and_ds_idx}, dataset idx2key map {self.dataset_idx2key_map}"
|
|
1308
|
+
)
|
|
1309
|
+
# Sort by complete frac, i.e. datasets with the lowest complete frac first.
|
|
1310
|
+
complete_fracs_and_ds_idx.sort()
|
|
1311
|
+
for complete_frac, dataset_idx in complete_fracs_and_ds_idx:
|
|
1312
|
+
if self.datasets[self.dataset_idx2key_map[dataset_idx]].is_less_than_num_seqs(
|
|
1313
|
+
self.used_num_seqs_per_subset[dataset_idx]
|
|
1314
|
+
):
|
|
1315
|
+
break
|
|
1316
|
+
else:
|
|
1317
|
+
return False # No dataset has remaining data
|
|
1318
|
+
|
|
1268
1319
|
elif self.seq_ordering == "random_dataset":
|
|
1269
1320
|
while True:
|
|
1270
1321
|
# Build probability table
|
|
@@ -1323,19 +1374,23 @@ class CombinedDataset(CachedDataset2):
|
|
|
1323
1374
|
def _load_seqs(self, start, end):
|
|
1324
1375
|
# If the segment order is not yet known, fix the next few segments
|
|
1325
1376
|
if end > len(self.dataset_sorted_seq_idx_list):
|
|
1326
|
-
self.
|
|
1377
|
+
self._expand_dataset_seq_idxs(end - len(self.dataset_sorted_seq_idx_list))
|
|
1327
1378
|
|
|
1328
1379
|
requested_seqs = self.dataset_sorted_seq_idx_list[start:end]
|
|
1329
1380
|
|
|
1330
1381
|
for dataset_idx in range(len(self.datasets)):
|
|
1331
|
-
dataset = self.datasets[self.dataset_idx2key_map[dataset_idx]]
|
|
1332
1382
|
sub_requested_seqs = [s[1] for s in requested_seqs if s[0] == dataset_idx]
|
|
1333
1383
|
if not sub_requested_seqs:
|
|
1334
1384
|
continue
|
|
1335
1385
|
sub_start, sub_end = min(sub_requested_seqs), max(sub_requested_seqs)
|
|
1336
|
-
|
|
1386
|
+
self._sub_dataset_load_seqs(dataset_idx, sub_start, sub_end + 1)
|
|
1337
1387
|
super(CombinedDataset, self)._load_seqs(start=start, end=end)
|
|
1338
1388
|
|
|
1389
|
+
def _sub_dataset_load_seqs(self, dataset_idx: int, start: int, end: int):
|
|
1390
|
+
self._sub_dataset_cur_loaded_seq_range[dataset_idx] = (start, end)
|
|
1391
|
+
dataset = self.datasets[self.dataset_idx2key_map[dataset_idx]]
|
|
1392
|
+
dataset.load_seqs(start, end)
|
|
1393
|
+
|
|
1339
1394
|
def _get_data(self, dataset_key, dataset_seq_idx, data_key):
|
|
1340
1395
|
"""
|
|
1341
1396
|
:type dataset_seq_idx: int
|
|
@@ -1348,7 +1403,10 @@ class CombinedDataset(CachedDataset2):
|
|
|
1348
1403
|
if dataset_data_key is not None:
|
|
1349
1404
|
return dataset.get_data(dataset_seq_idx, dataset_data_key)
|
|
1350
1405
|
else:
|
|
1351
|
-
|
|
1406
|
+
shape: List[int] = [0] * self.num_outputs[data_key][1]
|
|
1407
|
+
if shape and not self.is_data_sparse(data_key):
|
|
1408
|
+
shape[-1] = self.get_data_dim(data_key)
|
|
1409
|
+
return numpy.zeros(shape, dtype=self.data_dtypes[data_key])
|
|
1352
1410
|
|
|
1353
1411
|
def _collect_single_seq(self, seq_idx):
|
|
1354
1412
|
"""
|
|
@@ -1362,19 +1420,30 @@ class CombinedDataset(CachedDataset2):
|
|
|
1362
1420
|
dataset = self.datasets[dataset_key]
|
|
1363
1421
|
|
|
1364
1422
|
seq_tag = dataset.get_tag(dataset_seq_idx)
|
|
1365
|
-
features = self._get_data(dataset_key, dataset_seq_idx,
|
|
1366
|
-
|
|
1367
|
-
|
|
1423
|
+
features = {key: self._get_data(dataset_key, dataset_seq_idx, key) for key in self.data_keys}
|
|
1424
|
+
complete_frac = None
|
|
1425
|
+
if self.seq_ordering == "interleave":
|
|
1426
|
+
# In the interleave case, by design, this should be monotonically increasing,
|
|
1427
|
+
# as per how we select the next seq in _expand_dataset_seq_idxs.
|
|
1428
|
+
complete_frac = dataset.get_complete_frac(dataset_seq_idx, allow_only_lr_suitable=True)
|
|
1429
|
+
# In other cases, complete_frac is not so straightforward.
|
|
1430
|
+
# In the case that the total num seqs is known, then it's anyway not necessary.
|
|
1431
|
+
return DatasetSeq(seq_idx=seq_idx, complete_frac=complete_frac, seq_tag=seq_tag, features=features)
|
|
1368
1432
|
|
|
1369
|
-
def is_less_than_num_seqs(self, n):
|
|
1433
|
+
def is_less_than_num_seqs(self, n: int) -> bool:
|
|
1370
1434
|
"""
|
|
1371
|
-
:param
|
|
1372
|
-
:rtype: bool
|
|
1435
|
+
:param n:
|
|
1373
1436
|
"""
|
|
1374
1437
|
if n < len(self.dataset_sorted_seq_idx_list):
|
|
1375
1438
|
return True
|
|
1376
1439
|
else:
|
|
1377
|
-
return self.
|
|
1440
|
+
return self._expand_dataset_seq_idxs(n - len(self.dataset_sorted_seq_idx_list) + 1)
|
|
1441
|
+
|
|
1442
|
+
def get_data_keys(self) -> List[str]:
|
|
1443
|
+
"""data keys"""
|
|
1444
|
+
if "data" in self.data_keys:
|
|
1445
|
+
return ["data"] + sorted(self.data_keys - {"data"})
|
|
1446
|
+
return sorted(self.data_keys)
|
|
1378
1447
|
|
|
1379
1448
|
def get_target_list(self):
|
|
1380
1449
|
"""
|
|
@@ -11,6 +11,7 @@ __all__ = [
|
|
|
11
11
|
"SentencePieces",
|
|
12
12
|
"CharacterTargets",
|
|
13
13
|
"Utf8ByteTargets",
|
|
14
|
+
"HuggingFaceTokenizer",
|
|
14
15
|
]
|
|
15
16
|
|
|
16
17
|
from typing import Optional, Union, Type, Callable, List, Dict
|
|
@@ -691,3 +692,92 @@ class Utf8ByteTargets(Vocabulary):
|
|
|
691
692
|
assert ((seq >= 0) & (seq < 256)).all(), f"invalid byte value, must be within 0-255: {seq}"
|
|
692
693
|
seq = seq.astype(numpy.uint8)
|
|
693
694
|
return bytearray(seq).decode(encoding="utf8")
|
|
695
|
+
|
|
696
|
+
|
|
697
|
+
class HuggingFaceTokenizer(Vocabulary):
|
|
698
|
+
"""
|
|
699
|
+
Uses the `AutoTokenizer` class from the `transformers` package.
|
|
700
|
+
"""
|
|
701
|
+
|
|
702
|
+
def __init__(self, *, huggingface_repo_dir: str):
|
|
703
|
+
"""
|
|
704
|
+
:param str huggingface_repo_dir: the directory containing the `tokenizer_config.json` file.
|
|
705
|
+
"""
|
|
706
|
+
import transformers # noqa
|
|
707
|
+
|
|
708
|
+
# Make sure it is a string. (Could be e.g. Sis Path.)
|
|
709
|
+
huggingface_repo_dir = str(huggingface_repo_dir)
|
|
710
|
+
self._opts = {"huggingface_repo_dir": huggingface_repo_dir}
|
|
711
|
+
self._cache_key = huggingface_repo_dir
|
|
712
|
+
self.tokenizer = transformers.AutoTokenizer.from_pretrained(huggingface_repo_dir, trust_remote_code=True)
|
|
713
|
+
super().__init__(
|
|
714
|
+
vocab_file=None,
|
|
715
|
+
seq_postfix=None,
|
|
716
|
+
unknown_label=self.tokenizer.unk_token_id,
|
|
717
|
+
eos_label=self.tokenizer.eos_token_id,
|
|
718
|
+
bos_label=self.tokenizer.bos_token_id,
|
|
719
|
+
pad_label=self.tokenizer.pad_token_id,
|
|
720
|
+
)
|
|
721
|
+
|
|
722
|
+
def __repr__(self):
|
|
723
|
+
return "%s(%r)" % (self.__class__.__name__, self._opts)
|
|
724
|
+
|
|
725
|
+
def _parse_vocab(self):
|
|
726
|
+
self.num_labels = len(self.tokenizer)
|
|
727
|
+
# Do not load labels/vocab here. This is not really needed.
|
|
728
|
+
|
|
729
|
+
@property
|
|
730
|
+
def labels(self) -> List[str]:
|
|
731
|
+
"""list of labels"""
|
|
732
|
+
if self._cache_key and self._cache_key in self._cache:
|
|
733
|
+
self._vocab, self._labels = self._cache[self._cache_key]
|
|
734
|
+
assert self.num_labels == len(self._vocab) == len(self._labels)
|
|
735
|
+
else:
|
|
736
|
+
self._labels = [self.tokenizer._convert_id_to_token(i) for i in range(self.num_labels)] # noqa
|
|
737
|
+
self._vocab = {label: i for (i, label) in enumerate(self._labels)}
|
|
738
|
+
if self._cache_key:
|
|
739
|
+
self._cache[self._cache_key] = (self._vocab, self._labels)
|
|
740
|
+
return self._labels
|
|
741
|
+
|
|
742
|
+
def is_id_valid(self, idx: int) -> bool:
|
|
743
|
+
"""
|
|
744
|
+
:param idx:
|
|
745
|
+
"""
|
|
746
|
+
return 0 <= idx < len(self.tokenizer)
|
|
747
|
+
|
|
748
|
+
def id_to_label(self, idx: int, default: Union[str, Type[KeyError], None] = KeyError) -> Optional[str]:
|
|
749
|
+
"""
|
|
750
|
+
:param idx:
|
|
751
|
+
:param default:
|
|
752
|
+
"""
|
|
753
|
+
if default is not KeyError and not self.is_id_valid(idx):
|
|
754
|
+
return default
|
|
755
|
+
return self.tokenizer.convert_ids_to_tokens(idx)
|
|
756
|
+
|
|
757
|
+
def label_to_id(self, label: str, default: Union[int, Type[KeyError], None] = KeyError) -> Optional[int]:
|
|
758
|
+
"""
|
|
759
|
+
:param label:
|
|
760
|
+
:param default:
|
|
761
|
+
"""
|
|
762
|
+
res = self.tokenizer.convert_token_to_id(label)
|
|
763
|
+
if res == self.unknown_label_id or res < 0 or res is None:
|
|
764
|
+
# It could be that the label really is the unknown-label, or it could be that the label is unknown.
|
|
765
|
+
if label == self.id_to_label(self.unknown_label_id):
|
|
766
|
+
return self.unknown_label_id
|
|
767
|
+
if default is KeyError:
|
|
768
|
+
raise KeyError("label %r not found" % label)
|
|
769
|
+
return default
|
|
770
|
+
return res
|
|
771
|
+
|
|
772
|
+
def get_seq(self, sentence: str) -> List[int]:
|
|
773
|
+
"""
|
|
774
|
+
:param sentence: assumed to be seq of vocab entries separated by whitespace
|
|
775
|
+
"""
|
|
776
|
+
return self.tokenizer(sentence)["input_ids"]
|
|
777
|
+
|
|
778
|
+
def get_seq_labels(self, seq):
|
|
779
|
+
"""
|
|
780
|
+
:param list[int]|numpy.ndarray seq: 1D sequence
|
|
781
|
+
:rtype: str
|
|
782
|
+
"""
|
|
783
|
+
return self.tokenizer.decode(seq, skip_special_tokens=True)
|
|
@@ -483,6 +483,7 @@ class RelPosSelfAttention(SelfAttentionBase):
|
|
|
483
483
|
matrix_bd = _rel_pos_enc_shift(matrix_bd, axis, pos_emb_spatial_dim, hist_dim)
|
|
484
484
|
|
|
485
485
|
scores = matrix_ac + matrix_bd # (batch, head, time1, time2)
|
|
486
|
+
del matrix_ac, matrix_bd
|
|
486
487
|
scores *= self.key_dim_per_head.dimension**-0.5
|
|
487
488
|
att_weights = rf.softmax(scores, axis=hist_dim)
|
|
488
489
|
att_weights = rf.dropout(att_weights, self.att_dropout, axis=self.att_dropout_broadcast and hist_dim)
|
{returnn-1.20251013.131953 → returnn-1.20251106.185107}/returnn/frontend/encoder/conformer.py
RENAMED
|
@@ -273,6 +273,7 @@ class ConformerEncoderLayer(rf.Module):
|
|
|
273
273
|
x_mhsa = self.self_att(x_mhsa_ln, axis=spatial_dim)
|
|
274
274
|
x_mhsa = rf.dropout(x_mhsa, self.dropout, axis=self.dropout_broadcast and self.out_dim)
|
|
275
275
|
x_mhsa_out = x_mhsa + x_ffn1_out
|
|
276
|
+
del x_mhsa
|
|
276
277
|
|
|
277
278
|
# Conv
|
|
278
279
|
x_conv_ln = self.conv_layer_norm(x_mhsa_out)
|
|
@@ -274,10 +274,17 @@ class Functional(Module):
|
|
|
274
274
|
(This is often not necessary, but sometimes useful.)
|
|
275
275
|
"""
|
|
276
276
|
|
|
277
|
-
def __init__(self, func):
|
|
277
|
+
def __init__(self, func, *, attribs: Optional[Dict[str, Any]] = None):
|
|
278
|
+
"""
|
|
279
|
+
:param func: callable. you might want to use functools.partial if you want to fix some arguments.
|
|
280
|
+
:param attribs: optional dict of attributes to set on this module. e.g. ``out_dim``.
|
|
281
|
+
"""
|
|
278
282
|
super().__init__()
|
|
279
283
|
assert callable(func)
|
|
280
284
|
self.func = func
|
|
285
|
+
if attribs:
|
|
286
|
+
for k, v in attribs.items():
|
|
287
|
+
setattr(self, k, v)
|
|
281
288
|
|
|
282
289
|
def __repr__(self):
|
|
283
290
|
return f"{self.__class__.__name__}({self.func.__qualname__})"
|
|
@@ -275,6 +275,8 @@ def _masked_select(
|
|
|
275
275
|
return s
|
|
276
276
|
assert s in dim_map
|
|
277
277
|
return dim_map[s]
|
|
278
|
+
if s is None:
|
|
279
|
+
return None
|
|
278
280
|
raise TypeError(f"_masked_select: unexpected type ({type(s)})")
|
|
279
281
|
|
|
280
282
|
|
|
@@ -420,6 +422,9 @@ def _masked_scatter(
|
|
|
420
422
|
if s in merged_dim_map:
|
|
421
423
|
return merged_dim_map[s]
|
|
422
424
|
return s
|
|
425
|
+
if s is None:
|
|
426
|
+
assert backup is None
|
|
427
|
+
return None
|
|
423
428
|
raise TypeError(f"_masked_scatter: unexpected type ({type(s)})")
|
|
424
429
|
|
|
425
430
|
|
|
@@ -18,6 +18,8 @@ if TYPE_CHECKING:
|
|
|
18
18
|
# just for type hints, otherwise use _d.Dim
|
|
19
19
|
from .dim import Dim
|
|
20
20
|
|
|
21
|
+
from returnn.datasets.util.vocabulary import Vocabulary
|
|
22
|
+
|
|
21
23
|
from . import dim as _d
|
|
22
24
|
from . import tensor as _t
|
|
23
25
|
from . import marked_dim as _m
|
|
@@ -41,54 +43,63 @@ class _DimExtra:
|
|
|
41
43
|
self,
|
|
42
44
|
*,
|
|
43
45
|
dim: Dim,
|
|
44
|
-
kind=DimTypes.Unspecified,
|
|
45
|
-
vocab=None,
|
|
46
|
-
undefined=False,
|
|
47
|
-
special=False,
|
|
48
|
-
auto_generated=False,
|
|
49
|
-
match_priority=0,
|
|
50
|
-
derived_from_tag=None,
|
|
51
|
-
derived_from_op=None,
|
|
52
|
-
batch=None,
|
|
53
|
-
control_flow_ctx=None,
|
|
46
|
+
kind: Entity = DimTypes.Unspecified,
|
|
47
|
+
vocab: Union[None, Dict[str, Any], Vocabulary] = None,
|
|
48
|
+
undefined: bool = False,
|
|
49
|
+
special: bool = False,
|
|
50
|
+
auto_generated: bool = False,
|
|
51
|
+
match_priority: int = 0,
|
|
52
|
+
derived_from_tag: Optional[Dim] = None,
|
|
53
|
+
derived_from_op: Optional[Op] = None,
|
|
54
|
+
batch: Optional[BatchInfo] = None,
|
|
55
|
+
control_flow_ctx: Optional[ControlFlowContext] = None,
|
|
54
56
|
src_data: Optional[_t.Tensor] = None,
|
|
55
57
|
src_axis: Optional[int] = None,
|
|
56
58
|
):
|
|
57
59
|
"""
|
|
58
60
|
:param dim:
|
|
59
|
-
:param
|
|
60
|
-
:param
|
|
61
|
-
:param
|
|
62
|
-
:param
|
|
61
|
+
:param kind:
|
|
62
|
+
:param vocab:
|
|
63
|
+
:param undefined: When this is specified as `None` by the user via `shape`.
|
|
64
|
+
:param special: this can not be a dim tag of :class:`Tensor`.
|
|
63
65
|
But this dim tag also does not match anything except itself.
|
|
64
66
|
So it can be used to represent special placeholders with special meanings like ``single_step``.
|
|
65
|
-
:param
|
|
67
|
+
:param auto_generated:
|
|
66
68
|
This is auto-generated by RETURNN because it was not explicitly specified by the user.
|
|
67
69
|
E.g. for ConvLayer and others.
|
|
68
70
|
This implies certain behavior on equality, such as comparing the description,
|
|
69
71
|
to allow for several independent creations of the dim tag during template construction.
|
|
70
|
-
:param
|
|
72
|
+
:param derived_from_tag:
|
|
71
73
|
Whether this new tag is reduced, down/up sampled, padded etc from this given other tag.
|
|
72
74
|
In situations where dim tags are being matched (Data.get_common_data),
|
|
73
75
|
the behavior is to consider them as equal,
|
|
74
76
|
and assume that the chain of operations (e.g. padding + valid conv) results in the same dim.
|
|
75
|
-
:param
|
|
76
|
-
:param
|
|
77
|
+
:param derived_from_op:
|
|
78
|
+
:param match_priority: when there is ambiguity between multiple dim tags, this value defines the order
|
|
77
79
|
in which the dimension are assigned to their matching counterparts.
|
|
78
80
|
A dimension tag with a higher priority value is assigned first.
|
|
79
81
|
E.g. for a square matrix used for a linear transformation,
|
|
80
82
|
the reduce dim tag should have a higher priority.
|
|
81
|
-
:param
|
|
82
|
-
:param
|
|
83
|
+
:param batch: for batch-dim, or dynamic dims per batch
|
|
84
|
+
:param control_flow_ctx:
|
|
83
85
|
:param src_data:
|
|
84
86
|
:param src_axis:
|
|
85
87
|
"""
|
|
86
88
|
self.dim = dim
|
|
87
89
|
assert kind is None or (isinstance(kind, Entity) and kind in DimTypes.Types)
|
|
88
90
|
self.kind = kind
|
|
91
|
+
if vocab:
|
|
92
|
+
from returnn.datasets.util.vocabulary import Vocabulary
|
|
93
|
+
|
|
94
|
+
if isinstance(vocab, Vocabulary):
|
|
95
|
+
pass
|
|
96
|
+
elif isinstance(vocab, dict):
|
|
97
|
+
vocab = Vocabulary.create_vocab(**vocab)
|
|
98
|
+
else:
|
|
99
|
+
raise TypeError(f"invalid vocab {vocab!r} type {type(vocab)}")
|
|
89
100
|
self.vocab = vocab
|
|
90
|
-
self.same_as
|
|
91
|
-
self.copy_same_as
|
|
101
|
+
self.same_as: Optional[Dim] = None
|
|
102
|
+
self.copy_same_as: Optional[Dim] = None
|
|
92
103
|
self.derived_from_tag = derived_from_tag
|
|
93
104
|
self.derived_from_op = derived_from_op
|
|
94
105
|
if derived_from_op and not derived_from_op.output:
|
|
@@ -116,8 +127,8 @@ class _DimExtra:
|
|
|
116
127
|
self.auto_generated = auto_generated
|
|
117
128
|
# We can have different tag variants per batch info (e.g. with beam), or per control flow ctx.
|
|
118
129
|
# They each have same_as = self. The same_base should have the base (global) batch info.
|
|
119
|
-
self.same_for_batch_ctx
|
|
120
|
-
self.cache_dyn_size_ext_dev
|
|
130
|
+
self.same_for_batch_ctx: Dict[Tuple[BatchInfo, Optional[ControlFlowContext]], Dim] = {}
|
|
131
|
+
self.cache_dyn_size_ext_dev: Dict[str, _t.Tensor] = {} # device -> dyn_size_ext
|
|
121
132
|
self.cache_seq_mask: Dict[Tuple[str, Optional[Tuple[Dim, ...]]], _t.Tensor] = {} # (dev,dim_order) -> seq_mask
|
|
122
133
|
self.cache_dim_math = _CacheDimMath() # op (add,sub,...), operand -> Dim
|
|
123
134
|
|
|
@@ -134,6 +145,7 @@ class _DimExtra:
|
|
|
134
145
|
def __setstate__(self, state):
|
|
135
146
|
self.__dict__.update(state)
|
|
136
147
|
if self.kind is not None:
|
|
148
|
+
# noinspection PyTypeChecker
|
|
137
149
|
self.kind = {v.name: v for v in DimTypes.Types}[self.kind]
|
|
138
150
|
|
|
139
151
|
def __sis_state__(self):
|
|
@@ -151,6 +163,9 @@ class _DimMixin:
|
|
|
151
163
|
def _handle_extra_kwargs(self: Dim, *, dyn_size: Optional[_t.RawTensorType] = None, **kwargs):
|
|
152
164
|
if kwargs:
|
|
153
165
|
self._extra = _DimExtra(dim=self, **kwargs)
|
|
166
|
+
if self._extra.vocab and self.size is None:
|
|
167
|
+
self.size = self._extra.vocab.num_labels
|
|
168
|
+
self.capacity = self.capacity or self.size
|
|
154
169
|
if dyn_size is not None:
|
|
155
170
|
self.dyn_size = dyn_size
|
|
156
171
|
if self.derived_from_op and self.is_dynamic():
|