returnn 1.20241030.185827__tar.gz → 1.20241106.124322__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of returnn might be problematic.
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/PKG-INFO +1 -1
- returnn-1.20241106.124322/_setup_info_generated.py +2 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/array_.py +11 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/learning_rate_control.py +2 -2
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/data/pipeline.py +64 -13
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/data/returnn_dataset_wrapper.py +12 -1
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/engine.py +65 -28
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/basic.py +36 -40
- returnn-1.20241106.124322/returnn/util/math.py +87 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn.egg-info/PKG-INFO +1 -1
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_Util.py +30 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_demos.py +3 -0
- returnn-1.20241030.185827/_setup_info_generated.py +0 -2
- returnn-1.20241030.185827/returnn/util/math.py +0 -34
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/.editorconfig +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/.gitignore +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/.gitmodules +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/.kateconfig +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/CHANGELOG.md +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/CODEOWNERS +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/CONTRIBUTING.md +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/LICENSE +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/MANIFEST.in +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/README.rst +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/12AX.cluster_map +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/_setup_returnn_env.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-fwd.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-horovod-mpi.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-horovod-mpi.py.sh +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-horovod-mpi.sh +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-hyper-param-tuning.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-iter-dataset.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-list-devices.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-lua-torch-layer.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-pretrain.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-record-and-push-to-webserver.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-returnn-as-framework.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-rf-pt-benchmark.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-rf.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-rhn-enwik8.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-sprint-interface.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-att-copy.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-attention.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-enc-dec.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-hard-att-copy.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-lstm-benchmark.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-native-lstm.12ax.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-native-lstm2.12ax.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-neural-transducer.12ax.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-rec-explicit-lstm.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-rec-explicit-rnn.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-rec-self-att.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-search-compiled-graph.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-timit-lstm-ctc.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-torch.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/demo.sh +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/README.txt +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/chars.txt +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/config_demo +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/config_fwd +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/config_real +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/decode.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/go.sh +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/lines.txt +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/split/eval.txt +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/split/train.txt +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/IAM/split/valid.txt +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/README.md +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/artificial/create_test_h5.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/artificial/forwardconfig +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/artificial/go.sh +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/artificial/trainconfig +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/artificial_rgb/go.sh +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/pyproject.toml +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/requirements.txt +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/__main__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/__old_mod_loader__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/__setup__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/config.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/audio.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/basic.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/bundle_file.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/cached.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/cached2.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/distrib_files.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/generating.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/hdf.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/lm.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/map.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/meta.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/multi_proc.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/normalization_data.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/numpy_dump.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/postprocessing.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/raw_wav.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/sprint.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/stereo.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/util/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/util/feature_extraction.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/util/strings.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/datasets/util/vocabulary.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/engine/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/engine/base.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/engine/batch.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/__main__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/.git +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/graph_editor/README.md +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/graph_editor/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/graph_editor/edit.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/graph_editor/reroute.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/graph_editor/select.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/graph_editor/subgraph.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/graph_editor/transform.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/extern/graph_editor/util.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/forward_iface.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/_backend.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/_native/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/_native/backend.cpp +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/_native/backend.hpp +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/_native/module.cpp +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/_native/module.hpp +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/_native/py_utils.hpp +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/_native/tensor_ops.cpp +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/_native/tensor_ops.hpp +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/_numpy_backend.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/_random_journal.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/_utils.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/attention.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/audio/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/audio/mel.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/audio/specaugment.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/backend.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/build_from_dict.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/cond.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/const.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/container.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/control_flow_ctx.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/conv.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/conversions/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/conversions/espnet_e_branchformer.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/conversions/hf_llama.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/conversions/torch_nn.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/decoder/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/decoder/transformer.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/device.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/dims.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/dropout.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/dtype.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/encoder/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/encoder/base.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/encoder/conformer.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/encoder/e_branchformer.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/encoder/transformer.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/gradient.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/graph.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/hooks.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/init.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/label_smoothing.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/linear.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/loop.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/loss.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/math_.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/matmul.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/module.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/normalization.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/parameter.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/parametrizations.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/parametrize.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/piecewise_linear.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/rand.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/rec.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/reduce.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/run_ctx.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/signal.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/state.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/stepwise_scheduler.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/tensor_array.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/frontend/types.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/import_/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/import_/common.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/import_/git.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/import_/import_.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/log.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/native_op.cpp +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/native_op.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/pretrain.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/sprint/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/sprint/cache.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/sprint/control.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/sprint/error_signals.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/sprint/extern_interface.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/sprint/interface.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tensor/README.md +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tensor/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tensor/_dim_extra.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tensor/_tensor_extra.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tensor/_tensor_mixin_base.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tensor/_tensor_op_overloads.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tensor/control_flow_ctx.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tensor/dim.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tensor/marked_dim.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tensor/tensor.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tensor/tensor_dict.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tensor/utils.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/compat.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/data_pipeline.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/distributed.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/engine.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/README.md +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/_backend.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/_utils.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/cond.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/dims.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/layer.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/loop.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/make_layer.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/masked_computation.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_low_level/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/frontend_low_level/_backend.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/horovod.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/hyper_param_tuning.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/layers/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/layers/base.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/layers/basic.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/layers/rec.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/layers/segmental_model.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/layers/signal_processing.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/layers/variable.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/native_op.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/network.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/sprint.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/updater.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/util/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/util/basic.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/util/data.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/util/gradient_checkpoint.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/util/ken_lm.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/tf/util/open_fst.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/README.md +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/data/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/data/extern_data.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/data/queued_data_iter.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/data/tensor_utils.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/distributed.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/frontend/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/frontend/_backend.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/frontend/_rand.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/frontend/bridge.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/frontend/raw_ops.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/optim/README.md +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/optim/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/optim/lion.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/updater.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/util/README.md +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/util/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/util/array_.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/util/diagnose_gpu.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/util/exception_helper.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/util/gradient_checkpoint.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/util/module.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/torch/util/scaled_gradient.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/__init__.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/better_exchook.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/bpe.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/debug.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/debug_helpers.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/file_cache.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/fsa.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/literal_py_to_pickle.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/native_code_compiler.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/pprint.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/py-to-pickle.cpp +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/py_compat.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/py_ext_mod_compiler.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/result_with_reason.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/sig_proc.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/task_system.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/train_proc_manager.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/watch_memory.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn.egg-info/SOURCES.txt +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn.egg-info/dependency_links.txt +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn.egg-info/top_level.txt +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/rnn.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/setup.cfg +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/setup.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/DummySprintExec.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/PyCharm-inspection-profile.xml +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/PyCharm.idea/.gitignore +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/PyCharm.idea/.name +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/PyCharm.idea/misc.xml +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/PyCharm.idea/modules.xml +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/PyCharm.idea/returnn.iml +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/_set_num_threads1.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/_setup_returnn_env.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/_setup_test_env.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/bpe-unicode-demo.codes +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/bpe-unicode-demo.vocab +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/lexicon_opt.fst +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/lexicon_opt.isyms +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/lexicon_opt.jpg +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/lexicon_opt.osyms +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/lint_common.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/pycharm-inspect.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/pylint.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/returnn-as-framework.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/rf_utils.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/spelling.dic +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_Config.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_Dataset.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_Fsa.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_GeneratingDataset.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_HDFDataset.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_LearningRateControl.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_Log.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_MultiProcDataset.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_Pretrain.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_ResNet.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_SprintDataset.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_SprintInterface.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_TFEngine.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_TFNativeOp.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_TFNetworkLayer.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_TFNetworkRecLayer.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_TFNetworkSigProcLayer.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_TFUpdater.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_TFUtil.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_TF_determinism.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_TaskSystem.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_TaskSystem_SharedMem.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_TranslationDataset.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_fork_exec.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_hdf_dump.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_array.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_attention.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_base.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_cond.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_const.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_container.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_conv.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_decoder_transformer.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_encoder_conformer.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_gradient.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_label_smoothing.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_loop.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_math.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_normalization.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_piecewise_linear.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_rec.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_reduce.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_rf_signal.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_tensor.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_tools.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_torch_dataset.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_torch_engine.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_torch_frontend.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_torch_internal_frontend.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/test_torch_util.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tests/torch_utils.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/_setup_returnn_env.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/analyze-dataset-batches.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/bliss-collect-seq-lens.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/bliss-dump-text.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/bliss-get-segment-names.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/bliss-to-ogg-zip.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/bpe-create-lexicon.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/calculate-word-error-rate.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/cleanup-old-models.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/collect-orth-symbols.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/collect-words.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/compile_native_op.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/compile_tf_graph.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/debug-dump-search-scores.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/debug-plot-search-scores.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/dump-dataset-raw-strings.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/dump-dataset.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/dump-forward-stats.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/dump-forward.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/dump-network-json.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/dump-pickle.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/extract_state_tying_from_dataset.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/get-attention-weights.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/get-best-model-epoch.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/hdf_dump.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/hdf_dump_translation_dataset.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/import-blocks-mt-model.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/import-t2t-mt-model.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/.gitignore +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/Makefile +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/README.md +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/example/README.md +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/example/libs_list +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/example/state_vars_list +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/example/tensor_names_list +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/file.h +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/main.cc +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/rescorer.h +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/vocabulary.cc +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/lattice_rescorer/vocabulary.h +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/tf_avg_checkpoints.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/tf_inspect_checkpoint.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/tf_inspect_summary_log.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/torch_avg_checkpoints.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/torch_export_to_onnx.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/torch_inspect_checkpoint.py +0 -0
- {returnn-1.20241030.185827 → returnn-1.20241106.124322}/tools/torch_inspect_checkpoint_and_opt.py +0 -0
returnn/frontend/array_.py

@@ -36,6 +36,7 @@ __all__ = [
     "masked_scatter",
     "sequence_mask",
     "pack_padded",
+    "pad_packed",
     "gather",
     "scatter",
     "scatter_argmax",
@@ -627,6 +628,8 @@ def pack_padded(
     Packing means to only store the non-padded frames.
     This uses :func:`masked_select` internally based on the mask of non-masked frames.

+    See :func:`pad_packed` for the inverse operation.
+
     :param source:
     :param dims: dims in source to pack. the order defines the format. first dim is major, etc.
         if there are no padded frames, e.g. dims=[B,T] would just result in the [B*T,...] reshaped tensor.
@@ -648,6 +651,14 @@ def pack_padded(
     return rf.masked_select(source, mask=mask, dims=dims, out_dim=out_dim)


+def pad_packed(source: Tensor, *, in_dim: Dim, dims: Sequence[Dim]) -> Tensor:
+    """
+    Inverse of :func:`pack_padded`, i.e. unpack the sequence, i.e. pad it back to the original length.
+    """
+    mask = rf.sequence_mask(dims, device=source.device)
+    return rf.masked_scatter(source, mask=mask, in_dim=in_dim, dims=dims)
+
+
 # noinspection PyUnusedLocal
 def gather(
     source: Tensor,
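Taken together, pack_padded and the new pad_packed form a round trip: pack away the padded frames, compute on the packed representation, then scatter back. A minimal sketch of that usage, assuming a padded tensor x with the usual batch/time dims (the helper pack_roundtrip is illustrative and not part of this release):

import returnn.frontend as rf
from returnn.tensor import Tensor, Dim


def pack_roundtrip(x: Tensor, *, batch_dim: Dim, time_dim: Dim) -> Tensor:
    """Pack a padded [B,T,...] tensor and pad it back (illustration only)."""
    # pack_padded returns the packed tensor and its new packed dim (via masked_select).
    packed, packed_dim = rf.pack_padded(x, dims=[batch_dim, time_dim])
    # ... compute on the packed frames here, without spending work on padding ...
    return rf.pad_packed(packed, in_dim=packed_dim, dims=[batch_dim, time_dim])  # back to [B,T,...]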
returnn/learning_rate_control.py

@@ -5,7 +5,7 @@ The base class is :class:`LearningRateControl`.

 from __future__ import annotations

-from typing import Optional, Any, Dict
+from typing import Optional, Union, Any, Dict
 import typing
 import os
 import returnn.util.basic as util
@@ -350,7 +350,7 @@ class LearningRateControl:
         relative_error /= learning_rate / self.default_learning_rate
         return relative_error

-    def set_epoch_error(self, epoch, error):
+    def set_epoch_error(self, epoch: int, error: Dict[str, Union[float, Dict[str, float]]]):
         """
         :type epoch: int
         :type error: dict[str,float|dict[str,float]]
returnn/torch/data/pipeline.py

@@ -28,7 +28,7 @@ import numpy
 import torch
 import torch.utils.data

-from returnn.util.basic import NumbersDict
+from returnn.util.basic import NumbersDict, get_fwd_compat_kwargs


 def create_tensor(array: numpy.ndarray) -> Union[torch.Tensor, numpy.ndarray]:
@@ -59,7 +59,7 @@ def collate_batch(batch: List[Dict[str, numpy.ndarray]]) -> Dict[str, Union[torc

     res = {}
     for key in data_keys:
-        if key == "num_seqs":
+        if key in ("num_seqs", "epoch"):
             res[key] = batch[0][key]  # it should always be the same
             continue
         ls = [create_tensor(sample[key]) for sample in batch]
@@ -119,7 +119,7 @@ class ChunkingIterDataPipe(torch.utils.data.IterDataPipe):

         if not chunking_data_keys:
             chunking_data_keys = list(data_dict.keys())  # use all if not configured separately
-            chunking_data_key_black_list = ["seq_tag", "seq_idx", "num_seqs"]
+            chunking_data_key_black_list = ["seq_tag", "seq_idx", "num_seqs", "epoch"]
             for key in chunking_data_key_black_list:
                 if key in chunking_data_keys:
                     chunking_data_keys.remove(key)
@@ -208,20 +208,66 @@ class BatchingIterDataPipe(torch.utils.data.IterDataPipe):
     def __init__(self, dataset: torch.utils.data.IterableDataset, batch_size=1, max_seqs=None):
         """
         :param dataset: dataset to apply batching to
-        :param int|dict[str,int]|None batch_size: Maximum number of time steps (e.g. audio frames / words)
-            batch (padding included).
+        :param int|dict[str,int]|None|function batch_size: Maximum number of time steps (e.g. audio frames / words)
+            in one batch (padding included).
             If given as a dict data_key -> value, sets different individual limits per data key.
             If None, no limit.
-        :param int|None max_seqs: maximum number of sequences in a batch,
-            None means unlimited (also -1 to match TF backend).
+            Can also be a callable with kwargs epoch, seq_idx, epoch_continuous, **_other_kwargs,
+            returning the batch size.
+        :param int|None|function max_seqs: maximum number of sequences in a batch,
+            None means unlimited (also -1 to match TF backend).
+            Can also be a callable with kwargs epoch, seq_idx, epoch_continuous, **_other_kwargs,
+            returning the max seqs.
         """
         super().__init__()
         self._dataset = dataset
-        self._max_batch_size = NumbersDict(sys.maxsize if batch_size is None else batch_size)
-        self._max_seqs = sys.maxsize if (max_seqs is None or max_seqs == -1) else max_seqs
+        self._max_batch_size = self._parse_batch_size(batch_size)
+        self._max_seqs = self._parse_max_seqs(max_seqs)

-        assert self._max_batch_size.min_value() > 0
-        assert self._max_seqs > 0
+        if not callable(self._max_batch_size):
+            assert isinstance(self._max_batch_size, NumbersDict) and self._max_batch_size.min_value() > 0
+        if not callable(self._max_seqs):
+            assert isinstance(self._max_seqs, int) and self._max_seqs > 0
+
+    @staticmethod
+    def _parse_batch_size(
+        batch_size: Union[int, Dict[str, int], NumbersDict, None, Callable],
+        *,
+        data_dict: Optional[Dict[str, Any]] = None,
+    ) -> Union[NumbersDict, Callable]:
+        """
+        :param batch_size: see __init__()
+        :return: batch_size
+        """
+        if callable(batch_size):
+            if data_dict:
+                batch_size = batch_size(**BatchingIterDataPipe._get_user_func_kwargs_from_data_dict(data_dict))
+            else:
+                return batch_size
+        return NumbersDict(sys.maxsize if batch_size is None else batch_size)
+
+    @staticmethod
+    def _parse_max_seqs(
+        max_seqs: Union[int, None, Callable], *, data_dict: Optional[Dict[str, Any]] = None
+    ) -> Union[int, Callable]:
+        """
+        :param max_seqs: see __init__()
+        :return: max_seqs
+        """
+        if callable(max_seqs):
+            if data_dict:
+                max_seqs = max_seqs(**BatchingIterDataPipe._get_user_func_kwargs_from_data_dict(data_dict))
+            else:
+                return max_seqs
+        return sys.maxsize if (max_seqs is None or max_seqs == -1) else max_seqs
+
+    @staticmethod
+    def _get_user_func_kwargs_from_data_dict(data_dict: Dict[str, Any]) -> Dict[str, Any]:
+        epoch = int(data_dict["epoch"])
+        seq_idx = int(data_dict["seq_idx"])
+        num_seqs = int(data_dict["num_seqs"])  # >=1 if known, otherwise -1
+        epoch_continuous = (epoch - 1 + (seq_idx + 1) / num_seqs) if num_seqs > 0 else None
+        return {"epoch": epoch, "seq_idx": seq_idx, "epoch_continuous": epoch_continuous, **get_fwd_compat_kwargs()}

     def __iter__(self):
         """
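With this change, batch_size and max_seqs in a RETURNN PyTorch-backend config may be callables, re-evaluated per incoming sequence with the kwargs built in _get_user_func_kwargs_from_data_dict above. A hedged sketch of such config functions (the concrete numbers are made up; accept **_other_kwargs so the forward-compat kwargs from get_fwd_compat_kwargs do not break the call):

def batch_size(*, epoch_continuous, **_other_kwargs):
    """Ramp the max frames per batch up over the first (sub)epoch (illustrative numbers)."""
    if epoch_continuous is None or epoch_continuous >= 1:
        return 20000
    return 5000 + int(epoch_continuous * 15000)


def max_seqs(*, epoch, **_other_kwargs):
    """Allow more sequences per batch after the first two epochs (illustrative numbers)."""
    return 100 if epoch <= 2 else 200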
@@ -233,7 +279,12 @@ class BatchingIterDataPipe(torch.utils.data.IterDataPipe):
         current_max_sequence_lengths = NumbersDict(0)  # data_key -> length of longest sequence in current batch

         for data_dict in self._dataset:
-            if len(current_batch) >= self._max_seqs:
+            max_seqs = self._parse_max_seqs(self._max_seqs, data_dict=data_dict)
+            max_batch_size = self._parse_batch_size(self._max_batch_size, data_dict=data_dict)
+            assert isinstance(max_seqs, int) and max_seqs > 0
+            assert isinstance(max_batch_size, NumbersDict) and max_batch_size.min_value() > 0
+
+            if len(current_batch) >= max_seqs:
                 yield current_batch
                 current_batch = []
                 current_max_sequence_lengths = NumbersDict(0)
@@ -246,7 +297,7 @@ class BatchingIterDataPipe(torch.utils.data.IterDataPipe):
             max_sequence_lengths_if_included = NumbersDict.max([current_max_sequence_lengths, sequence_lengths])
             batch_size_if_included = max_sequence_lengths_if_included * (len(current_batch) + 1)  # including padding

-            if current_batch and batch_size_if_included.any_compare(self._max_batch_size, (lambda a, b: a > b)):
+            if current_batch and batch_size_if_included.any_compare(max_batch_size, (lambda a, b: a > b)):
                 yield current_batch
                 current_batch = [data_dict]
                 current_max_sequence_lengths = sequence_lengths
returnn/torch/data/returnn_dataset_wrapper.py

@@ -67,7 +67,14 @@ class ReturnnDatasetIterDataPipe(torch.utils.data.IterDataPipe):

     def reset(self):
         """
-
+        This is called by PyTorch DataLoader mechanism once we create a new iterator over the DataLoader.
+        This happens at the beginning of each epoch.
+
+        (Note: The mechanism where ``reset()`` is actually called is very obfuscated in PyTorch.
+        As I understand it, there is a IterDataPipe metaclass (_IterDataPipeMeta)
+        which automatically registers a hook on ``__iter__`` via ``hook_iterator``.
+        Deep inside the complex logic of this hook, it calls ``_set_datapipe_valid_iterator_id``
+        which then calls ``reset()``.)
         """
         self._reset_callback()
@@ -81,6 +88,8 @@ class ReturnnDatasetIterDataPipe(torch.utils.data.IterDataPipe):
         except Exception:  # might not work for all datasets
             num_seqs = -1
         num_seqs = numpy.array(num_seqs)
+        assert self._dataset.epoch is not None
+        epoch = numpy.array(self._dataset.epoch)

         try:
             data_keys = self._dataset.get_data_keys()
@@ -94,6 +103,8 @@ class ReturnnDatasetIterDataPipe(torch.utils.data.IterDataPipe):
             # It's slightly redundant to have num_seqs in each entry,
             # but it's difficult to pass this back to the main proc otherwise.
             data["num_seqs"] = num_seqs
+            # epoch is also redundant, but that's the cleanest/simplest way to pass it on to BatchingIterDataPipe.
+            data["epoch"] = epoch
             yield data
             seq_index += 1
returnn/torch/engine.py

@@ -34,6 +34,7 @@ from returnn.util import NumbersDict
 from returnn.util.basic import hms, NotSpecified
 from returnn.util.result_with_reason import ResultWithReason
 from returnn.util.debug import debug_shell
+from returnn.util.math import simplify_and_format_number
 from returnn.forward_iface import ForwardCallbackIface

 from .updater import Updater
@@ -125,6 +126,7 @@ class Engine(EngineBase):
         self._log_memory_usage = config.bool("torch_log_memory_usage", False)
         self._log_batch_size = config.bool("log_batch_size", False) and log.verbose[5]
         self._calculate_exp_loss = config.bool("calculate_exp_loss", False)
+        self._log_grad_norm = _parse_log_grad_norm(config)
         self._reset_dev_memory_caches = config.bool("reset_dev_memory_caches", False)
         self._forward_auto_split_batch_on_oom = config.bool("forward_auto_split_batch_on_oom", False)
         self._stop_on_nonfinite_train_score = config.bool("stop_on_nonfinite_train_score", True)
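_parse_log_grad_norm itself is defined further down in engine.py and is not visible in this truncated diff. A plausible sketch of its semantics, assuming the config option log_grad_norm accepts False/True or a number p selecting the gradient p-norm (a reconstruction, not the actual RETURNN code):

from typing import Optional, Union


def _parse_log_grad_norm(config) -> Optional[Union[int, float]]:
    # Hypothetical: False/None disables grad-norm logging, True means L2 norm,
    # any positive number selects the p-norm to report.
    opt = config.typed_value("log_grad_norm", None)
    if not opt:
        return None
    if opt is True:
        return 2
    assert isinstance(opt, (int, float)) and opt > 0
    return opt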
@@ -383,6 +385,7 @@ class Engine(EngineBase):
|
|
|
383
385
|
del num_seqs_
|
|
384
386
|
if num_seqs is not None:
|
|
385
387
|
assert last_seq_idx < num_seqs
|
|
388
|
+
epoch_continuous = (self.epoch - 1 + (last_seq_idx + 1) / num_seqs) if num_seqs is not None else None
|
|
386
389
|
|
|
387
390
|
# clear the gradients when every gradient accumulation loop starts
|
|
388
391
|
if zero_grad_next_step:
|
|
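A quick numeric check of the `epoch_continuous` expression added above (values assumed purely for illustration): with 1-based epochs, being halfway through epoch 3 yields 2.5.

```python
epoch, last_seq_idx, num_seqs = 3, 49, 100  # made-up values
epoch_continuous = (epoch - 1 + (last_seq_idx + 1) / num_seqs) if num_seqs is not None else None
print(epoch_continuous)  # 2.5: two full epochs done, plus 50 of 100 seqs into epoch 3
```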
@@ -415,7 +418,10 @@ class Engine(EngineBase):
 
                 if accum_grad_multiple_step_dyn:
                     accum_grad_multiple_step = accum_grad_multiple_step_dyn(
-                        epoch=self.epoch,
+                        epoch=self.epoch,
+                        epoch_continuous=epoch_continuous,
+                        global_train_step=self.global_train_step,
+                        **util.get_fwd_compat_kwargs(),
                     )
                 cur_count_grad_accum += 1
                 perform_update_step = cur_count_grad_accum >= accum_grad_multiple_step
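With this change, a user-defined dynamic `accum_grad_multiple_step` callback in the config also receives `epoch_continuous` and `global_train_step`, plus forward-compatibility kwargs from `util.get_fwd_compat_kwargs()`. A hypothetical sketch of such a callback (the schedule itself is made up; only the keyword names follow from the call above):

```python
from typing import Optional

def accum_grad_multiple_step(
    *, epoch: int, epoch_continuous: Optional[float], global_train_step: int, **_fwd_compat_kwargs
) -> int:
    """Accumulate 4 steps during the first two epochs, then 1 (illustrative schedule only)."""
    if epoch_continuous is not None and epoch_continuous < 2.0:
        return 4
    return 1
```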
@@ -429,6 +435,12 @@ class Engine(EngineBase):
                 else:
                     total_loss.raw_tensor.backward()
 
+                if self._log_grad_norm and perform_update_step:
+                    key = f"grad_norm:p{simplify_and_format_number(self._log_grad_norm)}"
+                    assert key not in losses_dict
+                    inv_norm_factors_dict[key] = 1.0  # once per update step
+                    losses_dict[key] = _get_total_grad_norm(self._pt_model, p=self._log_grad_norm)
+
                 # only update the weights when every gradient accumulation loop ends
                 if perform_update_step:
                     self._updater.step(grad_scaler=self._grad_scaler)
@@ -469,9 +481,7 @@ class Engine(EngineBase):
                 step_idx += 1
                 self.global_train_step += 1
                 self._updater.set_current_train_step(
-                    global_train_step=self.global_train_step,
-                    epoch=self.epoch,
-                    epoch_continuous=(self.epoch - 1 + (last_seq_idx + 1) / num_seqs) if num_seqs is not None else None,
+                    global_train_step=self.global_train_step, epoch=self.epoch, epoch_continuous=epoch_continuous
                 )
         except Exception as exc:
             help_on_torch_exception(exc, step_idx=step_idx, model=self._orig_model, extern_data=extern_data)
@@ -480,8 +490,8 @@ class Engine(EngineBase):
         elapsed = time.monotonic() - epoch_start_time
         elapsed_computation_percentage = elapsed_computation_time / elapsed
         print(
-            "Trained %i steps, %s elapsed (%.1f%% computing time)"
-            % (step_idx, hms(elapsed), (elapsed_computation_percentage * 100.0)),
+            "Epoch %i: Trained %i steps, %s elapsed (%.1f%% computing time)"
+            % (self.epoch, step_idx, hms(elapsed), (elapsed_computation_percentage * 100.0)),
             file=log.v3,
         )
 
@@ -501,7 +511,7 @@ class Engine(EngineBase):
         if self._do_save():
             self.learning_rate_control.save()
 
-        print(f"Total train loss:", _format_score(dict(accumulated_losses_dict)), file=log.v3)
+        print(f"Epoch {self.epoch}: Total train loss:", _format_score(dict(accumulated_losses_dict)), file=log.v3)
 
         self._maybe_report_dev_memory_stats()
 
@@ -532,8 +542,6 @@ class Engine(EngineBase):
         self._reset_dev_memory_stats()
 
         eval_dump_str = []
-        score_keys = None
-        error_keys = None
 
         for dataset_name, dataset in self.eval_datasets.items():
             if skip_already_evaluated and self._is_dataset_evaluated(name=dataset_name):
@@ -575,10 +583,6 @@ class Engine(EngineBase):
                     self._run_step(extern_data, train_func=True)
                     train_ctx = rf.get_run_ctx()
 
-                    if score_keys is None:
-                        score_keys = set(name for name, loss in train_ctx.losses.items() if not loss.as_error)
-                        error_keys = set(name for name, loss in train_ctx.losses.items() if loss.as_error)
-
                     losses_dict = NumbersDict(
                         {
                             name: (
@@ -615,14 +619,7 @@ class Engine(EngineBase):
             self.learning_rate_control.save()
 
             # Same format as the TF engine.
-            eval_dump_str += [
-                "%s: score %s error %s"
-                % (
-                    dataset_name,
-                    _format_score({name: accumulated_losses_dict[name] for name in score_keys}),
-                    _format_score({name: accumulated_losses_dict[name] for name in error_keys}),
-                )
-            ]
+            eval_dump_str += ["%s: %s" % (dataset_name, _format_score(dict(accumulated_losses_dict)))]
 
         if self._torch_distributed_ctx:
             assert self._torch_distributed_ctx.rank() == 0
@@ -630,7 +627,11 @@ class Engine(EngineBase):
             torch.distributed.broadcast(_has_data, src=0)
 
         if not self._torch_distributed_ctx or self._torch_distributed_ctx.rank() == 0:
-            print(
+            print(
+                f"Epoch {self.epoch} evaluation:",
+                " ".join(eval_dump_str) if eval_dump_str else "(No evaluations.)",
+                file=log.v1,
+            )
 
         self._maybe_report_dev_memory_stats()
 
@@ -684,7 +685,7 @@ class Engine(EngineBase):
         batch_size = self.config.typed_value("batch_size", -1)
         batch_size = self.config.typed_value(f"batch_size_{'train' if train else 'dev'}", batch_size)
         assert batch_size != -1, f"batch_size or batch_size_{'train' if train else 'dev'} not defined in config"
-        max_seqs = self.config.
+        max_seqs = self.config.typed_value("max_seqs", -1)
         batches_dataset = data_pipeline.BatchingIterDataPipe(wrapped_dataset, batch_size=batch_size, max_seqs=max_seqs)
 
         loader_opts = self.config.typed_value("torch_dataloader_opts") or {}
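Given the lookup order above, a config can set one shared `batch_size` and optionally override it per role. A sketch of the relevant config entries (all values are placeholders):

```python
# RETURNN config excerpt (placeholder values):
batch_size = 20000        # fallback for both train and dev
batch_size_train = 40000  # overrides batch_size when building the train loader
batch_size_dev = 10000    # overrides batch_size for the eval/dev loaders
max_seqs = 200            # cap on the number of sequences per batch
```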
@@ -1286,9 +1287,9 @@ def _print_process(
     if log.verbose[5]:  # report every minibatch
         info = [report_prefix, "step %i" % step]
         if eval_info:  # Such as score.
-            info += ["%s %s" % (k,
+            info += ["%s %s" % (k, _format_score_value(v)) for k, v in eval_info.items()]
         if batch_size_info:
-            info += ["%s %s" % (k,
+            info += ["%s %s" % (k, _format_score_value(v)) for k, v in batch_size_info.items()]
         if log_memory_usage_device:
             dev = torch.device(log_memory_usage_device)
             if dev.type == "cuda":
@@ -1324,11 +1325,11 @@ def _format_score(score: Dict[str, float]) -> str:
     if not score:
         return "None"
     if len(score) == 1:
-        return
-    return " ".join(["%s %s" % (
+        return _format_score_value(list(score.values())[0])
+    return " ".join(["%s %s" % (k, _format_score_value(v)) for k, v in score.items()])
 
 
-def
+def _format_score_value(v: Any) -> str:
     if isinstance(v, float):
         if abs(v) > 1.0e3 or abs(v) < 1.0e-3:
             return f"{v:.3e}"
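The helper's visible formatting rule can be restated as a standalone sketch; branches not shown in the hunk (the fixed-point and non-float fallbacks) are assumptions:

```python
def format_score_value(v) -> str:
    """Sketch of _format_score_value; only the scientific-notation rule is taken from the diff."""
    if isinstance(v, float):
        if abs(v) > 1.0e3 or abs(v) < 1.0e-3:
            return f"{v:.3e}"  # very large or very small floats -> scientific notation
        return f"{v:.3f}"  # assumed fallback for ordinary floats
    return str(v)  # assumed fallback for non-floats

assert format_score_value(0.000123) == "1.230e-04"
assert format_score_value(12.5) == "12.500"
```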
@@ -1422,3 +1423,39 @@ def _set_torch_default_dtype_ctx_mgr(dtype: torch.dtype):
         yield
     finally:
         torch.set_default_dtype(old_dtype)
+
+
+def _parse_log_grad_norm(config: Config) -> Optional[Union[int, float]]:
+    log_grad_norm = config.opt_typed_value("log_grad_norm", False)
+    if isinstance(log_grad_norm, str):
+        if log_grad_norm.lower() in ["true", "false", "none"]:
+            log_grad_norm = {"true": True, "false": False, "none": None}[log_grad_norm.lower()]
+        else:
+            raise ValueError(f"Invalid value for log_grad_norm: {log_grad_norm!r}")
+    if log_grad_norm is None:
+        pass
+    elif isinstance(log_grad_norm, bool):
+        if log_grad_norm:
+            log_grad_norm = 2
+        else:
+            log_grad_norm = None
+    elif isinstance(log_grad_norm, (int, float)):
+        assert log_grad_norm > 0, f"log_grad_norm {log_grad_norm} > 0 expected"  # otherwise fine...
+    else:
+        raise TypeError(f"Invalid type for log_grad_norm: {log_grad_norm!r} type {type(log_grad_norm)}")
+    return log_grad_norm
+
+
+def _get_total_grad_norm(model: torch.nn.Module, p: float) -> float:
+    return float(
+        torch.norm(
+            torch.stack(
+                [
+                    param.grad.norm(p=p).detach().cpu()
+                    for param in model.parameters()
+                    if param.requires_grad and param.grad is not None
+                ]
+            ),
+            p=p,
+        ).item()
+    )
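To summarize the two new helpers: `_parse_log_grad_norm` maps the `log_grad_norm` config value to a norm order `p` (`True` means `p = 2`; `False`/`None` disables it; any positive number is used as-is), and `_get_total_grad_norm` takes the p-norm over the per-parameter gradient p-norms. For a fixed `p`, that norm-of-norms equals the p-norm over all gradient entries concatenated, which the following self-contained check illustrates:

```python
import torch

model = torch.nn.Linear(4, 2)
model(torch.randn(8, 4)).sum().backward()
grads = [p_.grad for p_ in model.parameters() if p_.grad is not None]

# Norm-of-norms, as in _get_total_grad_norm with p=2 ...
total = torch.norm(torch.stack([g.norm(p=2.0) for g in grads]), p=2.0)
# ... equals the plain 2-norm over all gradient entries concatenated:
flat = torch.cat([g.flatten() for g in grads])
assert torch.allclose(total, flat.norm(p=2.0))
```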
{returnn-1.20241030.185827 → returnn-1.20241106.124322}/returnn/util/basic.py
@@ -5,7 +5,7 @@ Various generic utilities, which are shared across different backend engines.
 """
 
 from __future__ import annotations
-from typing import Optional, Union, Any, Generic, TypeVar, Iterable, Tuple, Dict, List, Callable
+from typing import Optional, Union, Any, Generic, TypeVar, Iterable, Tuple, Dict, List, Set, Callable
 
 import subprocess
 from subprocess import CalledProcessError
@@ -554,12 +554,11 @@ def describe_torch_version() -> str:
     return "%s (%s in %s)" % (version, git_info, tdir)
 
 
-def get_tensorflow_version_tuple():
+def get_tensorflow_version_tuple() -> Tuple[int, ...]:
     """
     :return: tuple of ints, first entry is the major version
-    :rtype: tuple[int]
     """
-    import tensorflow as tf
+    import tensorflow as tf  # noqa
     import re
 
     return tuple([int(re.sub("(-rc[0-9]|-dev[0-9]*)", "", s)) for s in tf.__version__.split(".")])
@@ -1963,9 +1962,9 @@ class NumbersDict:
         self.value = broadcast_value
         self.max = self._max_error
 
-    def copy(self):
+    def copy(self) -> NumbersDict:
         """
-        :
+        :return: copy
         """
         return NumbersDict(self)
 
@@ -1982,11 +1981,10 @@ class NumbersDict:
             numbers_dict={k: const_number for k in numbers_dict.dict.keys()},
         )
 
-    def copy_like(self, numbers_dict):
+    def copy_like(self, numbers_dict: NumbersDict) -> NumbersDict:
         """
-        :param
+        :param numbers_dict:
         :return: copy of self with same keys as numbers_dict as far as we have them
-        :rtype: NumbersDict
         """
         if self.value is not None:
             return NumbersDict(
@@ -1999,11 +1997,11 @@ class NumbersDict:
         )
 
     @property
-    def keys_set(self):
+    def keys_set(self) -> Set[str]:
         """
         Also see :func:`keys_union` if you want to have a deterministic order.
 
-        :
+        :return: set of keys
         """
         return set(self.dict.keys())
 
@@ -2020,29 +2018,32 @@ class NumbersDict:
             res.append(key)
         return res
 
-    def __getitem__(self, key):
+    def __getitem__(self, key: str):
         if self.value is not None:
             return self.dict.get(key, self.value)
         return self.dict[key]
 
-    def __setitem__(self, key, value):
+    def __setitem__(self, key: str, value):
         self.dict[key] = value
 
-    def __delitem__(self, key):
+    def __delitem__(self, key: str):
         del self.dict[key]
 
-    def
+    def __contains__(self, item: str):
+        return item in self.dict
+
+    def get(self, key: str, default=None):
         """
-        :param
+        :param key:
         :param T default:
         :rtype: object|T
         """
         # Keep consistent with self.__getitem__. If self.value is set, this will always be the default value.
         return self.dict.get(key, self.value if self.value is not None else default)
 
-    def pop(self, key, *args):
+    def pop(self, key: str, *args):
         """
-        :param
+        :param key:
         :param T args: default, or not
         :rtype: object|T
         """
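A short sketch of how the accessors above interact with the broadcast value (illustrative values; behavior as per the `__getitem__`, `get`, and `__contains__` definitions shown):

```python
from returnn.util import NumbersDict

d = NumbersDict(numbers_dict={"data": 3}, broadcast_value=7)
assert d["data"] == 3
assert d["classes"] == 7         # missing key falls back to the broadcast value
assert d.get("classes", 0) == 7  # the broadcast value wins over the explicit default
assert "classes" not in d        # the new __contains__ checks only the dict keys
```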
@@ -2055,22 +2056,21 @@ class NumbersDict:
         # which would only make sense for our values, not the dict keys.
         raise Exception("%s.__iter__ is undefined" % self.__class__.__name__)
 
-    def keys(self):
+    def keys(self) -> Iterable[str]:
         """
         :rtype: set[str]
         """
         return self.dict.keys()
 
-    def values(self):
+    def values(self) -> List[Any]:
         """
-        :
+        :return: values: dict values + self.value
         """
         return list(self.dict.values()) + ([self.value] if self.value is not None else [])
 
-    def items(self):
+    def items(self) -> Iterable[Tuple[str, Any]]:
         """
         :return: dict items. this excludes self.value
-        :rtype: str[(str,object)]
         """
         return self.dict.items()
 
@@ -2080,9 +2080,9 @@ class NumbersDict:
         """
         return self.value is not None or key in self.dict
 
-    def has_values(self):
+    def has_values(self) -> bool:
         """
-        :
+        :return: any values in self.dict or self.value
         """
         return bool(self.dict) or self.value is not None
 
@@ -2186,12 +2186,12 @@ class NumbersDict:
     def __neg__(self):
         return self.unary_op(op=lambda a: -a)
 
-    def __bool__(self):
+    def __bool__(self) -> bool:
         return any(self.values())
 
     __nonzero__ = __bool__  # Python 2
 
-    def elem_eq(self, other, result_with_default=True):
+    def elem_eq(self, other, result_with_default: bool = True) -> NumbersDict:
         """
         Element-wise equality check with other.
         Note about broadcast default value: Consider some key which is neither in self nor in other.
@@ -2202,8 +2202,8 @@ class NumbersDict:
         You can control the behavior via result_with_default.
 
         :param NumbersDict|T other:
-        :param
-        :
+        :param result_with_default:
+        :return: new NumbersDict with bool values
         """
 
         def op(a, b):
@@ -2223,19 +2223,17 @@ class NumbersDict:
             res.value = None
         return res
 
-    def __eq__(self, other):
+    def __eq__(self, other) -> bool:
         """
         :param NumbersDict|T other:
         :return: whether self == other elemwise. see self.elem_eq
-        :rtype: bool
         """
         return all(self.elem_eq(other).values())
 
-    def __ne__(self, other):
+    def __ne__(self, other) -> bool:
         """
         :param NumbersDict|T other:
         :return: not (self == other)
-        :rtype: bool
         """
         return not (self == other)
 
@@ -2244,11 +2242,10 @@ class NumbersDict:
         # and it would just confuse.
         raise Exception("%s.__cmp__ is undefined" % self.__class__.__name__)
 
-    def any_compare(self, other, cmp):
+    def any_compare(self, other, cmp) -> bool:
         """
         :param NumbersDict other:
         :param ((object,object)->True) cmp:
-        :rtype: True
         """
         for key in self.keys():
             if key in other.keys():
@@ -2281,11 +2278,11 @@ class NumbersDict:
         return min(*args)
 
     @classmethod
-    def max(cls, items):
+    def max(cls, items) -> NumbersDict:
         """
         Element-wise maximum for item in items.
+
         :param list[NumbersDict|int|float] items:
-        :rtype: NumbersDict
         """
         assert items
         if len(items) == 1:
@@ -2295,11 +2292,10 @@ class NumbersDict:
         return cls.max([items[0], cls.max(items[1:])])
 
     @classmethod
-    def min(cls, items):
+    def min(cls, items) -> NumbersDict:
         """
         Element-wise minimum for item in items.
         :param list[NumbersDict|int|float] items:
-        :rtype: NumbersDict
         """
         assert items
         if len(items) == 1:
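Per the docstrings above, `NumbersDict.max` and `NumbersDict.min` reduce a list of `NumbersDict`s (or plain numbers) element-wise; a small illustrative sketch:

```python
from returnn.util import NumbersDict

a = NumbersDict({"data": 3, "classes": 9})
b = NumbersDict({"data": 5, "classes": 4})
m = NumbersDict.max([a, b])
assert m["data"] == 5 and m["classes"] == 9  # element-wise maximum per key
```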
@@ -2325,7 +2321,7 @@ class NumbersDict:
         """
         return min(self.values())
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         if self.value is None and not self.dict:
             return "%s()" % self.__class__.__name__
         if self.value is None and self.dict: