returnn 1.20230609.82609__tar.gz → 1.20230609.121734__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {returnn-1.20230609.82609/returnn.egg-info → returnn-1.20230609.121734}/PKG-INFO +1 -1
- returnn-1.20230609.121734/_setup_info_generated.py +2 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/__main__.py +16 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/datasets/basic.py +5 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/run_ctx.py +6 -2
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/log.py +12 -0
- returnn-1.20230609.121734/returnn/torch/distributed.py +187 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/torch/engine.py +106 -23
- {returnn-1.20230609.82609 → returnn-1.20230609.121734/returnn.egg-info}/PKG-INFO +1 -1
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn.egg-info/SOURCES.txt +1 -0
- returnn-1.20230609.82609/_setup_info_generated.py +0 -2
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/.editorconfig +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/.gitignore +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/.gitmodules +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/.kateconfig +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/CHANGELOG.md +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/CODEOWNERS +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/CONTRIBUTING.md +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/LICENSE +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/MANIFEST.in +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/README.rst +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/__init__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/12AX.cluster_map +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/_setup_returnn_env.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-fwd.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-horovod-mpi.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-horovod-mpi.py.sh +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-horovod-mpi.sh +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-hyper-param-tuning.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-iter-dataset.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-list-devices.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-lua-torch-layer.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-pretrain.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-record-and-push-to-webserver.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-returnn-as-framework.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-rf.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-rhn-enwik8.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-sprint-interface.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-tf-att-copy.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-tf-attention.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-tf-enc-dec.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-tf-hard-att-copy.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-tf-lstm-benchmark.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-tf-native-lstm.12ax.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-tf-native-lstm2.12ax.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-tf-neural-transducer.12ax.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-tf-rec-explicit-lstm.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-tf-rec-explicit-rnn.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-tf-rec-self-att.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-tf-search-compiled-graph.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-timit-lstm-ctc.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-torch.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/demo.sh +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/mdlstm/IAM/README.txt +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/mdlstm/IAM/chars.txt +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/mdlstm/IAM/config_demo +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/mdlstm/IAM/config_fwd +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/mdlstm/IAM/config_real +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/mdlstm/IAM/decode.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/mdlstm/IAM/go.sh +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/mdlstm/IAM/lines.txt +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/mdlstm/IAM/split/eval.txt +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/mdlstm/IAM/split/train.txt +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/mdlstm/IAM/split/valid.txt +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/mdlstm/README.md +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/mdlstm/artificial/create_test_h5.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/mdlstm/artificial/forwardconfig +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/mdlstm/artificial/go.sh +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/mdlstm/artificial/trainconfig +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/mdlstm/artificial_rgb/go.sh +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/pyproject.toml +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/requirements.txt +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/__init__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/__old_mod_loader__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/__setup__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/config.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/datasets/__init__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/datasets/audio.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/datasets/bundle_file.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/datasets/cached.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/datasets/cached2.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/datasets/generating.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/datasets/hdf.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/datasets/lm.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/datasets/map.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/datasets/meta.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/datasets/multi_proc.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/datasets/normalization_data.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/datasets/numpy_dump.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/datasets/raw_wav.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/datasets/sprint.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/datasets/stereo.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/datasets/util/__init__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/datasets/util/feature_extraction.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/datasets/util/vocabulary.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/engine/__init__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/engine/base.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/engine/batch.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/__init__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/__main__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/warp-rna/.git +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/__init__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/graph_editor/README.md +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/graph_editor/__init__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/graph_editor/edit.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/graph_editor/reroute.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/graph_editor/select.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/graph_editor/subgraph.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/graph_editor/transform.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/extern/graph_editor/util.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/forward_iface.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/__init__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/_backend.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/_numpy_backend.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/_utils.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/array_.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/attention.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/cond.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/const.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/container.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/control_flow_ctx.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/conv.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/device.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/dims.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/dropout.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/dtype.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/encoder/__init__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/encoder/base.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/encoder/conformer.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/gradient.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/init.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/linear.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/loop.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/loss.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/math_.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/matmul.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/module.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/normalization.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/parameter.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/rand.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/rec.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/reduce.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/signal.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/state.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/tensor_array.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/types.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/import_/__init__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/import_/common.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/import_/git.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/import_/import_.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/learning_rate_control.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/native_op.cpp +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/native_op.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/pretrain.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/sprint/__init__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/sprint/cache.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/sprint/control.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/sprint/error_signals.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/sprint/extern_interface.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/sprint/interface.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tensor/README.md +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tensor/__init__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tensor/_dim_extra.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tensor/_tensor_extra.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tensor/_tensor_mixin_base.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tensor/_tensor_op_overloads.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tensor/control_flow_ctx.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tensor/dim.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tensor/marked_dim.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tensor/tensor.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tensor/tensor_dict.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tensor/utils.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/__init__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/compat.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/data_pipeline.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/distributed.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/engine.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/frontend_layers/__init__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/frontend_layers/_backend.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/frontend_layers/_utils.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/frontend_layers/cond.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/frontend_layers/dims.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/frontend_layers/layer.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/frontend_layers/make_layer.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/frontend_low_level/__init__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/frontend_low_level/_backend.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/horovod.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/hyper_param_tuning.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/layers/__init__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/layers/base.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/layers/basic.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/layers/rec.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/layers/segmental_model.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/layers/signal_processing.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/layers/variable.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/native_op.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/network.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/sprint.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/updater.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/util/__init__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/util/basic.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/util/data.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/util/ken_lm.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/tf/util/open_fst.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/torch/README.md +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/torch/__init__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/torch/data/__init__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/torch/data/pipeline.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/torch/data/tensor_utils.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/torch/frontend/__init__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/torch/frontend/_backend.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/torch/frontend/_rand.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/torch/frontend/bridge.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/torch/functional/README.md +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/torch/functional/__init__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/torch/updater.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/util/__init__.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/util/basic.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/util/better_exchook.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/util/bpe.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/util/debug.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/util/debug_helpers.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/util/fsa.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/util/literal_py_to_pickle.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/util/pprint.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/util/py-to-pickle.cpp +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/util/py_compat.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/util/sig_proc.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/util/task_system.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn.egg-info/dependency_links.txt +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn.egg-info/top_level.txt +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/rnn.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/setup.cfg +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/setup.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/DummySprintExec.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/PyCharm-inspection-profile.xml +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/PyCharm.idea/.gitignore +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/PyCharm.idea/.name +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/PyCharm.idea/misc.xml +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/PyCharm.idea/modules.xml +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/PyCharm.idea/returnn.iml +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/_set_num_threads1.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/_setup_returnn_env.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/_setup_test_env.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/bpe-unicode-demo.codes +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/bpe-unicode-demo.vocab +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/lexicon_opt.fst +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/lexicon_opt.isyms +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/lexicon_opt.jpg +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/lexicon_opt.osyms +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/lint_common.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/pycharm-inspect.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/pylint.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/returnn-as-framework.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/rf_utils.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/spelling.dic +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_Config.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_Dataset.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_Fsa.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_GeneratingDataset.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_HDFDataset.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_LearningRateControl.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_Log.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_MultiProcDataset.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_PTDataset.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_Pretrain.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_ResNet.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_SprintDataset.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_SprintInterface.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_TFEngine.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_TFNativeOp.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_TFNetworkLayer.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_TFNetworkRecLayer.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_TFNetworkSigProcLayer.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_TFUpdater.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_TFUtil.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_TF_determinism.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_TaskSystem.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_TaskSystem_SharedMem.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_TranslationDataset.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_Util.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_demos.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_fork_exec.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_hdf_dump.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_rf_array.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_rf_attention.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_rf_base.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_rf_cond.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_rf_container.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_rf_conv.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_rf_encoder_conformer.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_rf_loop.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_rf_math.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_rf_normalization.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_rf_rec.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_rf_reduce.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_rf_signal.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_tensor.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_tools.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_torch_engine.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_torch_frontend.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tests/test_torch_internal_frontend.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/_setup_returnn_env.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/analyze-dataset-batches.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/bliss-collect-seq-lens.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/bliss-dump-text.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/bliss-get-segment-names.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/bliss-to-ogg-zip.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/bpe-create-lexicon.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/calculate-word-error-rate.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/cleanup-old-models.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/collect-orth-symbols.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/collect-words.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/compile_native_op.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/compile_tf_graph.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/debug-dump-search-scores.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/debug-plot-search-scores.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/dump-dataset-raw-strings.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/dump-dataset.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/dump-forward-stats.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/dump-forward.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/dump-network-json.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/dump-pickle.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/extract_state_tying_from_dataset.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/get-attention-weights.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/get-best-model-epoch.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/hdf_dump.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/hdf_dump_translation_dataset.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/import-blocks-mt-model.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/import-t2t-mt-model.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/lattice_rescorer/.gitignore +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/lattice_rescorer/Makefile +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/lattice_rescorer/README.md +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/lattice_rescorer/example/README.md +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/lattice_rescorer/example/libs_list +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/lattice_rescorer/example/state_vars_list +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/lattice_rescorer/example/tensor_names_list +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/lattice_rescorer/file.h +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/lattice_rescorer/main.cc +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/lattice_rescorer/rescorer.h +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/lattice_rescorer/vocabulary.cc +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/lattice_rescorer/vocabulary.h +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/tf_avg_checkpoints.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/tf_inspect_checkpoint.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/tf_inspect_summary_log.py +0 -0
- {returnn-1.20230609.82609 → returnn-1.20230609.121734}/tools/torch_export_to_onnx.py +0 -0
{returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/__main__.py

@@ -334,6 +334,17 @@ def init_backend_engine():
 
             returnn.tf.distributed.init_distributed_tf(config)
     elif BackendEngine.is_torch_selected():
+        if config.typed_value("torch_distributed") is not None:
+            import socket
+            import returnn.torch.distributed
+
+            torch_distributed = returnn.torch.distributed.get_ctx(config=config)
+            print(
+                "Torch: Hostname %s, pid %i, using GPU %s."
+                % (socket.gethostname(), os.getpid(), str(torch_distributed.local_rank())),
+                file=log.v3,
+            )
+
         print("PyTorch:", util.describe_torch_version(), file=log.v3)
     else:
         raise NotImplementedError

@@ -386,6 +397,11 @@ def finalize(error_occurred=False):
             import horovod.tensorflow as hvd  # noqa
 
             hvd.shutdown()
+    elif BackendEngine.is_torch_selected():
+        if config.typed_value("torch_distributed") is not None:
+            from torch.distributed import destroy_process_group
+
+            destroy_process_group()
 
 
 def need_data():
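The whole Torch-distributed code path in this release is gated on a new `torch_distributed` config entry. A minimal, hypothetical RETURNN config sketch that would enable it (the "class" and "options" keys are what the engine reads further below in this diff; the `backend` line is an assumption about the usual backend selection, not part of this diff):

from torch.nn.parallel import DistributedDataParallel

backend = "torch"  # assumed backend selection; not shown in this diff
torch_distributed = {"class": DistributedDataParallel, "options": {}}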
{returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/datasets/basic.py

@@ -245,6 +245,11 @@ class Dataset(object):
 
             if returnn.tf.horovod.get_ctx().is_dataset_distribution_random_seed_offset():
                 return returnn.tf.horovod.get_ctx().rank() * 16127
+
+        if config.typed_value("torch_distributed") is not None:
+            import returnn.torch.distributed
+
+            return returnn.torch.distributed.get_ctx().rank() * 16127
         return 0
 
     @staticmethod
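This mirrors the existing Horovod branch: each worker derives a distinct random-seed offset from its rank, so dataset shuffling differs per worker. The arithmetic, as a plain illustration:

for rank in range(4):
    print(rank, rank * 16127)  # rank 0 -> 0, rank 1 -> 16127, rank 2 -> 32254, rank 3 -> 48381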
{returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/frontend/run_ctx.py

@@ -217,6 +217,8 @@ class RunCtx:
         # e.g. dynamic dims.
         # Thus, we allow undefined dims in the expected output,
         # and ignore them when checking for equality.
+        # The most important thing for the user is to define what dims are dynamic and what dims are static.
+        # This is also necessary for ONNX export.
         assert len(expected_output.dims) == len(tensor.dims), (
            f"mark_as_output: lengths of expected output {expected_output.dims}"
            f" and actual output {tensor.dims} don't match."

@@ -235,10 +237,12 @@ class RunCtx:
                 f" Matching actual dim assumed to be dynamic, but got non-dynamic dim {actual_dim}."
             )
         elif expected_dim.is_static():
-            assert actual_dim
+            assert expected_dim.is_static() and actual_dim.dimension == expected_dim.dimension, (
                 f"mark_as_output: expected dim {expected_dim} is static."
-                f" Matching actual dim assumed to be the same static dim, but got {actual_dim}."
+                f" Matching actual dim assumed to be the same static dim value, but got {actual_dim}."
             )
+        else:
+            assert False, f"mark_as_output: unexpected expected dim {expected_dim}."
         assert expected_output.dtype == tensor.dtype, (
             f"mark_as_output: {name!r} dtype mismatch from expected output,"
             f" given {tensor.dtype}, expected {expected_output.dtype}"
{returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/log.py

@@ -192,6 +192,18 @@ class Log:
                 fn_ext = ".horovod-%i-%i%s" % (hvd.rank(), hvd.size(), fn_ext)
                 new_logs.append(fn_prefix + fn_ext)
             logs = new_logs
+
+        if config.typed_value("torch_distributed") is not None:
+            import returnn.torch.distributed
+
+            torch_distributed = returnn.torch.distributed.get_ctx(config=config)
+            new_logs = []
+            for fn in logs:
+                fn_prefix, fn_ext = os.path.splitext(fn)
+                fn_ext = ".torch-distrib-%i-%i%s" % (torch_distributed.rank(), torch_distributed.size(), fn_ext)
+                new_logs.append(fn_prefix + fn_ext)
+            logs = new_logs
+
         self.initialize(logs=logs, verbosity=log_verbosity, formatter=log_format)
 
     def print_warning(self, text, prefix_text="WARNING:", extra_text=None):
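A worked example of the renaming above, so each rank writes to its own log file. With rank 1 out of 4 workers:

import os

fn = "returnn.log"
rank, size = 1, 4  # would come from torch_distributed.rank() / .size()
fn_prefix, fn_ext = os.path.splitext(fn)
print(fn_prefix + ".torch-distrib-%i-%i%s" % (rank, size, fn_ext))  # -> returnn.torch-distrib-1-4.log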
returnn-1.20230609.121734/returnn/torch/distributed.py

@@ -0,0 +1,187 @@
+"""
+torch.distributed utils
+"""
+
+from __future__ import annotations
+import itertools
+from typing import Optional
+import os
+import socket
+
+from contextlib import contextmanager
+import torch
+from torch.distributed.algorithms.join import Join
+
+from returnn.config import Config
+import returnn.frontend as rf
+
+
+class DistributedContext:
+    """
+    This class setups some helper functions for torch distributed training
+    """
+
+    def __init__(self, config):
+        """
+        :param Config config:
+        """
+        import torch.distributed as dist
+
+        dist.init_process_group("nccl")
+
+        self._config = config
+        self._local_rank = os.environ["LOCAL_RANK"]
+        self._local_size = os.environ["LOCAL_WORLD_SIZE"]
+        self._rank = dist.get_rank()
+        self._size = dist.get_world_size()
+
+        print(
+            "Torch distributed initialized. Hostname %s, pid %i, rank %i / size %i, local rank %s / local size %s."
+            % (socket.gethostname(), os.getpid(), self._rank, self._size, self._local_rank, self._local_size)
+        )
+
+    def local_rank(self):
+        """
+        :rtype: int
+        """
+        return self._local_rank
+
+    def rank(self):
+        """
+        :rtype: int
+        """
+        return self._rank
+
+    def size(self):
+        """
+        :rtype: int
+        """
+        return self._size
+
+
+_is_set_up = False
+_ctx = None  # type: Optional[DistributedContext]
+
+
+def get_ctx(config=None):
+    """
+    :param Config|None config:
+    :returns: the global context if Torch distributed is enabled, or None otherwise.
+      If we did not setup the context yet, it will automatically create it.
+    :rtype: DistributedContext|None
+    """
+    global _is_set_up, _ctx
+    if _is_set_up:
+        return _ctx
+    if not config:
+        from returnn.config import get_global_config
+
+        config = get_global_config(raise_exception=False)
+        if not config:
+            return None
+    _is_set_up = True
+    if config.typed_value("torch_distributed") is None:
+        return None
+    _ctx = DistributedContext(config=config)
+    return _ctx
+
+
+def get_device_ids():
+    """
+    It depends on the specific setup what to return here,
+    how CUDA_VISIBLE_DEVICES is set up, etc.
+    This is currently a reasonable assumption,
+    but we might extend the logic later,
+    or make it configurable.
+    """
+    return [get_local_rank()]
+
+
+def get_local_rank():
+    """
+    torch.distributed does not seem to provide a function for this.
+    Via mpirun (OpenMPI), this env variable would be set.
+    It should fail with an error otherwise.
+    """
+    return int(os.environ["LOCAL_RANK"])
+
+
+def _find_tensors(obj):
+    """
+    Recursively find all tensors contained in the specified object,
+    cf. torch.nn.parallel.distributed._find_tensors
+    """
+    if isinstance(obj, torch.Tensor):
+        return [obj]
+    if isinstance(obj, (list, tuple)):
+        return itertools.chain(*map(_find_tensors, obj))
+    if isinstance(obj, dict):
+        return itertools.chain(*map(_find_tensors, obj.values()))
+    return []
+
+
+@contextmanager
+def ddp_train_forward_ctx(pt_model):
+    """
+    the original (unwrapped) module is passed to the train step, therefore here we set up the right context
+    as what DistributedDataParallel.forward does internally
+    """
+    if torch.is_grad_enabled() and pt_model.require_backward_grad_sync:
+        assert pt_model.logger is not None
+        pt_model.logger.set_runtime_stats_and_log()
+        pt_model.num_iterations += 1
+        pt_model.reducer.prepare_for_forward()
+
+    with torch.autograd.profiler.record_function("DistributedDataParallel.forward"):
+        if torch.is_grad_enabled() and pt_model.require_backward_grad_sync:
+            assert pt_model.logger is not None
+            pt_model.logger.set_runtime_stats_and_log()
+            pt_model.num_iterations += 1
+            pt_model.reducer.prepare_for_forward()
+
+        work = Join.notify_join_context(pt_model)
+        if work:
+            # noinspection PyProtectedMember
+            pt_model.reducer._set_forward_pass_work_handle(work, pt_model._divide_by_initial_world_size)
+
+        # noinspection PyProtectedMember
+        if torch.is_grad_enabled() and pt_model.reducer._rebuild_buckets():
+            pt_model._has_rebuilt_buckets = True
+
+        # noinspection PyProtectedMember
+        if pt_model._check_sync_bufs_pre_fwd():
+            # noinspection PyProtectedMember
+            pt_model._sync_buffers()
+
+        # noinspection PyProtectedMember
+        if pt_model._join_config.enable:
+            # Notify joined ranks whether they should sync in backwards pass or not.
+            # noinspection PyProtectedMember
+            pt_model._check_global_requires_backward_grad_sync(is_joined_rank=False)
+
+        # noinspection PyProtectedMember
+        with pt_model._inside_ddp_forward():
+            yield
+
+        # noinspection PyProtectedMember
+        if pt_model._check_sync_bufs_post_fwd():
+            # noinspection PyProtectedMember
+            pt_model._sync_buffers()
+
+        if torch.is_grad_enabled() and pt_model.require_backward_grad_sync:
+            pt_model.require_forward_param_sync = True
+            # We'll return the output object verbatim since it is a freeform
+            # object. We need to find any tensors in this object, though,
+            # because we need to figure out which parameters were used during
+            # this forward pass, to ensure we short circuit reduction for any
+            # unused parameters. Only if `find_unused_parameters` is set.
+            if pt_model.find_unused_parameters and not pt_model.static_graph:
+                # Do not need to populate this for static graph.
+                train_ctx = rf.get_run_ctx()
+                loss = list(train_ctx.losses.values())[0].loss.raw_tensor
+                # noinspection PyProtectedMember
+                pt_model.reducer.prepare_for_backward(list(_find_tensors(loss)))
+            else:
+                pt_model.reducer.prepare_for_backward([])
+        else:
+            pt_model.require_forward_param_sync = False
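A minimal usage sketch of the new module (an assumption about typical use, not shown in this diff): it presupposes a RETURNN config with `torch_distributed` set and LOCAL_RANK / LOCAL_WORLD_SIZE in the environment, e.g. as exported by torchrun; DistributedContext then initializes an NCCL process group. Note that local_rank() returns the raw environment string, while get_local_rank() converts to int.

import returnn.torch.distributed as torch_dist_util

ctx = torch_dist_util.get_ctx()  # returns None when torch_distributed is not configured
if ctx is not None:
    print("rank %i of %i, local rank %s" % (ctx.rank(), ctx.size(), ctx.local_rank()))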
{returnn-1.20230609.82609 → returnn-1.20230609.121734}/returnn/torch/engine.py

@@ -9,6 +9,9 @@ from contextlib import nullcontext
 import os
 import numpy
 import torch
+import time
+from torch.distributed import init_process_group
+from torch.nn.parallel import DistributedDataParallel
 import torch.utils.data.datapipes as dp
 from torch import autocast
 from torch.cuda import amp
@@ -23,7 +26,7 @@ from returnn.tensor import TensorDict, Tensor, Dim
 from returnn.datasets.basic import init_dataset, Dataset
 from returnn.util import basic as util
 from returnn.util import NumbersDict
-from returnn.util.basic import NotSpecified
+from returnn.util.basic import hms, NotSpecified
 from returnn.forward_iface import ForwardCallbackIface
 
 from .updater import Updater
@@ -72,6 +75,18 @@ class Engine(EngineBase):
         self._device = _get_device_from_config(config)
         print("Using device:", self._device, file=log.v2)
 
+        self._use_torch_distributed = False
+        self._torch_distributed_class = None  # type: Optional[Callable]
+        self._torch_distributed_options = None  # type: Optional[dict]
+        self._ddp_pt_model = None  # type: Optional[torch.nn.Module]
+        self._accum_grad_multiple_step = config.int("accum_grad_multiple_step", 1)
+
+        torch_distributed = config.typed_value("torch_distributed")
+        if torch_distributed is not None:
+            self._use_torch_distributed = True
+            self._torch_distributed_class = torch_distributed.get("class", None)
+            self._torch_distributed_options = torch_distributed.get("options", None)
+
         amp_options = self.config.typed_value("torch_amp")
         grad_scaler_opts = self.config.typed_value("grad_scaler", NotSpecified)
         if amp_options is not None:
@@ -130,6 +145,14 @@ class Engine(EngineBase):
         assert config is self.config or not config
         super().init_train_from_config(config=config)
 
+        if self._use_torch_distributed:
+            import returnn.torch.distributed
+
+            torch_distributed = returnn.torch.distributed.get_ctx(config=config)
+            local_rank = torch_distributed.local_rank()
+            print(f"Start running torch distributed training on local rank {local_rank}.", file=log.v2)
+            self._device = f"cuda:{local_rank}"
+
         self.train_dataset = train_data
         self.eval_datasets.clear()
         if dev_data:
@@ -151,6 +174,13 @@ class Engine(EngineBase):
 
         self._save_model_epoch_interval = config.int("save_interval", 1)
 
+        if self._use_torch_distributed:
+            from returnn.torch.distributed import get_device_ids
+
+            # wrap the model use torch distributed class
+            self._ddp_pt_model = self._torch_distributed_class(
+                self._pt_model, device_ids=get_device_ids(), **self._torch_distributed_options
+            )
         self._updater = Updater(self.config, self._pt_model, self.learning_rate)
         self._updater.create_optimizer()
         if self._start_epoch > 1:
@@ -202,14 +232,44 @@ class Engine(EngineBase):
         accumulated_losses_dict = NumbersDict()
         accumulated_inv_norm_factors_dict = NumbersDict()
         step_idx = 0
-
-
+        epoch_start_time = time.time()
+
+        data_iter = iter(self._train_dataloader)
+        elapsed_computation_time = 0
+
+        while True:
+            extern_data_raw = next(data_iter, None)
+            # WARNING: torch.distributed works only for the registered device,
+            # as it uses only one mechanism for communication, like NCCL.
+            # This is suboptimal here as we have the roundtrip CPU -> GPU -> NCCL -> GPU -> CPU.
+            # TODO: Use more direct CPU -> Ethernet -> CPU communication.
+            _has_data = torch.tensor([extern_data_raw is not None], dtype=torch.int8).to(self._device)
+
+            if self._use_torch_distributed:
+                # use all reduce to check if all workers have data, if at least one worker does not have data,
+                # all workers finish this epoch
+                torch.distributed.all_reduce(_has_data, op=torch.distributed.ReduceOp.MIN)
+            if not _has_data[0]:
+                break
+
+            # clear the gradients when every gradient accumulation loop starts
+            if step_idx % self._accum_grad_multiple_step == 0:
+                self._updater.get_optimizer().zero_grad()
+
+            step_begin_time = time.time()
+
             extern_data = _raw_dict_to_extern_data(
                 extern_data_raw, extern_data_template=self.extern_data, device=self._device
             )
-            self._run_step(extern_data,
+            self._run_step(extern_data, train_flag=True, train_func=True)
 
             train_ctx = rf.get_run_ctx()
+
+            # scale the loss to account for gradient accumulation
+            if self._accum_grad_multiple_step > 1:
+                for loss_name in train_ctx.losses.keys():
+                    train_ctx.losses[loss_name].loss /= self._accum_grad_multiple_step
+
             total_loss = train_ctx.total_loss()
             losses_dict = NumbersDict(
                 {
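The ReduceOp.MIN all_reduce above implements a simple end-of-epoch handshake: the reduced flag stays 1 only while every rank still has a batch, so any exhausted rank drags it to 0 for all workers and they leave the loop together. A local, single-process illustration of the same min-reduction (no process group needed):

import torch

per_rank_has_data = [torch.tensor([1], dtype=torch.int8), torch.tensor([0], dtype=torch.int8)]
reduced = torch.stack(per_rank_has_data).min()
print(bool(reduced))  # False -> all ranks break out of the epoch loop together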
@@ -221,13 +281,23 @@ class Engine(EngineBase):
                 {name: float(_to_raw(loss.get_inv_norm_factor())) for name, loss in train_ctx.losses.items()}
             )
 
-            if self.
-            self.
-
-            self._grad_scaler
-
-
-
+            with self._ddp_pt_model.no_sync() if self._use_torch_distributed and (
+                step_idx % self._accum_grad_multiple_step
+            ) != (self._accum_grad_multiple_step - 1) else nullcontext():
+                if self._grad_scaler is not None:
+                    self._grad_scaler.scale(total_loss).backward()
+                else:
+                    total_loss.raw_tensor.backward()
+
+            # only update the weights when every gradient accumulation loop ends
+            if (step_idx % self._accum_grad_multiple_step) == (self._accum_grad_multiple_step - 1):
+                if self._grad_scaler is not None:
+                    self._grad_scaler.step(self._updater.get_optimizer())
+                    self._grad_scaler.update()
+                else:
+                    self._updater.get_optimizer().step()
+
+            elapsed_computation_time += time.time() - step_begin_time
 
             accumulated_losses_dict += losses_dict
             accumulated_inv_norm_factors_dict += inv_norm_factors_dict
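An illustration of the accumulation schedule above: DDP gradient sync (and the optimizer step) happens only on the last micro-step of each accumulation window; all other micro-steps run under no_sync().

accum_grad_multiple_step = 4
for step_idx in range(8):
    last_in_window = (step_idx % accum_grad_multiple_step) == (accum_grad_multiple_step - 1)
    print(step_idx, "sync+step" if last_in_window else "no_sync")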
@@ -240,21 +310,28 @@ class Engine(EngineBase):
             step_idx += 1
             self.global_train_step += 1
 
-
-
-
-
-
+        elapsed = time.time() - epoch_start_time
+        elapsed_computation_percentage = elapsed_computation_time / elapsed
+        print(
+            "Trained %i steps, %s elapsed (%.1f%% computing time)"
+            % (step_idx, hms(elapsed), (elapsed_computation_percentage * 100.0)),
+            file=log.v3,
         )
-        self.learning_rate_control.save()
 
-
+        if (not self._use_torch_distributed) or (self._use_torch_distributed and torch.distributed.get_rank() == 0):
+            accumulated_losses_dict = accumulated_losses_dict / accumulated_inv_norm_factors_dict
+            self.learning_rate_control.set_epoch_error(
+                self.epoch, {f"train_loss_{k}": v for k, v in accumulated_losses_dict.items()}
+            )
+            self.learning_rate_control.save()
+
+            print(f"Total train loss:", _format_score(dict(accumulated_losses_dict)), file=log.v3)
 
-
-
-
+        if self.epoch % self._save_model_epoch_interval == 0 or self.epoch == self._final_epoch:
+            self._save_model()
+            self._save_optimizer()
 
-
+        self.eval_model()
 
     def eval_model(self):
         """
@@ -370,7 +447,13 @@ class Engine(EngineBase):
             assert self._forward_step_func is not None, "define forward_step in the config"
             rf.init_forward_step_run_ctx(expected_outputs=self._forward_step_expected_outputs)
 
-
+        from returnn.torch.distributed import ddp_train_forward_ctx
+
+        with autocast(
+            device_type=self._device, dtype=self._autocast_dtype
+        ) if self._use_autocast else nullcontext(), ddp_train_forward_ctx(pt_model=self._ddp_pt_model) if isinstance(
+            self._ddp_pt_model, DistributedDataParallel
+        ) else nullcontext():
             sentinel_kw = {"__fwd_compatible_random_arg_%i" % int(random() * 100): None}
             if train_func:
                 self._train_step_func(model=self._orig_model, extern_data=extern_data, **sentinel_kw)
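The stacked "with A if cond else nullcontext(), B if cond else nullcontext():" pattern above enables autocast and the DDP forward context independently. A standalone, runnable illustration of the same pattern (the tag() helper is hypothetical, purely for demonstration):

from contextlib import contextmanager, nullcontext

@contextmanager
def tag(name):
    print("enter", name)
    yield
    print("exit", name)

use_autocast, use_ddp_ctx = True, False
with tag("autocast") if use_autocast else nullcontext(), tag("ddp_forward") if use_ddp_ctx else nullcontext():
    print("train/forward step runs here")  # only the enabled contexts wrap this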