returnn 1.20250820.123936__tar.gz → 1.20250821.93927__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of returnn might be problematic. Click here for more details.
- {returnn-1.20250820.123936/returnn.egg-info → returnn-1.20250821.93927}/PKG-INFO +1 -1
- returnn-1.20250821.93927/_setup_info_generated.py +2 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/updater.py +47 -18
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/util/basic.py +14 -1
- {returnn-1.20250820.123936 → returnn-1.20250821.93927/returnn.egg-info}/PKG-INFO +1 -1
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_torch_engine.py +122 -0
- returnn-1.20250820.123936/_setup_info_generated.py +0 -2
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/.editorconfig +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/.gitignore +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/.gitmodules +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/.kateconfig +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/CHANGELOG.md +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/CODEOWNERS +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/CONTRIBUTING.md +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/LICENSE +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/MANIFEST.in +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/README.rst +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/12AX.cluster_map +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/_setup_returnn_env.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-fwd.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-horovod-mpi.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-horovod-mpi.py.sh +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-horovod-mpi.sh +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-hyper-param-tuning.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-iter-dataset.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-list-devices.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-lua-torch-layer.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-pretrain.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-record-and-push-to-webserver.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-returnn-as-framework.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-rf-pt-benchmark.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-rf.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-rhn-enwik8.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-sprint-interface.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-att-copy.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-attention.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-enc-dec.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-hard-att-copy.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-lstm-benchmark.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-native-lstm.12ax.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-native-lstm2.12ax.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-neural-transducer.12ax.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-rec-explicit-lstm.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-rec-explicit-rnn.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-rec-self-att.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-search-compiled-graph.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-timit-lstm-ctc.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-torch.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo.sh +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/IAM/README.txt +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/IAM/chars.txt +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/IAM/config_demo +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/IAM/config_fwd +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/IAM/config_real +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/IAM/decode.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/IAM/go.sh +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/IAM/lines.txt +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/IAM/split/eval.txt +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/IAM/split/train.txt +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/IAM/split/valid.txt +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/README.md +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/artificial/create_test_h5.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/artificial/forwardconfig +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/artificial/go.sh +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/artificial/trainconfig +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/artificial_rgb/go.sh +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/pyproject.toml +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/requirements.txt +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/__main__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/__old_mod_loader__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/__setup__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/config.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/audio.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/basic.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/bundle_file.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/cached.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/cached2.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/distrib_files.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/generating.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/hdf.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/lm.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/map.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/meta.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/multi_proc.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/normalization_data.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/numpy_dump.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/postprocessing.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/raw_wav.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/sprint.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/stereo.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/text_dict.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/util/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/util/feature_extraction.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/util/strings.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/util/vocabulary.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/engine/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/engine/base.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/engine/batch.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/__main__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/.git +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/graph_editor/README.md +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/graph_editor/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/graph_editor/edit.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/graph_editor/reroute.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/graph_editor/select.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/graph_editor/subgraph.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/graph_editor/transform.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/graph_editor/util.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/forward_iface.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/_backend.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/_cache.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/_native/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/_native/backend.cpp +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/_native/backend.hpp +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/_native/module.cpp +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/_native/module.hpp +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/_native/py_utils.hpp +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/_native/tensor_ops.cpp +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/_native/tensor_ops.hpp +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/_numpy_backend.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/_random_journal.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/_utils.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/array_.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/attention.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/audio/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/audio/mel.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/audio/specaugment.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/backend.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/build_from_dict.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/cond.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/const.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/container.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/control_flow_ctx.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/conv.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/conversions/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/conversions/espnet_e_branchformer.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/conversions/hf_llama.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/conversions/torch_nn.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/decoder/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/decoder/transformer.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/device.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/dims.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/dropout.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/dtype.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/encoder/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/encoder/base.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/encoder/conformer.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/encoder/conformer_v2.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/encoder/e_branchformer.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/encoder/transformer.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/gradient.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/graph.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/hooks.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/init.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/label_smoothing.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/linear.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/loop.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/loss.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/math_.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/matmul.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/module.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/nested.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/normalization.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/parameter.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/parametrizations.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/parametrize.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/piecewise_linear.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/rand.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/rec.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/reduce.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/run_ctx.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/signal.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/state.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/stepwise_scheduler.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/tensor_array.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/frontend/types.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/import_/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/import_/common.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/import_/git.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/import_/import_.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/learning_rate_control.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/log.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/native_op.cpp +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/native_op.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/pretrain.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/sprint/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/sprint/cache.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/sprint/control.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/sprint/error_signals.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/sprint/extern_interface.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/sprint/interface.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tensor/README.md +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tensor/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tensor/_dim_extra.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tensor/_tensor_extra.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tensor/_tensor_mixin_base.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tensor/_tensor_op_overloads.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tensor/control_flow_ctx.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tensor/dim.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tensor/marked_dim.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tensor/tensor.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tensor/tensor_dict.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tensor/utils.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/compat.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/data_pipeline.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/distributed.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/engine.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/frontend_layers/README.md +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/frontend_layers/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/frontend_layers/_backend.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/frontend_layers/_utils.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/frontend_layers/cond.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/frontend_layers/dims.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/frontend_layers/layer.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/frontend_layers/loop.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/frontend_layers/make_layer.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/frontend_layers/masked_computation.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/frontend_low_level/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/frontend_low_level/_backend.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/horovod.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/hyper_param_tuning.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/layers/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/layers/base.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/layers/basic.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/layers/rec.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/layers/segmental_model.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/layers/signal_processing.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/layers/variable.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/native_op.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/network.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/sprint.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/updater.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/util/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/util/basic.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/util/data.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/util/gradient_checkpoint.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/util/ken_lm.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/tf/util/open_fst.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/README.md +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/data/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/data/extern_data.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/data/pipeline.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/data/queued_data_iter.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/data/tensor_utils.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/distributed.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/engine.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/frontend/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/frontend/_backend.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/frontend/_rand.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/frontend/bridge.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/frontend/raw_ops.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/optim/README.md +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/optim/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/optim/lion.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/util/README.md +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/util/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/util/array_.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/util/debug_inf_nan.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/util/diagnose_gpu.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/util/exception_helper.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/util/gradient_checkpoint.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/util/module.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/torch/util/scaled_gradient.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/util/__init__.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/util/better_exchook.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/util/bpe.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/util/debug.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/util/debug_helpers.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/util/file_cache.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/util/fsa.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/util/literal_py_to_pickle.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/util/lru_cache.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/util/math.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/util/multi_proc_non_daemonic_spawn.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/util/native_code_compiler.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/util/pprint.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/util/py-to-pickle.cpp +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/util/py_ext_mod_compiler.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/util/result_with_reason.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/util/sig_proc.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/util/task_system.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/util/train_proc_manager.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/util/watch_memory.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn.egg-info/SOURCES.txt +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn.egg-info/dependency_links.txt +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn.egg-info/requires.txt +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn.egg-info/top_level.txt +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/rnn.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/setup.cfg +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/setup.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/DummySprintExec.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/PyCharm-inspection-profile.xml +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/PyCharm.idea/.gitignore +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/PyCharm.idea/.name +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/PyCharm.idea/misc.xml +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/PyCharm.idea/modules.xml +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/PyCharm.idea/returnn.iml +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/_set_num_threads1.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/_setup_returnn_env.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/_setup_test_env.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/bpe-unicode-demo.codes +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/bpe-unicode-demo.vocab +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/lexicon_opt.fst +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/lexicon_opt.isyms +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/lexicon_opt.jpg +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/lexicon_opt.osyms +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/lint_common.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/pycharm-inspect.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/pylint.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/returnn-as-framework.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/rf_utils.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/spelling.dic +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_Config.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_Dataset.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_Fsa.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_GeneratingDataset.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_HDFDataset.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_LearningRateControl.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_Log.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_MultiProcDataset.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_Pretrain.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_ResNet.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_SprintDataset.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_SprintInterface.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_TFEngine.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_TFNativeOp.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_TFNetworkLayer.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_TFNetworkRecLayer.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_TFNetworkSigProcLayer.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_TFUpdater.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_TFUtil.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_TF_determinism.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_TaskSystem.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_TaskSystem_SharedMem.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_TranslationDataset.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_Util.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_demos.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_fork_exec.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_hdf_dump.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_rf_array.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_rf_attention.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_rf_base.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_rf_cond.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_rf_const.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_rf_container.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_rf_conv.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_rf_decoder_transformer.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_rf_encoder_conformer.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_rf_gradient.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_rf_label_smoothing.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_rf_loop.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_rf_math.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_rf_normalization.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_rf_piecewise_linear.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_rf_rec.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_rf_reduce.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_rf_signal.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_tensor.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_threading.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_tools.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_torch_dataset.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_torch_frontend.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_torch_internal_frontend.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/test_torch_util.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tests/torch_utils.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/_setup_returnn_env.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/analyze-dataset-batches.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/bliss-collect-seq-lens.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/bliss-dump-text.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/bliss-get-segment-names.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/bliss-to-ogg-zip.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/bpe-create-lexicon.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/calculate-word-error-rate.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/cleanup-old-models.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/collect-orth-symbols.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/collect-words.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/compile_native_op.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/compile_tf_graph.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/debug-dump-search-scores.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/debug-plot-search-scores.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/dump-dataset-raw-strings.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/dump-dataset.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/dump-forward-stats.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/dump-forward.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/dump-network-json.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/dump-pickle.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/extract_state_tying_from_dataset.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/get-attention-weights.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/get-best-model-epoch.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/hdf_dump.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/hdf_dump_translation_dataset.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/import-blocks-mt-model.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/import-t2t-mt-model.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/lattice_rescorer/.gitignore +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/lattice_rescorer/Makefile +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/lattice_rescorer/README.md +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/lattice_rescorer/example/README.md +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/lattice_rescorer/example/libs_list +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/lattice_rescorer/example/state_vars_list +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/lattice_rescorer/example/tensor_names_list +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/lattice_rescorer/file.h +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/lattice_rescorer/main.cc +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/lattice_rescorer/rescorer.h +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/lattice_rescorer/vocabulary.cc +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/lattice_rescorer/vocabulary.h +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/tf_avg_checkpoints.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/tf_inspect_checkpoint.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/tf_inspect_summary_log.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/torch_avg_checkpoints.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/torch_export_to_onnx.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/torch_inspect_checkpoint.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/torch_inspect_checkpoint_and_opt.py +0 -0
- {returnn-1.20250820.123936 → returnn-1.20250821.93927}/tools/torch_scale_tuning.py +0 -0
|
@@ -95,6 +95,8 @@ class Updater:
|
|
|
95
95
|
Wraps a torch.optim.Optimizer, and extends it by some further functionality.
|
|
96
96
|
"""
|
|
97
97
|
|
|
98
|
+
_OptimizerParamGroupsExtraOpts = ("learning_rate_multiplier",)
|
|
99
|
+
|
|
98
100
|
def __init__(self, *, config, network, device, initial_learning_rate=1.0):
|
|
99
101
|
"""
|
|
100
102
|
:param returnn.config.Config config: config defining the training conditions.
|
|
@@ -131,6 +133,7 @@ class Updater:
|
|
|
131
133
|
|
|
132
134
|
self._optimizer_opts: Optional[Dict[str, Any]] = None
|
|
133
135
|
self.optimizer: Optional[torch.optim.Optimizer] = None
|
|
136
|
+
self._optimizer_param_groups_extra_opts: Optional[List[Dict[str, Any]]] = None
|
|
134
137
|
|
|
135
138
|
self._grad_clip = self.config.float("gradient_clip", 0.0)
|
|
136
139
|
self._grad_clip_global_norm = self.config.float("gradient_clip_global_norm", 0.0)
|
|
@@ -189,8 +192,15 @@ class Updater:
|
|
|
189
192
|
)
|
|
190
193
|
self._effective_learning_rate = float(lr)
|
|
191
194
|
if self.optimizer:
|
|
192
|
-
|
|
193
|
-
|
|
195
|
+
if self._optimizer_param_groups_extra_opts:
|
|
196
|
+
assert len(self.optimizer.param_groups) == len(self._optimizer_param_groups_extra_opts)
|
|
197
|
+
lr_multiplies = [
|
|
198
|
+
opts.get("learning_rate_multiplier", 1.0) for opts in self._optimizer_param_groups_extra_opts
|
|
199
|
+
]
|
|
200
|
+
else:
|
|
201
|
+
lr_multiplies = [1.0] * len(self.optimizer.param_groups)
|
|
202
|
+
for i, param_group in enumerate(self.optimizer.param_groups):
|
|
203
|
+
param_group["lr"] = self._effective_learning_rate * lr_multiplies[i]
|
|
194
204
|
|
|
195
205
|
def set_current_train_step(self, *, global_train_step: int, epoch: int, epoch_continuous: Optional[float] = None):
|
|
196
206
|
"""
|
|
@@ -273,7 +283,7 @@ class Updater:
|
|
|
273
283
|
if optimizer_opts is None:
|
|
274
284
|
raise ValueError("config field 'optimizer' needs to be set explicitely for the Torch backend")
|
|
275
285
|
self._optimizer_opts = optimizer_opts
|
|
276
|
-
self.optimizer = self._create_optimizer(optimizer_opts)
|
|
286
|
+
self.optimizer, self._optimizer_param_groups_extra_opts = self._create_optimizer(optimizer_opts)
|
|
277
287
|
|
|
278
288
|
def load_optimizer(self, filename):
|
|
279
289
|
"""
|
|
@@ -421,21 +431,20 @@ class Updater:
|
|
|
421
431
|
"""
|
|
422
432
|
return self.optimizer
|
|
423
433
|
|
|
424
|
-
def _create_optimizer(self, optimizer_opts):
|
|
434
|
+
def _create_optimizer(self, optimizer_opts) -> Tuple[torch.optim.Optimizer, Optional[List[Dict[str, Any]]]]:
|
|
425
435
|
"""
|
|
426
436
|
Returns a valid optimizer considering the dictionary given by the user in the config.
|
|
427
437
|
|
|
428
438
|
:param dict[str]|str optimizer_opts: Optimizer configuration specified by the user.
|
|
429
439
|
If it's a dict, it must contain "class" with the optimizer name or callable.
|
|
430
440
|
If it's a str, it must be the optimizer name.
|
|
431
|
-
:return:
|
|
432
|
-
:rtype: torch.optim.Optimizer
|
|
441
|
+
:return: tuple (optimizer, optional optimizer_param_groups_extra_opts).
|
|
433
442
|
"""
|
|
434
443
|
lr = self.learning_rate
|
|
435
444
|
|
|
436
445
|
# If the parameter is already a valid optimizer, return it without further processing
|
|
437
446
|
if isinstance(optimizer_opts, torch.optim.Optimizer):
|
|
438
|
-
return optimizer_opts
|
|
447
|
+
return optimizer_opts, None
|
|
439
448
|
elif callable(optimizer_opts):
|
|
440
449
|
optimizer_opts: Dict[str, Any] = {"class": optimizer_opts}
|
|
441
450
|
else:
|
|
@@ -461,12 +470,23 @@ class Updater:
|
|
|
461
470
|
lr = lr * opt_kwargs.pop("learning_rate_multiplier", 1.0)
|
|
462
471
|
opt_kwargs["lr"] = lr
|
|
463
472
|
|
|
464
|
-
|
|
465
|
-
|
|
473
|
+
param_groups = self._get_optimizer_param_groups(optim_class, opt_kwargs)
|
|
474
|
+
param_groups = list(param_groups)
|
|
475
|
+
assert len(param_groups) > 0, "got an empty parameter list?"
|
|
476
|
+
if not isinstance(param_groups[0], dict):
|
|
477
|
+
param_groups = [{"params": param_groups}]
|
|
478
|
+
optimizer_param_groups_extra_opts: Optional[List[Dict[str, Any]]] = None
|
|
479
|
+
if any(any(key in group for key in self._OptimizerParamGroupsExtraOpts) for group in param_groups):
|
|
480
|
+
param_groups = [dict(group) for group in param_groups] # copy to make sure we can modify it
|
|
481
|
+
optimizer_param_groups_extra_opts = [
|
|
482
|
+
{key: group.pop(key) for key in self._OptimizerParamGroupsExtraOpts if key in group}
|
|
483
|
+
for group in param_groups
|
|
484
|
+
]
|
|
485
|
+
optimizer = optim_class(param_groups, **opt_kwargs)
|
|
466
486
|
print("Optimizer: %s" % optimizer, file=log.v1)
|
|
467
487
|
assert isinstance(optimizer, torch.optim.Optimizer)
|
|
468
488
|
|
|
469
|
-
return optimizer
|
|
489
|
+
return optimizer, optimizer_param_groups_extra_opts
|
|
470
490
|
|
|
471
491
|
def _create_default_optimizer(self):
|
|
472
492
|
"""
|
|
@@ -514,7 +534,11 @@ class Updater:
|
|
|
514
534
|
assert callable(custom_param_groups), f"invalid param_groups_custom {custom_param_groups!r}"
|
|
515
535
|
rf_model = wrapped_pt_module_to_rf_module(self.network)
|
|
516
536
|
custom_param_groups_ = custom_param_groups(
|
|
517
|
-
model=self.network,
|
|
537
|
+
model=self.network,
|
|
538
|
+
rf_model=rf_model,
|
|
539
|
+
optimizer_class=optim_class,
|
|
540
|
+
optimizer_opts=optimizer_opts,
|
|
541
|
+
**get_fwd_compat_kwargs(),
|
|
518
542
|
)
|
|
519
543
|
assert isinstance(custom_param_groups_, Iterable) and all(
|
|
520
544
|
isinstance(group, dict) for group in custom_param_groups_
|
|
@@ -547,11 +571,9 @@ class Updater:
|
|
|
547
571
|
# Parameters without weight decay: biases + LayerNorm/Embedding layers.
|
|
548
572
|
wd_params = set()
|
|
549
573
|
no_wd_params = set()
|
|
550
|
-
blacklist_wd_modules
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
else:
|
|
554
|
-
blacklist_wd_modules = _wrap_user_blacklist_wd_modules(blacklist_wd_modules)
|
|
574
|
+
blacklist_wd_modules = wrap_user_blacklist_wd_modules(
|
|
575
|
+
optimizer_opts.pop("weight_decay_modules_blacklist", None)
|
|
576
|
+
)
|
|
555
577
|
custom_include_check = optimizer_opts.pop("weight_decay_custom_include_check", None)
|
|
556
578
|
if custom_include_check:
|
|
557
579
|
assert callable(custom_include_check), f"invalid weight_decay_custom_include_check {custom_include_check!r}"
|
|
@@ -598,9 +620,16 @@ class Updater:
|
|
|
598
620
|
return optim_groups
|
|
599
621
|
|
|
600
622
|
|
|
601
|
-
def
|
|
602
|
-
mods: Sequence[Union[str, Type[rf.Module], Type[torch.nn.Module]]],
|
|
623
|
+
def wrap_user_blacklist_wd_modules(
|
|
624
|
+
mods: Optional[Sequence[Union[str, Type[rf.Module], Type[torch.nn.Module]]]],
|
|
603
625
|
) -> Tuple[type, ...]:
|
|
626
|
+
"""
|
|
627
|
+
Wraps the user-provided blacklist_weight_decay_modules into a tuple of types.
|
|
628
|
+
This supports both pure PyTorch modules (e.g. "torch.nn.LayerNorm")
|
|
629
|
+
and RF modules (e.g. "rf.LayerNorm"), which can be specified as strings or types.
|
|
630
|
+
"""
|
|
631
|
+
if mods is None:
|
|
632
|
+
return torch.nn.LayerNorm, torch.nn.Embedding
|
|
604
633
|
assert isinstance(mods, (list, tuple)), f"invalid blacklist_weight_decay_modules {mods!r}"
|
|
605
634
|
res = []
|
|
606
635
|
for mod in mods:
|
|
@@ -2459,8 +2459,12 @@ class DictRefKeys(Generic[K, V]):
|
|
|
2459
2459
|
Like `dict`, but hash and equality of the keys
|
|
2460
2460
|
"""
|
|
2461
2461
|
|
|
2462
|
-
def __init__(self):
|
|
2462
|
+
def __init__(self, items: Union[None, Iterable[Tuple[K, V]], Dict[K, V]] = None, /, **kwargs):
|
|
2463
2463
|
self._d = {} # type: Dict[RefIdEq[K], V]
|
|
2464
|
+
if items is not None:
|
|
2465
|
+
self.update(items)
|
|
2466
|
+
if kwargs:
|
|
2467
|
+
self.update(kwargs)
|
|
2464
2468
|
|
|
2465
2469
|
def __repr__(self):
|
|
2466
2470
|
return "DictRefKeys(%s)" % ", ".join(["%r: %r" % (k, v) for (k, v) in self.items()])
|
|
@@ -2489,6 +2493,15 @@ class DictRefKeys(Generic[K, V]):
|
|
|
2489
2493
|
def __contains__(self, item: K):
|
|
2490
2494
|
return RefIdEq(item) in self._d
|
|
2491
2495
|
|
|
2496
|
+
def update(self, other: Union[Dict[K, V], Iterable[Tuple[K, V]]], /):
|
|
2497
|
+
"""
|
|
2498
|
+
:param other: dict or iterable of (key, value) tuples
|
|
2499
|
+
"""
|
|
2500
|
+
if isinstance(other, dict):
|
|
2501
|
+
other = other.items()
|
|
2502
|
+
for k, v in other:
|
|
2503
|
+
self[k] = v
|
|
2504
|
+
|
|
2492
2505
|
|
|
2493
2506
|
def make_dll_name(basename):
|
|
2494
2507
|
"""
|
|
@@ -582,6 +582,128 @@ def test_load_optimizer_old_format():
|
|
|
582
582
|
updater.load_optimizer(tmp_dir + "/model.opt.new_format.pt")
|
|
583
583
|
|
|
584
584
|
|
|
585
|
+
def test_updater_weight_decay_blacklist():
|
|
586
|
+
from returnn.util.basic import DictRefKeys
|
|
587
|
+
|
|
588
|
+
# Don't specify weight_decay_modules_blacklist, so it should use the default,
|
|
589
|
+
# which should exclude Embedding and LayerNorm, and all biases.
|
|
590
|
+
# So this also tests that the default behavior does not change unexpectedly.
|
|
591
|
+
config = Config(dict(optimizer={"class": "adamw", "weight_decay": 1e-3}))
|
|
592
|
+
model = torch.nn.Sequential(
|
|
593
|
+
torch.nn.Embedding(10, 5),
|
|
594
|
+
torch.nn.LayerNorm(5),
|
|
595
|
+
torch.nn.Linear(5, 5),
|
|
596
|
+
torch.nn.ReLU(),
|
|
597
|
+
)
|
|
598
|
+
updater = Updater(config=config, network=model, device=torch.device("cpu"))
|
|
599
|
+
updater.create_optimizer()
|
|
600
|
+
updater.set_current_train_step(global_train_step=0, epoch=1)
|
|
601
|
+
|
|
602
|
+
opt = updater.get_optimizer()
|
|
603
|
+
assert isinstance(opt, torch.optim.AdamW)
|
|
604
|
+
assert len(opt.param_groups) == 2
|
|
605
|
+
groups_by_wd = {pg.get("weight_decay", 0.0): pg for pg in opt.param_groups}
|
|
606
|
+
assert set(groups_by_wd.keys()) == {0.0, 1e-3}
|
|
607
|
+
param_to_name = DictRefKeys((param, name) for name, param in model.named_parameters())
|
|
608
|
+
params_by_wd = {wd: set(map(param_to_name.__getitem__, group["params"])) for wd, group in groups_by_wd.items()}
|
|
609
|
+
print("params by wd:", params_by_wd)
|
|
610
|
+
assert params_by_wd[0.0] == {"0.weight", "1.weight", "1.bias", "2.bias"}
|
|
611
|
+
assert params_by_wd[1e-3] == {"2.weight"}
|
|
612
|
+
|
|
613
|
+
|
|
614
|
+
def test_updater_lr_multipliers():
|
|
615
|
+
from collections import defaultdict
|
|
616
|
+
from fnmatch import fnmatchcase
|
|
617
|
+
from typing import Dict, List, Set, Any
|
|
618
|
+
from returnn.util.basic import DictRefKeys, FrozenDict
|
|
619
|
+
from returnn.torch.updater import wrap_user_blacklist_wd_modules
|
|
620
|
+
from returnn.torch.frontend.bridge import wrapped_pt_module_to_rf_module
|
|
621
|
+
|
|
622
|
+
# noinspection PyShadowingNames
|
|
623
|
+
def _param_groups_custom(*, model: torch.nn.Module, optimizer_opts: Dict[str, Any], **_kwargs):
|
|
624
|
+
default_weight_decay = optimizer_opts.get("weight_decay", 0.0)
|
|
625
|
+
|
|
626
|
+
blacklist_wd_modules = wrap_user_blacklist_wd_modules(
|
|
627
|
+
optimizer_opts.pop("weight_decay_modules_blacklist", None)
|
|
628
|
+
)
|
|
629
|
+
lr_multipliers_by_patterns = optimizer_opts.pop("learning_rate_multipliers_by_patterns")
|
|
630
|
+
|
|
631
|
+
# Tracker of visited parameters to only add each parameter once, in case two modules share common parameters.
|
|
632
|
+
# We need the wrapper class RefIdEq because Parameters are compared by value and not by reference.
|
|
633
|
+
params_by_opts: defaultdict[FrozenDict, List[torch.nn.Parameter]] = defaultdict(list)
|
|
634
|
+
visited_params = DictRefKeys()
|
|
635
|
+
for module_name, module in model.named_modules():
|
|
636
|
+
module_name: str
|
|
637
|
+
module: torch.nn.Module
|
|
638
|
+
rf_module = wrapped_pt_module_to_rf_module(module)
|
|
639
|
+
for param_name, param in module.named_parameters(recurse=False):
|
|
640
|
+
param_name: str
|
|
641
|
+
param: torch.nn.Parameter
|
|
642
|
+
if param in visited_params:
|
|
643
|
+
continue
|
|
644
|
+
visited_params[param] = True
|
|
645
|
+
full_param_name = "%s.%s" % (module_name, param_name) if module_name else param_name
|
|
646
|
+
|
|
647
|
+
opts = {}
|
|
648
|
+
if (
|
|
649
|
+
param_name.endswith("bias")
|
|
650
|
+
or isinstance(module, blacklist_wd_modules)
|
|
651
|
+
or isinstance(rf_module, blacklist_wd_modules)
|
|
652
|
+
):
|
|
653
|
+
opts["weight_decay"] = 0.0
|
|
654
|
+
else:
|
|
655
|
+
opts["weight_decay"] = default_weight_decay
|
|
656
|
+
for pattern, lr_multiplier in lr_multipliers_by_patterns.items():
|
|
657
|
+
if fnmatchcase(full_param_name, pattern):
|
|
658
|
+
if lr_multiplier != 1.0:
|
|
659
|
+
opts["learning_rate_multiplier"] = lr_multiplier
|
|
660
|
+
break
|
|
661
|
+
params_by_opts[FrozenDict(opts)].append(param)
|
|
662
|
+
|
|
663
|
+
return [{"params": params, **opts} for opts, params in params_by_opts.items()]
|
|
664
|
+
|
|
665
|
+
config = Config(
|
|
666
|
+
dict(
|
|
667
|
+
optimizer={
|
|
668
|
+
"class": "adamw",
|
|
669
|
+
"weight_decay": 1e-3,
|
|
670
|
+
"param_groups_custom": _param_groups_custom,
|
|
671
|
+
"learning_rate_multipliers_by_patterns": {"0.*": 1.0, "1.*": 0.5, "2.*": 0.1},
|
|
672
|
+
}
|
|
673
|
+
)
|
|
674
|
+
)
|
|
675
|
+
model = torch.nn.Sequential(
|
|
676
|
+
torch.nn.Embedding(10, 5),
|
|
677
|
+
torch.nn.LayerNorm(5),
|
|
678
|
+
torch.nn.Linear(5, 5),
|
|
679
|
+
torch.nn.ReLU(),
|
|
680
|
+
)
|
|
681
|
+
updater = Updater(config=config, network=model, device=torch.device("cpu"))
|
|
682
|
+
updater.create_optimizer()
|
|
683
|
+
updater.set_current_train_step(global_train_step=0, epoch=1)
|
|
684
|
+
|
|
685
|
+
param_to_name = DictRefKeys((param, name) for name, param in model.named_parameters())
|
|
686
|
+
opt = updater.get_optimizer()
|
|
687
|
+
param_names_by_opts: Dict[FrozenDict, Set[str]] = {}
|
|
688
|
+
for group in opt.param_groups:
|
|
689
|
+
group_opts = FrozenDict({k: group[k] for k in ["weight_decay", "lr"]})
|
|
690
|
+
assert group_opts not in param_names_by_opts # unique
|
|
691
|
+
param_names_by_opts[group_opts] = {param_to_name[p] for p in group["params"]}
|
|
692
|
+
assert len(param_names_by_opts) == 4, "Expected 4 param groups"
|
|
693
|
+
for opts, ref_param_names in [
|
|
694
|
+
({"weight_decay": 0.0, "lr": 1.0}, {"0.weight"}),
|
|
695
|
+
({"weight_decay": 0.0, "lr": 0.5}, {"1.weight", "1.bias"}),
|
|
696
|
+
({"weight_decay": 0.001, "lr": 0.1}, {"2.weight"}),
|
|
697
|
+
({"weight_decay": 0.0, "lr": 0.1}, {"2.bias"}),
|
|
698
|
+
]:
|
|
699
|
+
opts = FrozenDict(opts)
|
|
700
|
+
assert opts in param_names_by_opts, f"Expected param group with opts {opts} not found"
|
|
701
|
+
param_names = param_names_by_opts[opts]
|
|
702
|
+
assert param_names == ref_param_names, (
|
|
703
|
+
f"For opts {opts}, expected param names {ref_param_names} but got {param_names}"
|
|
704
|
+
)
|
|
705
|
+
|
|
706
|
+
|
|
585
707
|
def test_optimizer_convert_aux_param():
|
|
586
708
|
# See rf_module_to_pt_module aux_params_as_buffers option.
|
|
587
709
|
# This causes a change in the optimizer state dict.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-record-and-push-to-webserver.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-chunking-blstm.12ax.config
RENAMED
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-contribrnn-lstm.12ax.config
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-maxgradnorm-lstm.12ax.config
RENAMED
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-native-lstm-lowmem.12ax.config
RENAMED
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-native-lstm.12ax.config
RENAMED
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-native-lstm2.12ax.config
RENAMED
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-native-lstm2.12ax.tuned.config
RENAMED
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-neural-transducer.12ax.config
RENAMED
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-rec-explicit-lstm.config
RENAMED
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-rec-explicit-rnn.config
RENAMED
|
File without changes
|
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-search-compiled-graph.py
RENAMED
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-tf-vanilla-lstm.12ax.config
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/demo-upd-mult-model.lstm.12ax.config
RENAMED
|
File without changes
|
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png
RENAMED
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png
RENAMED
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/IAM/create_IAM_dataset.py
RENAMED
|
File without changes
|
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/IAM/features/raw/demo.h5
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/artificial/create_test_h5.py
RENAMED
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/artificial/forwardconfig
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/artificial_rgb/create_test_h5.py
RENAMED
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/artificial_rgb/forwardconfig
RENAMED
|
File without changes
|
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/demos/mdlstm/artificial_rgb/trainconfig
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/normalization_data.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/datasets/util/feature_extraction.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{returnn-1.20250820.123936 → returnn-1.20250821.93927}/returnn/extern/WarpRna/warp-rna/.gitignore
RENAMED
|
File without changes
|