returnn 1.20230814.164933__tar.gz → 1.20230815.191535__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {returnn-1.20230814.164933/returnn.egg-info → returnn-1.20230815.191535}/PKG-INFO +1 -1
- returnn-1.20230815.191535/_setup_info_generated.py +2 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/__init__.py +1 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/_backend.py +38 -0
- returnn-1.20230815.191535/returnn/frontend/gradient.py +74 -0
- returnn-1.20230815.191535/returnn/frontend/label_smoothing.py +114 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/signal.py +50 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_layers/_backend.py +32 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/layers/basic.py +39 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/frontend/_backend.py +39 -0
- returnn-1.20230815.191535/returnn/torch/functional/scaled_gradient.py +79 -0
- returnn-1.20230815.191535/returnn/util/math.py +11 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535/returnn.egg-info}/PKG-INFO +1 -1
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn.egg-info/SOURCES.txt +5 -0
- returnn-1.20230815.191535/tests/test_rf_gradient.py +33 -0
- returnn-1.20230815.191535/tests/test_rf_label_smoothing.py +39 -0
- returnn-1.20230814.164933/_setup_info_generated.py +0 -2
- returnn-1.20230814.164933/returnn/frontend/gradient.py +0 -15
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/.editorconfig +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/.gitignore +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/.gitmodules +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/.kateconfig +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/CHANGELOG.md +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/CODEOWNERS +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/CONTRIBUTING.md +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/LICENSE +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/MANIFEST.in +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/README.rst +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/__init__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/12AX.cluster_map +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/_setup_returnn_env.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-fwd.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-horovod-mpi.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-horovod-mpi.py.sh +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-horovod-mpi.sh +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-hyper-param-tuning.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-iter-dataset.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-list-devices.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-lua-torch-layer.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-pretrain.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-record-and-push-to-webserver.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-returnn-as-framework.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-rf.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-rhn-enwik8.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-sprint-interface.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-att-copy.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-attention.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-chunking-blstm.12ax.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-contribrnn-lstm.12ax.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-enc-dec.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-hard-att-copy.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-lstm-benchmark.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-maxgradnorm-lstm.12ax.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-native-lstm-lowmem.12ax.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-native-lstm.12ax.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-native-lstm2.12ax.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-native-lstm2.12ax.tuned.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-neural-transducer.12ax.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-rec-explicit-lstm.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-rec-explicit-rnn.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-rec-self-att.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-search-compiled-graph.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-tf-vanilla-lstm.12ax.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-timit-lstm-ctc.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-torch.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo-upd-mult-model.lstm.12ax.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/demo.sh +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/IAM_lines/a01-000u-00.png +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/IAM_lines/a01-007-04.png +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/IAM_lines/a01-007-06.png +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/README.txt +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/chars.txt +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/config_demo +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/config_fwd +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/config_real +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/create_IAM_dataset.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/decode.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/features/raw/demo.h5 +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/go.sh +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/lines.txt +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/split/eval.txt +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/split/train.txt +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/IAM/split/valid.txt +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/README.md +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/artificial/create_test_h5.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/artificial/forwardconfig +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/artificial/go.sh +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/artificial/trainconfig +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/artificial_rgb/create_test_h5.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/artificial_rgb/forwardconfig +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/artificial_rgb/go.sh +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/demos/mdlstm/artificial_rgb/trainconfig +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/pyproject.toml +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/requirements.txt +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/__init__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/__main__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/__old_mod_loader__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/__setup__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/config.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/__init__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/audio.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/basic.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/bundle_file.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/cached.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/cached2.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/generating.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/hdf.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/lm.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/map.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/meta.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/multi_proc.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/normalization_data.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/numpy_dump.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/raw_wav.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/sprint.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/stereo.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/util/__init__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/util/feature_extraction.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/util/strings.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/datasets/util/vocabulary.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/engine/__init__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/engine/base.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/engine/batch.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/__init__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/__main__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/.git +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/.gitignore +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/LICENSE +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/README.md +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/aligner.gif +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/check.png +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/core.cu +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/core.h +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/core_cpu.cpp +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/pytorch_binding/LICENSE +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/pytorch_binding/MANIFEST.in +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/pytorch_binding/README.md +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/pytorch_binding/binding.cpp +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.cu +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/pytorch_binding/core.h +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/pytorch_binding/requirements.txt +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/pytorch_binding/setup.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/pytorch_binding/warp_rna/test.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/ref_rna.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/setup.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op.cc +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/src/warp_rna_op_kernel_tmpl.h +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/tensorflow_binding/warp_rna/__init__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/WarpRna/warp-rna/test.cpp +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/__init__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/graph_editor/README.md +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/graph_editor/__init__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/graph_editor/edit.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/graph_editor/reroute.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/graph_editor/select.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/graph_editor/subgraph.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/graph_editor/transform.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/extern/graph_editor/util.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/forward_iface.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/_numpy_backend.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/_utils.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/array_.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/attention.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/cond.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/const.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/container.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/control_flow_ctx.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/conv.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/device.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/dims.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/dropout.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/dtype.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/encoder/__init__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/encoder/base.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/encoder/conformer.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/init.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/linear.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/loop.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/loss.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/math_.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/matmul.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/module.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/normalization.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/parameter.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/rand.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/rec.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/reduce.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/run_ctx.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/state.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/tensor_array.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/types.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/import_/__init__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/import_/common.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/import_/git.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/import_/import_.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/learning_rate_control.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/log.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/native_op.cpp +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/native_op.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/pretrain.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/sprint/__init__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/sprint/cache.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/sprint/control.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/sprint/error_signals.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/sprint/extern_interface.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/sprint/interface.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tensor/README.md +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tensor/__init__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tensor/_dim_extra.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tensor/_tensor_extra.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tensor/_tensor_mixin_base.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tensor/_tensor_op_overloads.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tensor/control_flow_ctx.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tensor/dim.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tensor/marked_dim.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tensor/tensor.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tensor/tensor_dict.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tensor/utils.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/__init__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/compat.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/data_pipeline.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/distributed.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/engine.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_layers/__init__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_layers/_utils.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_layers/cond.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_layers/config_entry_points.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_layers/debug_eager_mode.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_layers/dims.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_layers/layer.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_layers/make_layer.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_layers/parameter_assign.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_layers/prev_tensor_ref.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_low_level/__init__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_low_level/_backend.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/horovod.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/hyper_param_tuning.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/layers/__init__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/layers/base.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/layers/rec.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/layers/segmental_model.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/layers/signal_processing.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/layers/variable.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/native_op.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/network.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/sprint.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/updater.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/util/__init__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/util/basic.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/util/data.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/util/gradient_checkpoint.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/util/ken_lm.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/util/open_fst.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/README.md +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/__init__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/data/__init__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/data/pipeline.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/data/returnn_dataset_wrapper.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/data/tensor_utils.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/distributed.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/engine.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/frontend/__init__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/frontend/_rand.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/frontend/bridge.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/functional/README.md +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/functional/__init__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/torch/updater.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/__init__.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/basic.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/better_exchook.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/bpe.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/debug.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/debug_helpers.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/fsa.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/literal_py_to_pickle.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/pprint.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/py-to-pickle.cpp +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/py_compat.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/sig_proc.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/util/task_system.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn.egg-info/dependency_links.txt +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn.egg-info/top_level.txt +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/rnn.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/setup.cfg +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/setup.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/DummySprintExec.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/PyCharm-inspection-profile.xml +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/PyCharm.idea/.gitignore +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/PyCharm.idea/.name +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/PyCharm.idea/codeStyleSettings.xml +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/PyCharm.idea/codeStyles/Project.xml +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/PyCharm.idea/codeStyles/codeStyleConfig.xml +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/PyCharm.idea/inspectionProfiles/Project_Default.xml +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/PyCharm.idea/inspectionProfiles/profiles_settings.xml +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/PyCharm.idea/misc.xml +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/PyCharm.idea/modules.xml +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/PyCharm.idea/returnn.iml +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/PyCharm.idea/scopes/scope_settings.xml +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/_set_num_threads1.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/_setup_returnn_env.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/_setup_test_env.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/bpe-unicode-demo.codes +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/bpe-unicode-demo.vocab +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/lexicon_opt.fst +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/lexicon_opt.isyms +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/lexicon_opt.jpg +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/lexicon_opt.osyms +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/lint_common.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/pycharm-inspect.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/pylint.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/returnn-as-framework.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/rf_utils.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/spelling.dic +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_Config.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_Dataset.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_Fsa.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_GeneratingDataset.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_HDFDataset.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_LearningRateControl.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_Log.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_MultiProcDataset.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_PTDataset.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_Pretrain.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_ResNet.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_SprintDataset.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_SprintInterface.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_TFEngine.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_TFNativeOp.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_TFNetworkLayer.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_TFNetworkRecLayer.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_TFNetworkSigProcLayer.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_TFUpdater.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_TFUtil.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_TF_determinism.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_TaskSystem.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_TaskSystem_SharedMem.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_TranslationDataset.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_Util.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_demos.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_fork_exec.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_hdf_dump.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_array.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_attention.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_base.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_cond.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_const.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_container.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_conv.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_encoder_conformer.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_loop.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_math.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_normalization.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_rec.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_reduce.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_rf_signal.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_tensor.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_tools.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_torch_engine.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_torch_frontend.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tests/test_torch_internal_frontend.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/_setup_returnn_env.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/analyze-dataset-batches.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/bliss-collect-seq-lens.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/bliss-dump-text.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/bliss-get-segment-names.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/bliss-to-ogg-zip.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/bpe-create-lexicon.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/calculate-word-error-rate.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/cleanup-old-models.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/collect-orth-symbols.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/collect-words.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/compile_native_op.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/compile_tf_graph.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/debug-dump-search-scores.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/debug-plot-search-scores.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/dump-dataset-raw-strings.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/dump-dataset.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/dump-forward-stats.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/dump-forward.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/dump-network-json.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/dump-pickle.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/extract_state_tying_from_dataset.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/get-attention-weights.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/get-best-model-epoch.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/hdf_dump.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/hdf_dump_translation_dataset.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/import-blocks-mt-model.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/import-t2t-mt-model.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/.gitignore +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/Makefile +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/README.md +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/example/README.md +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/example/libs_list +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/example/network.040/i600_m600_m600.sgd_b16_lr0_cl2.newbobabs.keep_over_epoch.lstm2.config +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/example/rescore_lattice.sh +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/example/state_vars_list +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/example/tensor_names_list +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/file.h +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/htklatticerescorer.cc +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/htklatticerescorer.h +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/main.cc +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/rescorer.h +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/vocabulary.cc +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/lattice_rescorer/vocabulary.h +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/tf_avg_checkpoints.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/tf_inspect_checkpoint.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/tf_inspect_summary_log.py +0 -0
- {returnn-1.20230814.164933 → returnn-1.20230815.191535}/tools/torch_export_to_onnx.py +0 -0
{returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/_backend.py
RENAMED
@@ -288,6 +288,22 @@ class Backend(Generic[T]):
         res.raw_tensor = tensor._raw_backend.cast_raw(tensor.raw_tensor, dtype)
         return res

+    @staticmethod
+    def set_requires_gradient(tensor: Tensor):
+        """
+        :param tensor:
+        """
+        raise NotImplementedError
+
+    @staticmethod
+    def gradient(y: Tensor, x: Tensor) -> Tensor:
+        """
+        :param y:
+        :param x:
+        :return: gradient of y w.r.t. x
+        """
+        raise NotImplementedError
+
     @staticmethod
     def stop_gradient(tensor: Tensor) -> Tensor:
         """
@@ -296,6 +312,28 @@ class Backend(Generic[T]):
         """
         raise NotImplementedError

+    @staticmethod
+    def scaled_gradient(tensor: Tensor, scale: Union[float, Tensor]) -> Tensor:
+        """
+        :param tensor:
+        :param scale:
+        :return: tensor with scaled gradient
+        """
+        raise NotImplementedError
+
+    @staticmethod
+    def scaled_gradient_ext(
+        x: Tensor, *, scale: float = 1.0, shift: float = 0.0, scale_shift_by_sum_over_axis: Optional[Dim] = None
+    ):
+        """
+        :param x:
+        :param scale: will scale gradient by this value
+        :param shift: will shift gradient by this value
+        :param scale_shift_by_sum_over_axis: if given, will scale and shift by the sum over the given axis
+        :return: just x, but gradient in backward pass will be transformed accordingly
+        """
+        raise NotImplementedError
+
     @staticmethod
     def merge_dims(
         source: Tensor,
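These are abstract hooks on the backend interface; the framework backends later in this diff (TF layers, PyTorch) override them. As a hedged sketch of the dispatch pattern — not part of the diff itself — the frontend-level wrappers in the new gradient.py below route through `tensor._raw_backend`:

    # Minimal sketch of the dispatch idiom, assuming `t` is a returnn.tensor.Tensor:
    # noinspection PyProtectedMember
    backend = t._raw_backend        # e.g. TorchBackend or ReturnnLayersBackend
    t2 = backend.stop_gradient(t)   # static method on the selected backend class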
returnn-1.20230815.191535/returnn/frontend/gradient.py
ADDED
@@ -0,0 +1,74 @@
+"""
+Utilities which affect the gradient
+"""
+
+from __future__ import annotations
+from typing import Optional, Union
+from returnn.tensor import Tensor, Dim
+
+
+__all__ = ["set_requires_gradient", "gradient", "stop_gradient", "scaled_gradient", "scaled_gradient_ext"]
+
+
+def set_requires_gradient(source: Tensor):
+    """
+    :param source:
+    :return: nothing, modifies source in-place
+    """
+    # noinspection PyProtectedMember
+    return source._raw_backend.set_requires_gradient(source)
+
+
+def gradient(y: Tensor, x: Tensor) -> Tensor:
+    """
+    :param y: some scalar
+    :param x: some tensor
+    :return: gradient of y w.r.t. x
+    """
+    # noinspection PyProtectedMember
+    return y._raw_backend.gradient(y, x)
+
+
+def stop_gradient(source: Tensor) -> Tensor:
+    """wraps tf.stop_gradient or torch detach"""
+    # noinspection PyProtectedMember
+    return source._raw_backend.stop_gradient(source)
+
+
+def scaled_gradient(source: Tensor, scale: Union[float, Tensor]) -> Tensor:
+    """
+    :param source:
+    :param scale: if constant 0., will use :func:`stop_gradient`.
+        Can be used as gradient reversal layer (with negative factor).
+    :return: source with scaled gradient
+    """
+    if not isinstance(scale, Tensor) and scale == 0.0:
+        return stop_gradient(source)
+    # noinspection PyProtectedMember
+    return source._raw_backend.scaled_gradient(source, scale)
+
+
+def scaled_gradient_ext(
+    source: Tensor,
+    *,
+    scale: Union[float, Tensor],
+    shift: Optional[Union[float, Tensor]] = None,
+    scale_shift_by_sum_over_axis: Optional[Dim] = None,
+) -> Tensor:
+    """
+    Just `identity` in the forward pass.
+    Scales the gradient by some factor in backprop.
+    Can be used as gradient reversal layer (with negative factor).
+    For TF, uses :func:`returnn.tf.util.basic.scaled_gradient`, or :func:`tf.stop_gradient`
+
+    :param source:
+    :param scale: if constant 0. and no shift, will use :func:`stop_gradient`
+    :param shift:
+    :param scale_shift_by_sum_over_axis: if given, calculates the sum over this axis (absolute values)
+        and multiplies the shift value by this sum.
+    :return: source with transformed gradient
+    """
+    # noinspection PyProtectedMember
+    return source._raw_backend.scaled_gradient_ext(
+        source, scale=scale, shift=shift, scale_shift_by_sum_over_axis=scale_shift_by_sum_over_axis
+    )
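A hedged usage sketch, not from the diff: as the docstring notes, a negative factor turns `scaled_gradient` into a gradient reversal layer, e.g. for domain-adversarial training. The `encoder`/`domain_classifier` names below are hypothetical:

    import returnn.frontend as rf

    features = encoder(inputs)                                # some rf.Tensor
    reversed_feat = rf.scaled_gradient(features, scale=-1.0)  # identity forward, negated grad backward
    domain_logits = domain_classifier(reversed_feat)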
returnn-1.20230815.191535/returnn/frontend/label_smoothing.py
ADDED
@@ -0,0 +1,114 @@
+"""
+Label smoothing
+"""
+
+from __future__ import annotations
+from typing import Optional, Union, Sequence
+from returnn.tensor import Tensor, Dim
+import returnn.frontend as rf
+
+
+__all__ = ["label_smoothing", "smooth_one_hot", "label_smoothed_log_prob_gradient"]
+
+
+def label_smoothing(prob: Tensor, smoothing: Union[Tensor, float], *, axis: Optional[Dim] = None) -> Tensor:
+    """
+    Label smoothing, often used for cross entropy.
+
+    In case of sparse data, it will become dense (via :func:`smooth_one_hot`)
+    and the target label will get probability (1 - smoothing).
+    """
+    if not axis:
+        assert prob.feature_dim or prob.sparse_dim
+        axis = prob.feature_dim or prob.sparse_dim
+    if prob.sparse_dim:
+        assert prob.sparse_dim == axis
+        return rf.smooth_one_hot(prob, label_prob=1.0 - smoothing)
+    else:
+        assert axis in prob.dims_set
+        # Make it consistent to the sparse case.
+        # Value of 1.0 should result in (1 - smoothing).
+        # Value of 0.0 should result in smoothing / (dim - 1).
+        # Sum over all should still remain 1.0.
+        dim = axis.dimension
+        floor_prob = smoothing / (dim - 1)
+        factor = 1.0 - dim * floor_prob
+        # Case for prob[i] == 0 is clear.
+        # Case for prob[i] == 1: 1 - dim * floor_prob + floor_prob = 1 + (1 - dim) * floor_prob = 1 - smoothing
+        # Sum over all: 1 - dim * floor_prob + floor_prob * dim = 1
+        return prob * factor + floor_prob
+
+
+def smooth_one_hot(source: Tensor, *, label_prob: Union[Tensor, float]) -> Tensor:
+    """
+    Smooth variant of :func:`one_hot`.
+    Uses ``label_prob`` for the labels and ``(1 - label_prob) / (dim - 1)`` for the remaining values.
+    This is used for label smoothing.
+    """
+    assert source.sparse_dim
+    if source.sparse_dim.dimension is None:
+        raise NotImplementedError(f"smooth_one_hot({source}) not implemented for dynamic dims")
+    return rf.sparse_to_dense(
+        source, label_value=label_prob, other_value=(1.0 - label_prob) / (source.sparse_dim.dimension - 1)
+    )
+
+
+def label_smoothed_log_prob_gradient(
+    log_prob: Tensor,
+    smoothing: Union[Tensor, float],
+    *,
+    axis: Optional[Dim] = None,
+    exclude_labels: Optional[Sequence[int]] = None,
+) -> Tensor:
+    """
+    :param log_prob: shape [...,D] (not necessarily the same as loss)
+    :param smoothing: smoothing factor, for :func:`label_smoothing`
+    :param axis: label axis. uses feature_dim by default
+    :param exclude_labels: list of labels to exclude from smoothing (e.g. blank)
+
+    Assume some cross-entropy-like loss:
+
+        loss = - sum_i target_prob[i] * log_prob[i] .
+
+    The sum is over the label indices i (corresponding to the ``axis`` argument).
+    Then the gradient of loss w.r.t. log_prob[i] is:
+
+        grad_logprob[i] loss = -target_prob[i] .
+
+    We assume that the negative gradient is a probability distribution, and apply :func:`label_smoothing` on it.
+    More specifically, we apply the same scale and shift as in the :func:`label_smoothing` function
+    via :func:`scaled_gradient`.
+
+    Just as a side remark: assume
+
+        log_prob = log_softmax(z) .
+
+    The gradient of log_softmax is:
+
+        grad_z[j] log_prob[i] = delta(i==j) - softmax(z)[j] .
+
+    Then the gradient w.r.t. z[j] is:
+
+        grad_z[j] loss = sum_i (grad_logprob[i] loss) (grad_z[j] logprob[i])
+            = sum_i -target_prob[i] delta(i==j) + target_prob[i] softmax(z)[j]
+            = -target_prob[j] + (sum_i target_prob[i]) softmax(z)[j]
+            = softmax(z)[j] - target_prob[j]  # assuming (sum_i target_prob[i]) == 1
+
+    """
+    if not axis:
+        assert log_prob.feature_dim
+        axis = log_prob.feature_dim
+    # See formula above for label_smoothing.
+    dim = axis.dimension
+    floor_prob = smoothing / (dim - 1)
+    factor = 1.0 - dim * floor_prob
+    if exclude_labels:
+        indices = rf.range_over_dim(axis)
+        mask = True
+        for label in exclude_labels:
+            mask = mask & (indices != label)
+        factor = rf.where(mask, factor, 1.0)
+        floor_prob = rf.where(mask, floor_prob, 0.0)
+    # The gradient is expected to be the negative target prob, thus negative floor_prob.
+    # The gradient is expected to be 0. for masked frames, thus the clipping logic.
+    return rf.scaled_gradient_ext(log_prob, scale=factor, shift=-floor_prob, scale_shift_by_sum_over_axis=axis)
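A quick numeric sanity check of the dense-case formula in `label_smoothing` (plain NumPy, illustration only, not part of the diff):

    import numpy as np

    dim, smoothing = 4, 0.1
    floor_prob = smoothing / (dim - 1)
    factor = 1.0 - dim * floor_prob
    one_hot = np.array([0.0, 1.0, 0.0, 0.0])
    smoothed = one_hot * factor + floor_prob
    assert np.isclose(smoothed[1], 1.0 - smoothing)        # target keeps 1 - smoothing
    assert np.isclose(smoothed[0], smoothing / (dim - 1))  # others share the smoothing mass
    assert np.isclose(smoothed.sum(), 1.0)                 # still sums to 1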
{returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/frontend/signal.py
RENAMED
@@ -5,12 +5,17 @@ stft etc

 from __future__ import annotations
 from typing import Optional, Union, Tuple
+import math
 import numpy
 import functools
+from returnn.util import math as util_math
 from returnn.tensor import Tensor, Dim
 import returnn.frontend as rf


+__all__ = ["stft", "mel_filterbank", "log_mel_filterbank_from_raw"]
+
+
 def stft(
     x: Tensor,
     *,
@@ -230,3 +235,48 @@ def _mel_filter_bank_matrix_np(
     f_mat[i1, i2 - 1] = el_val

     return f_mat
+
+
+def log_mel_filterbank_from_raw(
+    raw_audio: Tensor,
+    *,
+    in_spatial_dim: Dim,
+    out_dim: Dim,
+    sampling_rate: int = 16_000,
+    window_len: float = 0.025,
+    step_len: float = 0.010,
+    n_fft: Optional[int] = None,
+    log_base: Union[int, float] = 10,
+) -> Tuple[Tensor, Dim]:
+    """
+    log mel filterbank features
+
+    :param raw_audio: (..., in_spatial_dim, ...). if it has a feature_dim with dimension 1, it is squeezed away.
+    :param in_spatial_dim:
+    :param out_dim: nr of mel filters.
+    :param sampling_rate: samples per second
+    :param window_len: in seconds
+    :param step_len: in seconds
+    :param n_fft: fft_size, n_fft. Should match fft_length from :func:`stft`.
+        If not provided, next power-of-two from window_num_frames.
+    :param log_base: e.g. 10 or math.e
+    """
+    if raw_audio.feature_dim and raw_audio.feature_dim.dimension == 1:
+        raw_audio = rf.squeeze(raw_audio, axis=raw_audio.feature_dim)
+    window_num_frames = int(window_len * sampling_rate)
+    step_num_frames = int(step_len * sampling_rate)
+    if not n_fft:
+        n_fft = util_math.next_power_of_two(window_num_frames)
+    spectrogram, out_spatial_dim, in_dim_ = rf.stft(
+        raw_audio,
+        in_spatial_dim=in_spatial_dim,
+        frame_step=step_num_frames,
+        frame_length=window_num_frames,
+        fft_length=n_fft,
+    )
+    power_spectrogram = rf.abs(spectrogram) ** 2.0
+    mel_fbank = rf.mel_filterbank(power_spectrogram, in_dim=in_dim_, out_dim=out_dim, sampling_rate=sampling_rate)
+    log_mel_fbank = rf.safe_log(mel_fbank, eps=1e-10)
+    if log_base != math.e:
+        log_mel_fbank = log_mel_fbank * (1.0 / math.log(log_base))
+    return log_mel_fbank, out_spatial_dim
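A hedged usage sketch of the new feature-extraction helper (`raw_audio` and `samples_dim` are hypothetical names; inside a model, with a backend active):

    from returnn.tensor import Dim
    from returnn.frontend.signal import log_mel_filterbank_from_raw

    mel_dim = Dim(name="mel", dimension=80)  # 80 mel filters
    # raw_audio: an rf.Tensor over (batch, samples_dim), optionally with a size-1 feature dim
    log_mel, frames_dim = log_mel_filterbank_from_raw(
        raw_audio, in_spatial_dim=samples_dim, out_dim=mel_dim, sampling_rate=16_000
    )
    # log_mel's dims include frames_dim (the new frame axis) and mel_dim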
{returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/frontend_layers/_backend.py
RENAMED
@@ -141,11 +141,43 @@ class ReturnnLayersBackend(Backend[Layer]):
         """cast"""
         return rfl.make_layer({"class": "cast", "from": tensor, "dtype": dtype}, name="cast")

+    @staticmethod
+    def set_requires_gradient(tensor: Tensor):
+        """
+        set requires gradient; not needed for TensorFlow, will always calculate whatever is needed
+        """
+
+    @staticmethod
+    def gradient(y: Tensor, x: Tensor) -> Tensor:
+        """gradient"""
+        return rfl.make_layer({"class": "gradient", "y": y, "x": x}, name="gradient")
+
     @staticmethod
     def stop_gradient(tensor: Tensor) -> Tensor:
         """stop grad"""
         return rfl.make_layer({"class": "scaled_grad", "from": tensor, "scale": 0}, name="stop_gradient")

+    @staticmethod
+    def scaled_gradient(tensor: Tensor, scale: Union[float, Tensor]) -> Tensor:
+        """scaled gradient"""
+        return rfl.make_layer({"class": "scaled_grad", "from": tensor, "scale": scale}, name="scaled_gradient")
+
+    @staticmethod
+    def scaled_gradient_ext(
+        x: Tensor, *, scale: float = 1.0, shift: float = 0.0, scale_shift_by_sum_over_axis: Optional[Dim] = None
+    ):
+        """scaled gradient ext"""
+        return rfl.make_layer(
+            {
+                "class": "scaled_grad",
+                "from": x,
+                "scale": scale,
+                "shift": shift,
+                "scale_shift_by_sum_over_axis": scale_shift_by_sum_over_axis,
+            },
+            name="scaled_gradient_ext",
+        )
+
     @staticmethod
     def merge_dims(
         source: Tensor,
{returnn-1.20230814.164933 → returnn-1.20230815.191535}/returnn/tf/layers/basic.py
RENAMED
@@ -11153,6 +11153,45 @@ class FastBaumWelchLayer(_ConcatInputLayer):
         return get_concat_sources_data_template(sources, name="%s_output" % name).copy_as_time_major()


+class GradientLayer(_ConcatInputLayer):
+    """
+    Calculates the gradient of y w.r.t. x.
+    """
+
+    layer_class = "gradient"
+
+    def __init__(self, y: LayerBase, x: LayerBase, **kwargs):
+        """
+        :param y:
+        :param x:
+        """
+        super(GradientLayer, self).__init__(**kwargs)
+        self.output.placeholder = tf.gradients(ys=y.output.placeholder, xs=x.output.placeholder)[0]
+
+    @classmethod
+    def transform_config_dict(cls, d, network, get_layer):
+        """
+        :param dict[str] d:
+        :param returnn.tf.network.TFNetwork network:
+        :param get_layer:
+        """
+        d.setdefault("from", [])
+        super(GradientLayer, cls).transform_config_dict(d, network=network, get_layer=get_layer)
+        d["y"] = get_layer(d["y"])
+        d["x"] = get_layer(d["x"])
+
+    @classmethod
+    def get_out_data_from_opts(cls, y: LayerBase, x: LayerBase, name: str, **kwargs):
+        """
+        :param LayerBase y:
+        :param LayerBase x:
+        :param str name:
+        :rtype: Data
+        """
+        assert y.output.batch_ndim == 0, f"GradientLayer {name!r}: y should be a scalar, got {y}"
+        return x.output.copy_template(name="%s_output" % name)
+
+
 class SyntheticGradientLayer(_ConcatInputLayer):
     """
     This is a generalized way to be able to replace the true gradient with any kind of predicted gradient.
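For illustration, the new layer would appear in a TF net dict roughly like this (a hedged sketch; the layer names are made up, and `y`/`x` are layer names resolved via `transform_config_dict`):

    network = {
        "scalar_loss": {...},  # any layer producing a scalar output
        "grad": {"class": "gradient", "y": "scalar_loss", "x": "data"},  # gradient of y w.r.t. x
    }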
--- returnn-1.20230814.164933/returnn/torch/frontend/_backend.py
+++ returnn-1.20230815.191535/returnn/torch/frontend/_backend.py
@@ -181,6 +181,18 @@ class TorchBackend(Backend[torch.Tensor]):
         """cast"""
         return raw_tensor.to(dtype=TorchBackend.as_dtype_raw(dtype))
 
+    @staticmethod
+    def set_requires_gradient(tensor: Tensor[torch.Tensor]):
+        """set requires grad"""
+        tensor.raw_tensor.requires_grad = True
+
+    @staticmethod
+    def gradient(y: Tensor, x: Tensor) -> Tensor:
+        """gradient"""
+        out = x.copy_template(name="gradient")
+        out.raw_tensor = torch.autograd.grad(y.raw_tensor, x.raw_tensor, create_graph=True)[0]
+        return out
+
     @staticmethod
     def stop_gradient(tensor: Tensor) -> Tensor:
         """stop grad"""
@@ -188,6 +200,33 @@ class TorchBackend(Backend[torch.Tensor]):
         out.raw_tensor = out.raw_tensor.detach()
         return out
 
+    @staticmethod
+    def scaled_gradient(tensor: Tensor, scale: Union[float, Tensor]) -> Tensor:
+        """scaled gradient"""
+        from returnn.torch.functional.scaled_gradient import scaled_gradient
+
+        out = tensor.copy()
+        out.raw_tensor = scaled_gradient(out.raw_tensor, scale=scale)
+        return out
+
+    @staticmethod
+    def scaled_gradient_ext(
+        x: Tensor, *, scale: float = 1.0, shift: float = 0.0, scale_shift_by_sum_over_axis: Optional[Dim] = None
+    ):
+        """scaled gradient ext"""
+        from returnn.torch.functional.scaled_gradient import scaled_gradient_ext
+
+        out = x.copy()
+        out.raw_tensor = scaled_gradient_ext(
+            out.raw_tensor,
+            scale=scale,
+            shift=shift,
+            scale_shift_by_sum_over_axis=x.get_axis_from_description(scale_shift_by_sum_over_axis, allow_int=False)
+            if scale_shift_by_sum_over_axis is not None
+            else None,
+        )
+        return out
+
     @staticmethod
     def merge_dims(
         source: Tensor,
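The gradient method above is a thin wrapper around torch.autograd.grad; create_graph=True keeps the returned gradient itself differentiable, so it can feed into a loss. A standalone sketch of the underlying call (not part of the diff):

    import torch

    x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
    y = (x ** 2).sum()  # scalar, as expected by the backend's gradient()
    (g,) = torch.autograd.grad(y, x, create_graph=True)
    print(g)  # tensor([2., 4., 6.], ...) -- dy/dx = 2x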
--- /dev/null
+++ returnn-1.20230815.191535/returnn/torch/functional/scaled_gradient.py
@@ -0,0 +1,79 @@
+"""
+Scaled gradients for backward pass.
+This also covers gradient reversal, which is simply the case with scale=-1.
+We actually extend the simple scaling by some further optional transformations like shifting.
+
+The code is adapted from our TF implementation, see :func:`returnn.tf.util.basic.scaled_gradient`.
+
+For some discussion on the specific implementation, see:
+https://discuss.pytorch.org/t/gradient-scaling-reversal/186392
+
+Also see other reference implementations:
+https://github.com/facebookresearch/fairseq/blob/100cd91db19bb/fairseq/modules/grad_multiply.py
+https://github.com/janfreyberg/pytorch-revgrad/blob/449fa763a76d/src/pytorch_revgrad/functional.py
+https://github.com/tadeephuy/GradientReversal/blob/5d9857d63/gradient_reversal/functional.py
+"""
+
+
+from __future__ import annotations
+from typing import Optional
+import torch
+
+
+# noinspection PyMethodOverriding,PyAbstractClass,PyMissingOrEmptyDocstring
+class _ScaledGradient(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, x: torch.Tensor, scale: float) -> torch.Tensor:
+        ctx.scale = scale
+        return x
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        return grad_output * ctx.scale, None
+
+
+def scaled_gradient(x: torch.Tensor, scale: float) -> torch.Tensor:
+    """
+    :param x:
+    :param scale:
+    :return: just x, however, in backward pass, the gradient is scaled by the given factor
+    """
+    return _ScaledGradient.apply(x, scale)
+
+
+# noinspection PyMethodOverriding,PyAbstractClass,PyMissingOrEmptyDocstring
+class _ScaledGradientExt(torch.autograd.Function):
+    @staticmethod
+    def forward(
+        ctx, x: torch.Tensor, scale: float = 1.0, shift: float = 0.0, scale_shift_by_sum_over_axis: Optional[int] = None
+    ):
+        ctx.scale = scale
+        ctx.shift = shift
+        ctx.scale_shift_by_sum_over_axis = scale_shift_by_sum_over_axis
+        return x
+
+    @staticmethod
+    def backward(ctx, grad):
+        grad_out = grad
+        if isinstance(ctx.scale, torch.Tensor) or ctx.scale != 1:
+            grad_out = grad_out * ctx.scale
+        if isinstance(ctx.shift, torch.Tensor) or ctx.shift != 0:
+            if ctx.scale_shift_by_sum_over_axis is not None:
+                m = torch.sum(torch.abs(grad), dim=ctx.scale_shift_by_sum_over_axis, keepdim=True)
+                grad_out = grad_out + ctx.shift * m
+            else:
+                grad_out = grad_out + ctx.shift
+        return grad_out, None, None, None
+
+
+def scaled_gradient_ext(
+    x: torch.Tensor, *, scale: float = 1.0, shift: float = 0.0, scale_shift_by_sum_over_axis: Optional[int] = None
+):
+    """
+    :param x:
+    :param scale: will scale gradient by this value
+    :param shift: will shift gradient by this value
+    :param scale_shift_by_sum_over_axis: if given, will scale and shift by the sum over the given axis
+    :return: just x, but gradient in backward pass will be transformed accordingly
+    """
+    return _ScaledGradientExt.apply(x, scale, shift, scale_shift_by_sum_over_axis)
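A quick sanity check of the forward-identity / backward-scaling contract; with scale=-1.0 this is exactly the gradient reversal mentioned in the module docstring (a standalone sketch, not part of the diff):

    import torch
    from returnn.torch.functional.scaled_gradient import scaled_gradient

    x = torch.ones(3, requires_grad=True)
    y = scaled_gradient(x, scale=-1.0)  # forward pass: y == x
    assert torch.equal(y, x)
    y.sum().backward()
    print(x.grad)  # tensor([-1., -1., -1.]) -- gradient reversed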
--- returnn-1.20230814.164933/returnn.egg-info/SOURCES.txt
+++ returnn-1.20230815.191535/returnn.egg-info/SOURCES.txt
@@ -169,6 +169,7 @@ returnn/frontend/dropout.py
 returnn/frontend/dtype.py
 returnn/frontend/gradient.py
 returnn/frontend/init.py
+returnn/frontend/label_smoothing.py
 returnn/frontend/linear.py
 returnn/frontend/loop.py
 returnn/frontend/loss.py
@@ -262,6 +263,7 @@ returnn/torch/frontend/_rand.py
 returnn/torch/frontend/bridge.py
 returnn/torch/functional/README.md
 returnn/torch/functional/__init__.py
+returnn/torch/functional/scaled_gradient.py
 returnn/util/__init__.py
 returnn/util/basic.py
 returnn/util/better_exchook.py
@@ -270,6 +272,7 @@ returnn/util/debug.py
 returnn/util/debug_helpers.py
 returnn/util/fsa.py
 returnn/util/literal_py_to_pickle.py
+returnn/util/math.py
 returnn/util/pprint.py
 returnn/util/py-to-pickle.cpp
 returnn/util/py_compat.py
@@ -328,6 +331,8 @@ tests/test_rf_const.py
 tests/test_rf_container.py
 tests/test_rf_conv.py
 tests/test_rf_encoder_conformer.py
+tests/test_rf_gradient.py
+tests/test_rf_label_smoothing.py
 tests/test_rf_loop.py
 tests/test_rf_math.py
 tests/test_rf_normalization.py
--- /dev/null
+++ returnn-1.20230815.191535/tests/test_rf_gradient.py
@@ -0,0 +1,33 @@
+"""
+RETURNN frontend (returnn.frontend) tests
+"""
+
+from __future__ import annotations
+import _setup_test_env  # noqa
+import returnn.frontend as rf
+from returnn.tensor import Tensor, Dim, TensorDict, batch_dim
+from rf_utils import run_model
+
+
+def test_scaled_gradient():
+    time_dim = Dim(Tensor("time", [batch_dim], dtype="int32"))
+    in_dim = Dim(7, name="in")
+    extern_data = TensorDict(
+        {
+            "data": Tensor("data", [batch_dim, time_dim, in_dim], dtype="float32"),
+        }
+    )
+
+    # noinspection PyShadowingNames
+    def _forward_step(*, model: rf.Module, extern_data: TensorDict):
+        model  # noqa  # unused
+        data = extern_data["data"]
+        rf.set_requires_gradient(data)
+
+        out = rf.scaled_gradient(data, scale=-0.5)
+        out.mark_as_default_output(shape=(batch_dim, time_dim, in_dim))
+
+        grad = rf.gradient(rf.reduce_sum(out, axis=out.dims, use_mask=False), data)
+        grad.mark_as_output("grad")
+
+    run_model(extern_data, lambda *, epoch, step: rf.Module(), _forward_step)
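Expected behavior of this test: scaled_gradient is the identity in the forward pass, and the sum's gradient w.r.t. out is all ones, so the "grad" output should be -0.5 everywhere.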
--- /dev/null
+++ returnn-1.20230815.191535/tests/test_rf_label_smoothing.py
@@ -0,0 +1,39 @@
+"""
+RETURNN frontend (returnn.frontend) tests
+"""
+
+from __future__ import annotations
+import _setup_test_env  # noqa
+import returnn.frontend as rf
+from returnn.tensor import Tensor, Dim, TensorDict, batch_dim
+from rf_utils import run_model
+
+
+def test_label_smoothed_log_prob_gradient():
+    time_dim = Dim(Tensor("time", [batch_dim], dtype="int32"))
+    vocab_dim = Dim(7, name="in")
+    extern_data = TensorDict(
+        {
+            "data": Tensor("data", [batch_dim, time_dim, vocab_dim], dtype="float32", feature_dim=vocab_dim),
+            "targets": Tensor("targets", [batch_dim, time_dim], dtype="int32", sparse_dim=vocab_dim),
+        }
+    )
+
+    # noinspection PyShadowingNames
+    def _forward_step(*, model: rf.Module, extern_data: TensorDict):
+        model  # noqa  # unused
+        data = extern_data["data"]
+        targets = extern_data["targets"]
+        rf.set_requires_gradient(data)
+
+        log_prob = rf.log_softmax(data, axis=vocab_dim)
+        out = rf.label_smoothed_log_prob_gradient(log_prob, 0.1)
+        loss = rf.cross_entropy(target=targets, estimated=log_prob, estimated_type="log-probs", axis=vocab_dim)
+
+        out.mark_as_default_output(shape=(batch_dim, time_dim, vocab_dim))
+        loss.mark_as_output("loss")
+
+        grad = rf.gradient(rf.reduce_sum(loss, axis=loss.dims), data)
+        grad.mark_as_output("grad")
+
+    run_model(extern_data, lambda *, epoch, step: rf.Module(), _forward_step)
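For context: label_smoothed_log_prob_gradient comes from the new returnn/frontend/label_smoothing.py (listed in the SOURCES.txt hunk above but not shown in this excerpt). Judging by its name and this test, it applies label smoothing only in the backward pass of the log-probs, leaving the forward value untouched, plausibly built on the scaled_gradient_ext machinery added above. For reference, standard label smoothing with factor eps over V classes mixes the one-hot target with a uniform distribution; a minimal PyTorch sketch (smooth_one_hot is a hypothetical helper, not from this diff):

    import torch

    def smooth_one_hot(targets: torch.Tensor, num_classes: int, smoothing: float = 0.1) -> torch.Tensor:
        """Standard label smoothing: (1 - eps) * one_hot + eps * uniform."""
        one_hot = torch.nn.functional.one_hot(targets, num_classes).to(torch.float32)
        return one_hot * (1.0 - smoothing) + smoothing / num_classes

    # e.g. smooth_one_hot(torch.tensor([2]), num_classes=7): true class gets 0.9 + 0.1/7, all others 0.1/7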