Audyn 0.0.1.dev6__tar.gz → 0.0.1.dev7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/Audyn.egg-info/PKG-INFO +1 -1
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/Audyn.egg-info/SOURCES.txt +3 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/PKG-INFO +1 -1
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/__init__.py +1 -1
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/optim/lr_scheduler.py +36 -7
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/__init__.py +2 -1
- audyn-0.0.1.dev7/audyn/utils/data/birdclef/_common/__init__.py +81 -0
- audyn-0.0.1.dev7/audyn/utils/data/birdclef/birdclef2022/__init__.py +67 -0
- audyn-0.0.1.dev7/audyn/utils/data/birdclef/birdclef2022/_download.py +36 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/birdclef/birdclef2024/__init__.py +3 -77
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/birdclef/birdclef2024/_download.py +6 -2
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/birdclef/birdclef2024/composer.py +3 -1
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/composer.py +26 -1
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/Audyn.egg-info/dependency_links.txt +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/Audyn.egg-info/requires.txt +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/Audyn.egg-info/top_level.txt +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/LICENSE +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/MANIFEST.in +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/README.md +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/criterion/__init__.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/criterion/autoregressive.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/criterion/base.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/criterion/contrastive.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/criterion/distance.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/criterion/fastspeech.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/criterion/flow.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/criterion/gan.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/criterion/glowtts.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/criterion/hifigan.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/criterion/lsgan.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/criterion/melspectrogram.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/criterion/rvqvae.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/criterion/ssast.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/criterion/vqvae.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/functional/__init__.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/functional/activation.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/functional/clustering.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/functional/vector_quantization.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/metrics/__init__.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/metrics/base.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/metrics/crossmodal.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/metrics/retrieval.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/models/__init__.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/models/ast.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/models/encodec.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/models/fastspeech.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/models/gan.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/models/glowtts.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/models/hifigan.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/models/lextransformer.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/models/passt.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/models/pixelsnail.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/models/roformer.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/models/rvqvae.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/models/soundstream.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/models/ssast.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/models/text_to_wave.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/models/vae.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/models/vqvae.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/models/waveglow.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/models/wavenet.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/modules/activation.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/modules/duration_predictor.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/modules/encodec.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/modules/fastspeech.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/modules/film.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/modules/flow.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/modules/glow.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/modules/glowtts.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/modules/normalization.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/modules/passt.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/modules/pixelcnn.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/modules/pixelsnail.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/modules/positional_encoding.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/modules/rvq.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/modules/soundstream.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/modules/vit.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/modules/vq.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/modules/vqvae.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/modules/waveglow.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/modules/wavenet.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/optim/optimizer.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/transforms/__init__.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/transforms/ast.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/transforms/birdclef.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/transforms/cqt.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/transforms/hifigan.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/transforms/hubert.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/transforms/kaldi.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/transforms/librosa.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/__init__.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/alignment/__init__.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/alignment/monotonic_align.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/clip_grad.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/audioset/__init__.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/audioset/_download.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/audioset/ast.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/audioset/composer.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/audioset/dataset.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/audioset/distributed.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/audioset/sampler.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/birdclef/birdclef2024/collator.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/birdclef/birdclef2024/dataset.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/birdclef/birdclef2024/models/baseline.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/clotho/__init__.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/clotho/text/indexing.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/clotho/text/normalization.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/clotho/text/symbols.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/clotho/text/tokenization.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/cmudict/__init__.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/cmudict/indexing.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/collator.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/dataloader.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/dataset.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/distributed.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/postprocess.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/sampler.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/tacotron/__init__.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/tacotron/text/indexing.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/tacotron/text/normalization.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/tacotron/text/numbers.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/tacotron/text/symbols.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/tacotron/text/tokenization.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/data/webdataset.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/distributed/__init__.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/__init__.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/config.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/criterion/cross_entropy.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/criterion/vqvae.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/data/defaults.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/lr_scheduler/defaults.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/lr_scheduler/none.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/lr_scheduler/transformer.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/model/defaults.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/model/gan.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/optimizer/adam.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/optimizer/adam_ema.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/optimizer/adamw.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/optimizer/gan.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/preprocess/defaults.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/preprocess/ljspeech_text-to-feat.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/preprocess/text-to-feat.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/preprocess/text-to-wave.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/system/cpu.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/system/cuda.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/system/cuda_amp.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/system/cuda_ddp.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/system/cuda_ddp_amp.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/system/defaults.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/system/mps.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/test/dataloader/defaults.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/test/dataset/defaults.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/test/dataset/torch.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/test/defaults.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/test/text_to_wave.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/train/clip_gradient/defaults.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/train/clip_gradient/gan.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/train/dataloader/defaults.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/train/dataloader/sequential-batch.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/train/dataset/defaults.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/train/dataset/sortable-torch.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/train/dataset/torch.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/train/dataset/webdataset.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/train/defaults.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/train/gan.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/train/record/defaults.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/train/record/template.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/train/template.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_conf_template/train/text-to-feat+pretrained_feat-to-wave.yaml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/_decorator.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/base.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/feat_to_wave.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/gan.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/text_to_feat.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/driver/text_to_wave.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/duration/__init__.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/github/__init__.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/hydra/__init__.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/hydra/utils.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/lab/__init__.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/logging/__init__.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/model/__init__.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/music/__init__.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/parallel/__init__.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/tensorboard/__init__.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/text/__init__.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/text/indexing.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/text/normalization.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/text/pronunciation.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/text/tokenization.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/audyn/utils/textgrid/__init__.py +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/cpp_extensions/monotonic_align/monotonic_align.cpp +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/pyproject.toml +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/setup.cfg +0 -0
- {audyn-0.0.1.dev6 → audyn-0.0.1.dev7}/setup.py +0 -0
|
@@ -101,6 +101,9 @@ audyn/utils/data/audioset/composer.py
|
|
|
101
101
|
audyn/utils/data/audioset/dataset.py
|
|
102
102
|
audyn/utils/data/audioset/distributed.py
|
|
103
103
|
audyn/utils/data/audioset/sampler.py
|
|
104
|
+
audyn/utils/data/birdclef/_common/__init__.py
|
|
105
|
+
audyn/utils/data/birdclef/birdclef2022/__init__.py
|
|
106
|
+
audyn/utils/data/birdclef/birdclef2022/_download.py
|
|
104
107
|
audyn/utils/data/birdclef/birdclef2024/__init__.py
|
|
105
108
|
audyn/utils/data/birdclef/birdclef2024/_download.py
|
|
106
109
|
audyn/utils/data/birdclef/birdclef2024/collator.py
|
|
@@ -11,11 +11,17 @@ except ImportError:
|
|
|
11
11
|
|
|
12
12
|
__all__ = [
|
|
13
13
|
"_DummyLRScheduler",
|
|
14
|
+
"_DummyLR",
|
|
14
15
|
"TransformerLRScheduler",
|
|
15
16
|
"NoamScheduler",
|
|
17
|
+
"TransformerLR",
|
|
18
|
+
"NoamLR",
|
|
16
19
|
"ExponentialWarmupLinearCooldownLRScheduler",
|
|
20
|
+
"ExponentialWarmupLinearCooldownLR",
|
|
17
21
|
"MultiLRSchedulers",
|
|
22
|
+
"MultiLR",
|
|
18
23
|
"GANLRScheduler",
|
|
24
|
+
"GANLR",
|
|
19
25
|
]
|
|
20
26
|
|
|
21
27
|
|
|
@@ -35,6 +41,10 @@ class _DummyLRScheduler:
|
|
|
35
41
|
assert len(state_dict) == 0
|
|
36
42
|
|
|
37
43
|
|
|
44
|
+
class _DummyLR(_DummyLRScheduler):
|
|
45
|
+
"""Alias of _DummyLRScheduler."""
|
|
46
|
+
|
|
47
|
+
|
|
38
48
|
class TransformerLRScheduler(_LRScheduler):
|
|
39
49
|
def __init__(
|
|
40
50
|
self,
|
|
@@ -63,6 +73,14 @@ class NoamScheduler(TransformerLRScheduler):
|
|
|
63
73
|
"""Alias of TransformerLRScheduler."""
|
|
64
74
|
|
|
65
75
|
|
|
76
|
+
class TransformerLR(TransformerLRScheduler):
|
|
77
|
+
"""Alias of TransformerLRScheduler."""
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class NoamLR(NoamScheduler):
|
|
81
|
+
"""Alias of NoamLR."""
|
|
82
|
+
|
|
83
|
+
|
|
66
84
|
class ExponentialWarmupLinearCooldownLRScheduler(LambdaLR):
|
|
67
85
|
"""Exponential warm-up + linear cool-down of learning rate.
|
|
68
86
|
|
|
@@ -71,7 +89,7 @@ class ExponentialWarmupLinearCooldownLRScheduler(LambdaLR):
|
|
|
71
89
|
Args:
|
|
72
90
|
optimizer (Optimizer): Optimizer to adjust learning rate.
|
|
73
91
|
warmup_steps (int): Number of exponential warm-up steps.
|
|
74
|
-
|
|
92
|
+
suspend_steps (int): Number of constant learning rate steps between warm-up and cool-down.
|
|
75
93
|
cooldown_steps (int): Number of linear cool-down steps after constant learning rate.
|
|
76
94
|
last_factor (float): Scale factor of learning rate at last step.
|
|
77
95
|
|
|
@@ -81,7 +99,7 @@ class ExponentialWarmupLinearCooldownLRScheduler(LambdaLR):
|
|
|
81
99
|
self,
|
|
82
100
|
optimizer: Optimizer,
|
|
83
101
|
warmup_steps: int,
|
|
84
|
-
|
|
102
|
+
suspend_steps: int,
|
|
85
103
|
cooldown_steps: int,
|
|
86
104
|
last_factor: float = 1,
|
|
87
105
|
last_epoch: int = -1,
|
|
@@ -89,14 +107,13 @@ class ExponentialWarmupLinearCooldownLRScheduler(LambdaLR):
|
|
|
89
107
|
) -> None:
|
|
90
108
|
def _lr_scheduler_lambda(step: int) -> float:
|
|
91
109
|
if step < warmup_steps:
|
|
92
|
-
step = min(step, warmup_steps)
|
|
93
110
|
normalized_step = 1 - step / warmup_steps
|
|
94
111
|
factor = math.exp(-5.0 * normalized_step**2)
|
|
95
|
-
elif step < warmup_steps +
|
|
112
|
+
elif step < warmup_steps + suspend_steps:
|
|
96
113
|
factor = 1
|
|
97
|
-
elif step < warmup_steps +
|
|
98
|
-
|
|
99
|
-
normalized_step =
|
|
114
|
+
elif step < warmup_steps + suspend_steps + cooldown_steps:
|
|
115
|
+
step_after_suspend = step - (warmup_steps + suspend_steps)
|
|
116
|
+
normalized_step = step_after_suspend / cooldown_steps
|
|
100
117
|
factor = last_factor + (1 - last_factor) * normalized_step
|
|
101
118
|
else:
|
|
102
119
|
factor = last_factor
|
|
@@ -111,6 +128,10 @@ class ExponentialWarmupLinearCooldownLRScheduler(LambdaLR):
|
|
|
111
128
|
)
|
|
112
129
|
|
|
113
130
|
|
|
131
|
+
class ExponentialWarmupLinearCooldownLR(ExponentialWarmupLinearCooldownLRScheduler):
|
|
132
|
+
"""Alias of ExponentialWarmupLinearCooldownLRScheduler."""
|
|
133
|
+
|
|
134
|
+
|
|
114
135
|
class MultiLRSchedulers:
|
|
115
136
|
"""Module to manage multiple learning rate schedulers."""
|
|
116
137
|
|
|
@@ -172,6 +193,10 @@ class MultiLRSchedulers:
|
|
|
172
193
|
lr_scheduler.load_state_dict(state_dict[name])
|
|
173
194
|
|
|
174
195
|
|
|
196
|
+
class MultiLR(MultiLRSchedulers):
|
|
197
|
+
"""Alias of MultiLRSchedulers."""
|
|
198
|
+
|
|
199
|
+
|
|
175
200
|
class GANLRScheduler:
|
|
176
201
|
def __init__(self, generator: _LRScheduler, discriminator: _LRScheduler) -> None:
|
|
177
202
|
self.generator = generator
|
|
@@ -180,3 +205,7 @@ class GANLRScheduler:
|
|
|
180
205
|
def step(self, *args, **kwargs) -> None:
|
|
181
206
|
self.generator.step(*args, **kwargs)
|
|
182
207
|
self.discriminator.step(*args, **kwargs)
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
class GANLR(GANLRScheduler):
|
|
211
|
+
"""Alias of GANLRScheduler."""
|
|
@@ -6,7 +6,7 @@ from omegaconf import DictConfig
|
|
|
6
6
|
from torch.utils.data import DataLoader
|
|
7
7
|
|
|
8
8
|
from .collator import Collator, default_collate_fn, rename_webdataset_keys
|
|
9
|
-
from .composer import AudioFeatureExtractionComposer, Composer
|
|
9
|
+
from .composer import AudioFeatureExtractionComposer, Composer, SequentialComposer
|
|
10
10
|
from .dataloader import (
|
|
11
11
|
DistributedDataLoader,
|
|
12
12
|
DistributedDynamicBatchDataLoader,
|
|
@@ -28,6 +28,7 @@ __all__ = [
|
|
|
28
28
|
"DistributedDynamicBatchDataLoader",
|
|
29
29
|
"Composer",
|
|
30
30
|
"AudioFeatureExtractionComposer",
|
|
31
|
+
"SequentialComposer",
|
|
31
32
|
"Collator",
|
|
32
33
|
"slice_feautures",
|
|
33
34
|
"take_log_features",
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import ast
|
|
2
|
+
import os
|
|
3
|
+
from typing import Any, Dict, List
|
|
4
|
+
|
|
5
|
+
__all__ = [
|
|
6
|
+
"decode_csv_line",
|
|
7
|
+
]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def decode_csv_line(line: List[str]) -> Dict[str, Any]:
|
|
11
|
+
"""Decode line of train_metadata.csv.
|
|
12
|
+
|
|
13
|
+
Args:
|
|
14
|
+
line (list): One line of train_metadata.csv split by comma (,).
|
|
15
|
+
|
|
16
|
+
Returns:
|
|
17
|
+
dict: Dictionary containing metadata of given line.
|
|
18
|
+
|
|
19
|
+
.. note::
|
|
20
|
+
|
|
21
|
+
Returned dictionary contains following values.
|
|
22
|
+
|
|
23
|
+
- filename (str): Filename without extension. e.g. ``asbfly/XC134896``.
|
|
24
|
+
- primary_label (str): Primary label of bird species.
|
|
25
|
+
- secondary_label (list): Secondary labels of bird species.
|
|
26
|
+
- type (list): Chirp types.
|
|
27
|
+
- latitude (float, optional): Latitude of recording.
|
|
28
|
+
- longitude (float, optional): Longitude of recording.
|
|
29
|
+
- scientific_name (str): Scientific name of bird.
|
|
30
|
+
- common_name (str): Common name of bird.
|
|
31
|
+
- rating (float): Rating.
|
|
32
|
+
- path (str): Path to audio file equivalent to ``filename`` + ``.ogg``.
|
|
33
|
+
e.g. ``asbfly/XC134896.ogg``.
|
|
34
|
+
|
|
35
|
+
"""
|
|
36
|
+
(
|
|
37
|
+
primary_label,
|
|
38
|
+
secondary_labels,
|
|
39
|
+
chirp_types,
|
|
40
|
+
latitude,
|
|
41
|
+
longitude,
|
|
42
|
+
scientific_name,
|
|
43
|
+
common_name,
|
|
44
|
+
_,
|
|
45
|
+
_,
|
|
46
|
+
rating,
|
|
47
|
+
_,
|
|
48
|
+
path,
|
|
49
|
+
) = line
|
|
50
|
+
|
|
51
|
+
secondary_labels = ast.literal_eval(secondary_labels)
|
|
52
|
+
chirp_types = ast.literal_eval(chirp_types)
|
|
53
|
+
secondary_labels = [secondary_label.lower() for secondary_label in secondary_labels]
|
|
54
|
+
chirp_types = [chirp_type.lower() for chirp_type in chirp_types]
|
|
55
|
+
|
|
56
|
+
filename, _ = os.path.splitext(path)
|
|
57
|
+
|
|
58
|
+
if len(latitude) > 0:
|
|
59
|
+
latitude = float(latitude)
|
|
60
|
+
else:
|
|
61
|
+
latitude = None
|
|
62
|
+
|
|
63
|
+
if len(longitude) > 0:
|
|
64
|
+
longitude = float(longitude)
|
|
65
|
+
else:
|
|
66
|
+
longitude = None
|
|
67
|
+
|
|
68
|
+
data = {
|
|
69
|
+
"filename": filename,
|
|
70
|
+
"primary_label": primary_label,
|
|
71
|
+
"secondary_label": secondary_labels,
|
|
72
|
+
"type": chirp_types,
|
|
73
|
+
"latitude": latitude,
|
|
74
|
+
"longitude": longitude,
|
|
75
|
+
"scientific_name": scientific_name,
|
|
76
|
+
"common_name": common_name,
|
|
77
|
+
"rating": float(rating),
|
|
78
|
+
"path": path,
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
return data
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import csv
|
|
2
|
+
from typing import List, Tuple
|
|
3
|
+
|
|
4
|
+
import torch
|
|
5
|
+
|
|
6
|
+
from .._common import decode_csv_line
|
|
7
|
+
from ._download import download_birdclef2022_primary_labels
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"primary_labels",
|
|
11
|
+
"num_primary_labels",
|
|
12
|
+
"stratified_split",
|
|
13
|
+
"decode_csv_line", # for compatibility with birdclef2024
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
primary_labels = download_birdclef2022_primary_labels()
|
|
17
|
+
num_primary_labels = len(primary_labels)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def stratified_split(
|
|
21
|
+
path: str,
|
|
22
|
+
train_ratio: float,
|
|
23
|
+
seed: int = 0,
|
|
24
|
+
) -> Tuple[List[str], List[str]]:
|
|
25
|
+
"""Split dataset into training and validation.
|
|
26
|
+
|
|
27
|
+
Args:
|
|
28
|
+
path (str): Path to csv file.
|
|
29
|
+
train_ratio (float): Ratio of training set.
|
|
30
|
+
seed (int): Random seed.
|
|
31
|
+
|
|
32
|
+
Returns:
|
|
33
|
+
tuple: Splits of filenames.
|
|
34
|
+
|
|
35
|
+
- list: List of training filenames.
|
|
36
|
+
- list: List of validation filenames.
|
|
37
|
+
|
|
38
|
+
"""
|
|
39
|
+
g = torch.Generator()
|
|
40
|
+
g.manual_seed(seed)
|
|
41
|
+
|
|
42
|
+
filenames = {primary_label: [] for primary_label in primary_labels}
|
|
43
|
+
train_filenames = []
|
|
44
|
+
validation_filenames = []
|
|
45
|
+
|
|
46
|
+
with open(path) as f:
|
|
47
|
+
reader = csv.reader(f)
|
|
48
|
+
|
|
49
|
+
for idx, line in enumerate(reader):
|
|
50
|
+
if idx < 1:
|
|
51
|
+
continue
|
|
52
|
+
|
|
53
|
+
primary_label, *_, filename = line
|
|
54
|
+
filenames[primary_label].append(filename)
|
|
55
|
+
|
|
56
|
+
# split dataset
|
|
57
|
+
for primary_label, _filenames in filenames.items():
|
|
58
|
+
num_files = len(_filenames)
|
|
59
|
+
indices = torch.randperm(num_files, generator=g).tolist()
|
|
60
|
+
|
|
61
|
+
for idx in indices[: int(train_ratio * num_files)]:
|
|
62
|
+
train_filenames.append(_filenames[idx])
|
|
63
|
+
|
|
64
|
+
for idx in indices[int(train_ratio * num_files) :]:
|
|
65
|
+
validation_filenames.append(_filenames[idx])
|
|
66
|
+
|
|
67
|
+
return train_filenames, validation_filenames
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import List, Optional
|
|
3
|
+
|
|
4
|
+
from .....utils import audyn_cache_dir
|
|
5
|
+
from ....github import download_file_from_github_release
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"download_birdclef2022_primary_labels",
|
|
9
|
+
]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def download_birdclef2022_primary_labels(
|
|
13
|
+
root: Optional[str] = None, url: Optional[str] = None
|
|
14
|
+
) -> List[str]:
|
|
15
|
+
filename = "primary-labels.txt"
|
|
16
|
+
|
|
17
|
+
if root is None:
|
|
18
|
+
root = os.path.join(audyn_cache_dir, "data", "birdclef2022")
|
|
19
|
+
|
|
20
|
+
if url is None:
|
|
21
|
+
url = "https://github.com/tky823/Audyn/releases/download/v0.0.1.dev6/birdclef2022-primary-labels.txt" # noqa: E501
|
|
22
|
+
|
|
23
|
+
path = os.path.join(root, filename)
|
|
24
|
+
|
|
25
|
+
if not os.path.exists(path):
|
|
26
|
+
os.makedirs(root, exist_ok=True)
|
|
27
|
+
download_file_from_github_release(url, path)
|
|
28
|
+
|
|
29
|
+
labels = []
|
|
30
|
+
|
|
31
|
+
with open(path) as f:
|
|
32
|
+
for line in f:
|
|
33
|
+
label = line.strip()
|
|
34
|
+
labels.append(label)
|
|
35
|
+
|
|
36
|
+
return labels
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
import ast
|
|
2
1
|
import csv
|
|
3
2
|
import glob
|
|
4
3
|
import os
|
|
5
|
-
from typing import
|
|
4
|
+
from typing import List, Tuple
|
|
6
5
|
|
|
7
6
|
import torch
|
|
8
7
|
|
|
8
|
+
from .._common import decode_csv_line
|
|
9
9
|
from ._download import download_birdclef2024_primary_labels
|
|
10
10
|
|
|
11
11
|
__all__ = [
|
|
@@ -13,7 +13,7 @@ __all__ = [
|
|
|
13
13
|
"num_primary_labels",
|
|
14
14
|
"stratified_split",
|
|
15
15
|
"split",
|
|
16
|
-
"decode_csv_line",
|
|
16
|
+
"decode_csv_line", # for backward compatibility
|
|
17
17
|
]
|
|
18
18
|
|
|
19
19
|
primary_labels = download_birdclef2024_primary_labels()
|
|
@@ -120,77 +120,3 @@ def split(
|
|
|
120
120
|
validation_filenames = [filenames[idx] for idx in indices[int(num_files * train_ratio) :]]
|
|
121
121
|
|
|
122
122
|
return train_filenames, validation_filenames
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
def decode_csv_line(line: List[str]) -> Dict[str, Any]:
|
|
126
|
-
"""Decode line of train_metadata.csv.
|
|
127
|
-
|
|
128
|
-
Args:
|
|
129
|
-
line (list): One line of train_metadata.csv split by comma (,).
|
|
130
|
-
|
|
131
|
-
Returns:
|
|
132
|
-
dict: Dictionary containing metadata of given line.
|
|
133
|
-
|
|
134
|
-
.. note::
|
|
135
|
-
|
|
136
|
-
Returned dictionary contains following values.
|
|
137
|
-
|
|
138
|
-
- filename (str): Filename without extension. e.g. ``asbfly/XC134896``.
|
|
139
|
-
- primary_label (str): Primary label of bird species.
|
|
140
|
-
- secondary_label (list): Secondary labels of bird species.
|
|
141
|
-
- type (list): Chirp types.
|
|
142
|
-
- latitude (float, optional): Latitude of recording.
|
|
143
|
-
- longitude (float, optional): Longitude of recording.
|
|
144
|
-
- scientific_name (str): Scientific name of bird.
|
|
145
|
-
- common_name (str): Common name of bird.
|
|
146
|
-
- rating (float): Rating.
|
|
147
|
-
- path (str): Path to audio file equivalent to ``filename`` + ``.ogg``.
|
|
148
|
-
e.g. ``asbfly/XC134896.ogg``.
|
|
149
|
-
|
|
150
|
-
"""
|
|
151
|
-
(
|
|
152
|
-
primary_label,
|
|
153
|
-
secondary_labels,
|
|
154
|
-
chirp_types,
|
|
155
|
-
latitude,
|
|
156
|
-
longitude,
|
|
157
|
-
scientific_name,
|
|
158
|
-
common_name,
|
|
159
|
-
_,
|
|
160
|
-
_,
|
|
161
|
-
rating,
|
|
162
|
-
_,
|
|
163
|
-
path,
|
|
164
|
-
) = line
|
|
165
|
-
|
|
166
|
-
secondary_labels = ast.literal_eval(secondary_labels)
|
|
167
|
-
chirp_types = ast.literal_eval(chirp_types)
|
|
168
|
-
secondary_labels = [secondary_label.lower() for secondary_label in secondary_labels]
|
|
169
|
-
chirp_types = [chirp_type.lower() for chirp_type in chirp_types]
|
|
170
|
-
|
|
171
|
-
filename, _ = os.path.splitext(path)
|
|
172
|
-
|
|
173
|
-
if len(latitude) > 0:
|
|
174
|
-
latitude = float(latitude)
|
|
175
|
-
else:
|
|
176
|
-
latitude = None
|
|
177
|
-
|
|
178
|
-
if len(longitude) > 0:
|
|
179
|
-
longitude = float(longitude)
|
|
180
|
-
else:
|
|
181
|
-
longitude = None
|
|
182
|
-
|
|
183
|
-
data = {
|
|
184
|
-
"filename": filename,
|
|
185
|
-
"primary_label": primary_label,
|
|
186
|
-
"secondary_label": secondary_labels,
|
|
187
|
-
"type": chirp_types,
|
|
188
|
-
"latitude": latitude,
|
|
189
|
-
"longitude": longitude,
|
|
190
|
-
"scientific_name": scientific_name,
|
|
191
|
-
"common_name": common_name,
|
|
192
|
-
"rating": float(rating),
|
|
193
|
-
"path": path,
|
|
194
|
-
}
|
|
195
|
-
|
|
196
|
-
return data
|
|
@@ -4,17 +4,21 @@ from typing import List, Optional
|
|
|
4
4
|
from .....utils import audyn_cache_dir
|
|
5
5
|
from ....github import download_file_from_github_release
|
|
6
6
|
|
|
7
|
+
__all__ = [
|
|
8
|
+
"download_birdclef2024_primary_labels",
|
|
9
|
+
]
|
|
10
|
+
|
|
7
11
|
|
|
8
12
|
def download_birdclef2024_primary_labels(
|
|
9
13
|
root: Optional[str] = None, url: Optional[str] = None
|
|
10
14
|
) -> List[str]:
|
|
11
|
-
filename = "
|
|
15
|
+
filename = "primary-labels.txt"
|
|
12
16
|
|
|
13
17
|
if root is None:
|
|
14
18
|
root = os.path.join(audyn_cache_dir, "data", "birdclef2024")
|
|
15
19
|
|
|
16
20
|
if url is None:
|
|
17
|
-
url = "https://github.com/tky823/Audyn/releases/download/v0.0.1.
|
|
21
|
+
url = "https://github.com/tky823/Audyn/releases/download/v0.0.1.dev6/birdclef2024-primary-labels.txt" # noqa: E501
|
|
18
22
|
|
|
19
23
|
path = os.path.join(root, filename)
|
|
20
24
|
|
|
@@ -247,12 +247,13 @@ class BirdCLEF2024AudioComposer(Composer):
|
|
|
247
247
|
|
|
248
248
|
if sample_rate != target_sample_rate:
|
|
249
249
|
audio = aF.resample(audio, sample_rate, target_sample_rate)
|
|
250
|
+
sample_rate = target_sample_rate
|
|
250
251
|
sample[sample_rate_key] = torch.full(
|
|
251
252
|
(), fill_value=sample_rate, dtype=sample_rate_dtype
|
|
252
253
|
)
|
|
253
254
|
|
|
254
255
|
if duration is not None:
|
|
255
|
-
length = int(
|
|
256
|
+
length = int(sample_rate * duration)
|
|
256
257
|
padding = length - audio.size(-1)
|
|
257
258
|
|
|
258
259
|
if padding > 0:
|
|
@@ -284,6 +285,7 @@ class BirdCLEF2024AudioComposer(Composer):
|
|
|
284
285
|
output = {
|
|
285
286
|
waveform_key: audio,
|
|
286
287
|
melspectrogram_key: melspectrogram,
|
|
288
|
+
sample_rate_key: sample[sample_rate_key],
|
|
287
289
|
filename_key: sample[filename_key],
|
|
288
290
|
}
|
|
289
291
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import re
|
|
2
|
-
from typing import Any, Callable, Dict, Iterable
|
|
2
|
+
from typing import Any, Callable, Dict, Iterable, List
|
|
3
3
|
|
|
4
4
|
import torch
|
|
5
5
|
import torch.nn as nn
|
|
@@ -9,6 +9,7 @@ from .webdataset import decode_audio, supported_audio_extensions
|
|
|
9
9
|
__all__ = [
|
|
10
10
|
"Composer",
|
|
11
11
|
"AudioFeatureExtractionComposer",
|
|
12
|
+
"SequentialComposer",
|
|
12
13
|
]
|
|
13
14
|
|
|
14
15
|
|
|
@@ -108,3 +109,27 @@ class AudioFeatureExtractionComposer(Composer):
|
|
|
108
109
|
sample[feature_key] = feature
|
|
109
110
|
|
|
110
111
|
return sample
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class SequentialComposer(Composer):
|
|
115
|
+
"""Module to apply multiple composers."""
|
|
116
|
+
|
|
117
|
+
def __init__(
|
|
118
|
+
self,
|
|
119
|
+
*composers,
|
|
120
|
+
decode_audio_as_waveform: bool = True,
|
|
121
|
+
decode_audio_as_monoral: bool = True,
|
|
122
|
+
) -> None:
|
|
123
|
+
super().__init__(
|
|
124
|
+
decode_audio_as_waveform=decode_audio_as_waveform,
|
|
125
|
+
decode_audio_as_monoral=decode_audio_as_monoral,
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
self.composers: List[Composer] = list(composers)
|
|
129
|
+
|
|
130
|
+
def process(self, sample: Dict[str, Any]) -> Dict[str, Any]:
|
|
131
|
+
"""Process to edit each sample."""
|
|
132
|
+
for composer in self.composers:
|
|
133
|
+
sample = composer.process(sample)
|
|
134
|
+
|
|
135
|
+
return sample
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|