BatchalignHK 0.7.21.post7__tar.gz → 0.7.22__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/BatchalignHK.egg-info/PKG-INFO +16 -1
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/BatchalignHK.egg-info/SOURCES.txt +4 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/BatchalignHK.egg-info/requires.txt +8 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/PKG-INFO +16 -1
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/README.md +7 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/cli/cli.py +47 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/cli/dispatch.py +2 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/__init__.py +4 -1
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/asr/rev.py +2 -2
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/asr/tencent.py +38 -14
- batchalignhk-0.7.22/batchalign/pipelines/avqi/__init__.py +8 -0
- batchalignhk-0.7.22/batchalign/pipelines/avqi/engine.py +264 -0
- batchalignhk-0.7.22/batchalign/pipelines/diarization/__init__.py +1 -0
- batchalignhk-0.7.22/batchalign/pipelines/diarization/pyannote.py +103 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/dispatch.py +8 -2
- batchalignhk-0.7.22/batchalign/version +3 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/setup.py +9 -3
- batchalignhk-0.7.21.post7/batchalign/version +0 -3
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/BatchalignHK.egg-info/dependency_links.txt +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/BatchalignHK.egg-info/entry_points.txt +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/BatchalignHK.egg-info/top_level.txt +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/LICENSE +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/MANIFEST.in +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/__init__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/__main__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/cli/__init__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/constants.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/document.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/errors.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/__init__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/core.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/exception.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/logging.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/realtime_meeting.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/speech_recognizer.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/speech_synthesizer.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/speech_transcriber.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/stream_input_tts.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/token.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/util.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/version.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/websocket/__init__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/websocket/_abnf.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/websocket/_app.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/websocket/_cookiejar.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/websocket/_core.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/websocket/_exceptions.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/websocket/_handshake.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/websocket/_http.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/websocket/_logging.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/websocket/_socket.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/websocket/_ssl_compat.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/websocket/_url.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/websocket/_utils.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/websocket/tests/__init__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/websocket/tests/echo-server.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/websocket/tests/test_abnf.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/websocket/tests/test_app.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/websocket/tests/test_cookiejar.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/websocket/tests/test_http.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/websocket/tests/test_url.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/extern/nls/websocket/tests/test_websocket.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/formats/__init__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/formats/base.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/formats/chat/__init__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/formats/chat/file.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/formats/chat/generator.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/formats/chat/lexer.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/formats/chat/parser.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/formats/chat/utils.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/formats/textgrid/file.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/models/__init__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/models/resolve.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/models/speaker/__init__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/models/speaker/config.yaml +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/models/speaker/infer.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/models/speaker/utils.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/models/training/__init__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/models/training/run.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/models/training/utils.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/models/utils.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/models/utterance/__init__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/models/utterance/cantonese_infer.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/models/utterance/dataset.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/models/utterance/execute.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/models/utterance/infer.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/models/utterance/prep.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/models/utterance/train.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/models/wave2vec/__init__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/models/wave2vec/infer_fa.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/models/whisper/__init__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/asr/aliyun.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/asr/funaudio.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/asr/num2chinese.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/asr/oai_whisper.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/base.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/morphosyntax/ud.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/pipeline.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/translate/__init__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/translate/gtrans.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/translate/seamless.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/translate/utils.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/utr/funaudio_utr.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/utr/tencent_utr.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/tests/__init__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/tests/conftest.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/tests/test_document.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/utils/__init__.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/utils/abbrev.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/utils/compounds.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/utils/config.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/utils/dp.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/utils/names.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/batchalign/utils/utils.py +0 -0
- {batchalignhk-0.7.21.post7 → batchalignhk-0.7.22}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: BatchalignHK
|
|
3
|
-
Version: 0.7.21.post7
|
|
3
|
+
Version: 0.7.22
|
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
|
@@ -18,6 +18,8 @@ Requires-Dist: pydub
|
|
|
18
18
|
Requires-Dist: plotly>=5.3.0
|
|
19
19
|
Requires-Dist: transformers>=4.38.2
|
|
20
20
|
Requires-Dist: tokenizers>=0.14.1
|
|
21
|
+
Requires-Dist: numba>=0.61.0
|
|
22
|
+
Requires-Dist: numpy<=2.2
|
|
21
23
|
Requires-Dist: pycountry>=22.3
|
|
22
24
|
Requires-Dist: stanza[transformers]>=1.10.1
|
|
23
25
|
Requires-Dist: scipy~=1.11
|
|
@@ -42,6 +44,12 @@ Requires-Dist: aliyun-python-sdk-core>=2.13.3
|
|
|
42
44
|
Requires-Dist: oss2
|
|
43
45
|
Requires-Dist: openai-whisper>=20240930
|
|
44
46
|
Requires-Dist: funasr
|
|
47
|
+
Requires-Dist: cos-python-sdk-v5
|
|
48
|
+
Requires-Dist: openai-whisper
|
|
49
|
+
Requires-Dist: llvmlite>=0.44.0
|
|
50
|
+
Requires-Dist: praat-parselmouth==0.4.6
|
|
51
|
+
Requires-Dist: pyannote.audio
|
|
52
|
+
Requires-Dist: onnxruntime
|
|
45
53
|
Provides-Extra: dev
|
|
46
54
|
Requires-Dist: pytest; extra == "dev"
|
|
47
55
|
Provides-Extra: train
|
|
@@ -83,8 +91,15 @@ UV_PYTHON=3.11 uv tool install batchalign
|
|
|
83
91
|
|
|
84
92
|
#### Windows
|
|
85
93
|
|
|
94
|
+
There are two commands used to install Batchalign. Run both in `powershell`:
|
|
95
|
+
|
|
86
96
|
```
|
|
87
97
|
powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Restart `powershell` and run the second command:
|
|
101
|
+
|
|
102
|
+
```
|
|
88
103
|
uv tool install batchalign
|
|
89
104
|
```
|
|
90
105
|
|
|
@@ -100,6 +100,8 @@ batchalign/pipelines/asr/tencent.py
|
|
|
100
100
|
batchalign/pipelines/asr/utils.py
|
|
101
101
|
batchalign/pipelines/asr/whisper.py
|
|
102
102
|
batchalign/pipelines/asr/whisperx.py
|
|
103
|
+
batchalign/pipelines/avqi/__init__.py
|
|
104
|
+
batchalign/pipelines/avqi/engine.py
|
|
103
105
|
batchalign/pipelines/cleanup/__init__.py
|
|
104
106
|
batchalign/pipelines/cleanup/cleanup.py
|
|
105
107
|
batchalign/pipelines/cleanup/disfluencies.py
|
|
@@ -108,6 +110,8 @@ batchalign/pipelines/cleanup/retrace.py
|
|
|
108
110
|
batchalign/pipelines/cleanup/support/filled_pauses.eng
|
|
109
111
|
batchalign/pipelines/cleanup/support/replacements.eng
|
|
110
112
|
batchalign/pipelines/cleanup/support/test.test
|
|
113
|
+
batchalign/pipelines/diarization/__init__.py
|
|
114
|
+
batchalign/pipelines/diarization/pyannote.py
|
|
111
115
|
batchalign/pipelines/fa/__init__.py
|
|
112
116
|
batchalign/pipelines/fa/wave2vec_fa.py
|
|
113
117
|
batchalign/pipelines/fa/whisper_fa.py
|
|
@@ -8,6 +8,8 @@ pydub
|
|
|
8
8
|
plotly>=5.3.0
|
|
9
9
|
transformers>=4.38.2
|
|
10
10
|
tokenizers>=0.14.1
|
|
11
|
+
numba>=0.61.0
|
|
12
|
+
numpy<=2.2
|
|
11
13
|
pycountry>=22.3
|
|
12
14
|
stanza[transformers]>=1.10.1
|
|
13
15
|
scipy~=1.11
|
|
@@ -32,6 +34,12 @@ aliyun-python-sdk-core>=2.13.3
|
|
|
32
34
|
oss2
|
|
33
35
|
openai-whisper>=20240930
|
|
34
36
|
funasr
|
|
37
|
+
cos-python-sdk-v5
|
|
38
|
+
openai-whisper
|
|
39
|
+
llvmlite>=0.44.0
|
|
40
|
+
praat-parselmouth==0.4.6
|
|
41
|
+
pyannote.audio
|
|
42
|
+
onnxruntime
|
|
35
43
|
|
|
36
44
|
[dev]
|
|
37
45
|
pytest
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: BatchalignHK
|
|
3
|
-
Version: 0.7.21.post7
|
|
3
|
+
Version: 0.7.22
|
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
|
@@ -18,6 +18,8 @@ Requires-Dist: pydub
|
|
|
18
18
|
Requires-Dist: plotly>=5.3.0
|
|
19
19
|
Requires-Dist: transformers>=4.38.2
|
|
20
20
|
Requires-Dist: tokenizers>=0.14.1
|
|
21
|
+
Requires-Dist: numba>=0.61.0
|
|
22
|
+
Requires-Dist: numpy<=2.2
|
|
21
23
|
Requires-Dist: pycountry>=22.3
|
|
22
24
|
Requires-Dist: stanza[transformers]>=1.10.1
|
|
23
25
|
Requires-Dist: scipy~=1.11
|
|
@@ -42,6 +44,12 @@ Requires-Dist: aliyun-python-sdk-core>=2.13.3
|
|
|
42
44
|
Requires-Dist: oss2
|
|
43
45
|
Requires-Dist: openai-whisper>=20240930
|
|
44
46
|
Requires-Dist: funasr
|
|
47
|
+
Requires-Dist: cos-python-sdk-v5
|
|
48
|
+
Requires-Dist: openai-whisper
|
|
49
|
+
Requires-Dist: llvmlite>=0.44.0
|
|
50
|
+
Requires-Dist: praat-parselmouth==0.4.6
|
|
51
|
+
Requires-Dist: pyannote.audio
|
|
52
|
+
Requires-Dist: onnxruntime
|
|
45
53
|
Provides-Extra: dev
|
|
46
54
|
Requires-Dist: pytest; extra == "dev"
|
|
47
55
|
Provides-Extra: train
|
|
@@ -83,8 +91,15 @@ UV_PYTHON=3.11 uv tool install batchalign
|
|
|
83
91
|
|
|
84
92
|
#### Windows
|
|
85
93
|
|
|
94
|
+
There are two commands used to install Batchalign. Run both in `powershell`:
|
|
95
|
+
|
|
86
96
|
```
|
|
87
97
|
powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Restart `powershell` and run the second command:
|
|
101
|
+
|
|
102
|
+
```
|
|
88
103
|
uv tool install batchalign
|
|
89
104
|
```
|
|
90
105
|
|
|
@@ -22,8 +22,15 @@ UV_PYTHON=3.11 uv tool install batchalign
|
|
|
22
22
|
|
|
23
23
|
#### Windows
|
|
24
24
|
|
|
25
|
+
There are two commands used to install Batchalign. Run both in `powershell`:
|
|
26
|
+
|
|
25
27
|
```
|
|
26
28
|
powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Restart `powershell` and run the second command:
|
|
32
|
+
|
|
33
|
+
```
|
|
27
34
|
uv tool install batchalign
|
|
28
35
|
```
|
|
29
36
|
|
|
@@ -386,6 +386,52 @@ def benchmark(ctx, in_dir, out_dir, lang, num_speakers, whisper, tencent, **kwar
|
|
|
386
386
|
in_dir, out_dir,
|
|
387
387
|
loader, writer, C,
|
|
388
388
|
asr="whisper" if whisper else ("funaudio" if funaudio else ("tencent" if tencent else "rev")), **kwargs)
|
|
389
|
+
asr="whisper" if whisper else "rev", **kwargs)
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
#################### AVQI ################################
|
|
393
|
+
|
|
394
|
+
@batchalign.command()
|
|
395
|
+
@click.argument("cs_file", type=click.Path(exists=True, file_okay=True))
|
|
396
|
+
@click.argument("sv_file", type=click.Path(exists=True, file_okay=True))
|
|
397
|
+
@click.option("--lang",
|
|
398
|
+
help="sample language in three-letter ISO 3166-1 alpha-3 code",
|
|
399
|
+
show_default=True,
|
|
400
|
+
default="eng",
|
|
401
|
+
type=str)
|
|
402
|
+
@click.pass_context
|
|
403
|
+
def avqi(ctx, cs_file, sv_file, lang, **kwargs):
|
|
404
|
+
"""Calculate Acoustic Voice Quality Index (AVQI) from continuous speech and sustained vowel audio files."""
|
|
405
|
+
|
|
406
|
+
# Import AVQI engine
|
|
407
|
+
from batchalign.pipelines.avqi import AVQIEngine
|
|
408
|
+
|
|
409
|
+
# Get output file path (same directory as cs_file, with .avqi.txt extension)
|
|
410
|
+
cs_path = Path(cs_file)
|
|
411
|
+
output_file = cs_path.with_suffix('.avqi.txt')
|
|
412
|
+
|
|
413
|
+
# Create AVQI engine
|
|
414
|
+
avqi_engine = AVQIEngine()
|
|
415
|
+
|
|
416
|
+
try:
|
|
417
|
+
# Calculate AVQI
|
|
418
|
+
C.print(f"\n[blue]Calculating AVQI[/blue] for:")
|
|
419
|
+
C.print(f" Continuous Speech: [cyan]{cs_file}[/cyan]")
|
|
420
|
+
C.print(f" Sustained Vowel: [cyan]{sv_file}[/cyan]")
|
|
421
|
+
C.print(f" Language: [cyan]{lang}[/cyan]")
|
|
422
|
+
C.print(f" Output: [cyan]{output_file}[/cyan]\n")
|
|
423
|
+
|
|
424
|
+
results = avqi_engine.analyze(cs_file, sv_file, str(output_file), lang)
|
|
425
|
+
|
|
426
|
+
C.print(f"[bold green]✓ AVQI calculation completed![/bold green]")
|
|
427
|
+
C.print(f"[bold]AVQI Score: {results['avqi']:.3f}[/bold]")
|
|
428
|
+
C.print(f"Results saved to: [cyan]{output_file}[/cyan]\n")
|
|
429
|
+
|
|
430
|
+
except Exception as e:
|
|
431
|
+
C.print(f"[bold red]ERROR[/bold red]: {str(e)}")
|
|
432
|
+
if ctx.obj["verbose"] > 0:
|
|
433
|
+
import traceback
|
|
434
|
+
C.print(traceback.format_exc())
|
|
389
435
|
|
|
390
436
|
|
|
391
437
|
#################### SETUP ################################
|
|
@@ -409,3 +455,4 @@ def version(ctx, **kwargs):
|
|
|
409
455
|
f"[italic]{RELEASE_NOTES.strip()}[/italic]"+"\n" +
|
|
410
456
|
"\nDeveloped by Brian MacWhinney and Houjun Liu")
|
|
411
457
|
C.print("\n\n"+ptr+"\n\n")
|
|
458
|
+
|
|
@@ -191,9 +191,11 @@ def _dispatch(command, lang, num_speakers,
|
|
|
191
191
|
extra_info={"extra_input": extr_data_mapping.get(file)},
|
|
192
192
|
**kw)
|
|
193
193
|
msgs = [escape(str(i.message)).strip() for i in w]
|
|
194
|
+
msgs = [i for i in msgs if "torchaudio" not in i.lower()]
|
|
194
195
|
# write the format, as needed
|
|
195
196
|
writer(doc, output)
|
|
196
197
|
# print any warnings
|
|
198
|
+
|
|
197
199
|
if len(msgs) > 0:
|
|
198
200
|
if ctx.obj["verbose"] > 1:
|
|
199
201
|
Console().print(f"\n[bold yellow]WARN[/bold yellow] on {file}:\n","\n".join(msgs)+"\n")
|
|
@@ -13,4 +13,7 @@ from .utr import WhisperUTREngine, RevUTREngine, TencentUTREngine, FunAudioUTREn
|
|
|
13
13
|
from .analysis import EvaluationEngine
|
|
14
14
|
from .utterance import StanzaUtteranceEngine
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
from .translate import SeamlessTranslationModel, GoogleTranslateEngine
|
|
17
|
+
from .avqi import AVQIEngine
|
|
18
|
+
|
|
19
|
+
from .diarization import PyannoteEngine
|
|
@@ -28,9 +28,9 @@ class RevEngine(BatchalignEngine):
|
|
|
28
28
|
# if there is no utterance segmentation scheme, we only
|
|
29
29
|
# run ASR
|
|
30
30
|
if self.__engine:
|
|
31
|
-
return [ Task.ASR, Task.
|
|
31
|
+
return [ Task.ASR, Task.UTTERANCE_SEGMENTATION ]
|
|
32
32
|
else:
|
|
33
|
-
return [ Task.ASR
|
|
33
|
+
return [ Task.ASR ]
|
|
34
34
|
|
|
35
35
|
def __init__(self, key:str=None, lang="eng", num_speakers=2):
|
|
36
36
|
|
|
@@ -26,10 +26,13 @@ import soundfile as sf
|
|
|
26
26
|
import base64
|
|
27
27
|
from tencentcloud.common.credential import Credential
|
|
28
28
|
from tencentcloud.asr.v20190614.asr_client import AsrClient, models
|
|
29
|
+
from qcloud_cos import CosConfig
|
|
30
|
+
from qcloud_cos import CosS3Client
|
|
29
31
|
|
|
30
32
|
import asyncio
|
|
31
33
|
import tempfile
|
|
32
34
|
import os
|
|
35
|
+
import uuid
|
|
33
36
|
# from pydub import AudioSegment
|
|
34
37
|
# from pydub.effects import normalize
|
|
35
38
|
# from pydub.exceptions import CouldntDecodeError
|
|
@@ -54,9 +57,21 @@ class TencentEngine(BatchalignEngine):
|
|
|
54
57
|
try:
|
|
55
58
|
id = config["asr"]["engine.tencent.id"]
|
|
56
59
|
key = config["asr"]["engine.tencent.key"]
|
|
60
|
+
region = config["asr"]["engine.tencent.region"]
|
|
61
|
+
bucket_name = config["asr"]["engine.tencent.bucket"]
|
|
57
62
|
except KeyError:
|
|
58
63
|
raise ConfigError("No Tencent Cloud key found. Tencent Cloud was not set up! Please write one yourself and place it at ~/.batchalign.ini.")
|
|
59
64
|
|
|
65
|
+
config = CosConfig(
|
|
66
|
+
Region=region,
|
|
67
|
+
SecretId=id,
|
|
68
|
+
SecretKey=key,
|
|
69
|
+
Token=None,
|
|
70
|
+
Scheme="https"
|
|
71
|
+
)
|
|
72
|
+
self.__bucket = CosS3Client(config)
|
|
73
|
+
self.__bucket_name = bucket_name
|
|
74
|
+
|
|
60
75
|
self.__lang_code = lang
|
|
61
76
|
self.__num_speakers = num_speakers
|
|
62
77
|
|
|
@@ -120,15 +135,22 @@ class TencentEngine(BatchalignEngine):
|
|
|
120
135
|
def generate(self, f, **kwargs):
|
|
121
136
|
lang = self.__lang
|
|
122
137
|
client = self.__client
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
138
|
+
bucket = self.__bucket
|
|
139
|
+
bucket_name = self.__bucket_name
|
|
140
|
+
uid = str(uuid.uuid4())
|
|
141
|
+
|
|
142
|
+
# read and upload the cos path
|
|
143
|
+
# f = "/Users/houjun/Documents/Projects/talkbank-alignment/input/SD05.mp3"
|
|
144
|
+
L.info(f"Tencent is uploading '{pathlib.Path(f).stem}'...")
|
|
145
|
+
response = bucket.upload_file(
|
|
146
|
+
Bucket=bucket_name,
|
|
147
|
+
LocalFilePath=f,
|
|
148
|
+
Key=uid+pathlib.Path(f).suffix,
|
|
149
|
+
PartSize=1,
|
|
150
|
+
MAXThread=10,
|
|
151
|
+
EnableMD5=False
|
|
152
|
+
)
|
|
126
153
|
|
|
127
|
-
L.info(f"Uploading '{pathlib.Path(f).stem}'...")
|
|
128
|
-
# we will send the file for processing
|
|
129
|
-
if not str(f).startswith("http"):
|
|
130
|
-
with open(f, "rb") as image_file:
|
|
131
|
-
encoded_string = base64.b64encode(image_file.read())
|
|
132
154
|
|
|
133
155
|
req = models.CreateRecTaskRequest()
|
|
134
156
|
if lang in {'zho', 'yue', 'wuu', 'nan','hak'}:
|
|
@@ -138,12 +160,8 @@ class TencentEngine(BatchalignEngine):
|
|
|
138
160
|
req.ResTextFormat = 1
|
|
139
161
|
req.SpeakerDiarization = 1
|
|
140
162
|
req.ChannelNum = 1
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
req.SourceType = 1
|
|
144
|
-
else:
|
|
145
|
-
req.Url = f
|
|
146
|
-
req.SourceType = 0
|
|
163
|
+
req.Url = response["Location"]
|
|
164
|
+
req.SourceType = 0
|
|
147
165
|
resp = client.CreateRecTask(req)
|
|
148
166
|
|
|
149
167
|
L.info(f"Tencent is transcribing '{pathlib.Path(f).stem}'...")
|
|
@@ -158,6 +176,12 @@ class TencentEngine(BatchalignEngine):
|
|
|
158
176
|
if res.Data.Status in ["3", 3]:
|
|
159
177
|
raise RuntimeError(f"Tencent reports job failed! error='{res.Data.ErrorMsg}'")
|
|
160
178
|
|
|
179
|
+
# delete the file
|
|
180
|
+
response = bucket.delete_object(
|
|
181
|
+
Bucket=bucket_name,
|
|
182
|
+
Key=response["Key"]
|
|
183
|
+
)
|
|
184
|
+
|
|
161
185
|
turns = []
|
|
162
186
|
for i in res.Data.ResultDetail:
|
|
163
187
|
turn = []
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AVQI Engine for Batchalign2
|
|
3
|
+
Acoustic Voice Quality Index calculation for voice quality assessment
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import parselmouth
|
|
7
|
+
import numpy as np
|
|
8
|
+
from parselmouth.praat import call
|
|
9
|
+
import re
|
|
10
|
+
from typing import Tuple, Dict, Optional
|
|
11
|
+
import os
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
import logging
|
|
14
|
+
|
|
15
|
+
from batchalign.pipelines.base import BatchalignEngine
|
|
16
|
+
from batchalign.document import Task
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
L = logging.getLogger('batchalign')
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class AVQIEngine(BatchalignEngine):
|
|
23
|
+
"""Engine for calculating Acoustic Voice Quality Index (AVQI)."""
|
|
24
|
+
|
|
25
|
+
def __init__(self):
|
|
26
|
+
super().__init__()
|
|
27
|
+
self._tasks = [Task.FEATURE_EXTRACT]
|
|
28
|
+
|
|
29
|
+
@property
|
|
30
|
+
def tasks(self):
|
|
31
|
+
return self._tasks
|
|
32
|
+
|
|
33
|
+
def extract_voiced_segments(self, sound):
|
|
34
|
+
"""Extract voiced segments from audio."""
|
|
35
|
+
original = call(sound, "Copy", "original")
|
|
36
|
+
sampling_rate = call(original, "Get sampling frequency")
|
|
37
|
+
onlyVoice = call("Create Sound", "onlyVoice", 0, 0.001, sampling_rate, "0")
|
|
38
|
+
textgrid = call(
|
|
39
|
+
original,
|
|
40
|
+
"To TextGrid (silences)",
|
|
41
|
+
50,
|
|
42
|
+
0.003,
|
|
43
|
+
-25,
|
|
44
|
+
0.1,
|
|
45
|
+
0.1,
|
|
46
|
+
"silence",
|
|
47
|
+
"sounding",
|
|
48
|
+
)
|
|
49
|
+
intervals = call(
|
|
50
|
+
[original, textgrid],
|
|
51
|
+
"Extract intervals where",
|
|
52
|
+
1,
|
|
53
|
+
False,
|
|
54
|
+
"does not contain",
|
|
55
|
+
"silence",
|
|
56
|
+
)
|
|
57
|
+
onlyLoud = call(intervals, "Concatenate")
|
|
58
|
+
globalPower = call(onlyLoud, "Get power in air")
|
|
59
|
+
voicelessThreshold = globalPower * 0.3
|
|
60
|
+
signalEnd = call(onlyLoud, "Get end time")
|
|
61
|
+
windowBorderLeft = call(onlyLoud, "Get start time")
|
|
62
|
+
windowWidth = 0.03
|
|
63
|
+
while windowBorderLeft + windowWidth <= signalEnd:
|
|
64
|
+
part = call(
|
|
65
|
+
onlyLoud,
|
|
66
|
+
"Extract part",
|
|
67
|
+
windowBorderLeft,
|
|
68
|
+
windowBorderLeft + windowWidth,
|
|
69
|
+
"Rectangular",
|
|
70
|
+
1.0,
|
|
71
|
+
False,
|
|
72
|
+
)
|
|
73
|
+
partialPower = call(part, "Get power in air")
|
|
74
|
+
if partialPower > voicelessThreshold:
|
|
75
|
+
try:
|
|
76
|
+
start = 0.0025
|
|
77
|
+
startZero = call(part, "Get nearest zero crossing", start)
|
|
78
|
+
if startZero is not None and not np.isinf(startZero):
|
|
79
|
+
onlyVoice = call([onlyVoice, part], "Concatenate")
|
|
80
|
+
except:
|
|
81
|
+
pass
|
|
82
|
+
windowBorderLeft += 0.03
|
|
83
|
+
return onlyVoice
|
|
84
|
+
|
|
85
|
+
def calculate_avqi_features(self, cs_file, sv_file):
    """Compute the AVQI score and the six measures it is built from.

    Both recordings are loaded with ``parselmouth.Sound`` and passed through
    Praat's "Filter (stop Hann band)" with arguments (0, 34, 0.1). The
    continuous speech is reduced to its voiced frames by
    ``extract_voiced_segments``; the sustained vowel is cut to its final
    3 seconds when longer than that. The two parts are concatenated and every
    measure is taken on the concatenation.

    Parameters
    ----------
    cs_file
        Continuous-speech recording, passed to ``parselmouth.Sound``.
    sv_file
        Sustained-vowel recording, passed to ``parselmouth.Sound``.

    Returns
    -------
    tuple
        ``(avqi, features)`` where ``features`` is a dict with the keys
        ``cpps``, ``hnr``, ``shimmer_local``, ``shimmer_local_db``,
        ``slope`` and ``tilt``.
    """
    cs_sound = parselmouth.Sound(cs_file)
    sv_sound = parselmouth.Sound(sv_file)
    cs_filtered = call(cs_sound, "Filter (stop Hann band)", 0, 34, 0.1)
    sv_filtered = call(sv_sound, "Filter (stop Hann band)", 0, 34, 0.1)
    voiced_cs = self.extract_voiced_segments(cs_filtered)

    # Use at most the last 3 seconds of the sustained vowel.
    sv_duration = call(sv_filtered, "Get total duration")
    if sv_duration > 3:
        sv_part = call(
            sv_filtered,
            "Extract part",
            sv_duration - 3,
            sv_duration,
            "rectangular",
            1,
            False,
        )
    else:
        sv_part = call(sv_filtered, "Copy", "sv_part")
    concatenated = call([voiced_cs, sv_part], "Concatenate")

    # Smoothed cepstral peak prominence.
    powercepstrogram = call(concatenated, "To PowerCepstrogram", 60, 0.002, 5000, 50)
    cpps = call(
        powercepstrogram,
        "Get CPPS",
        False,
        0.01,
        0.001,
        60,
        330,
        0.05,
        "Parabolic",
        0.001,
        0,
        "Straight",
        "Robust",
    )

    # Long-term average spectrum: slope of the spectrum itself, and tilt
    # (the same slope measure taken on its trend line).
    ltas = call(concatenated, "To Ltas", 1)
    slope = call(ltas, "Get slope", 0, 1000, 1000, 10000, "energy")
    tilt = None
    try:
        # "Compute trend line" hands the trend back as a new object rather
        # than modifying ``ltas``. The slope has to be read from that
        # returned object; reading it from ``ltas`` again merely repeats
        # ``slope``.
        trend = call(ltas, "Compute trend line", 1, 10000)
        if trend is not None:
            tilt = call(trend, "Get slope", 0, 1000, 1000, 10000, "energy")
    except Exception as err:
        L.warning("LTAS trend line could not be computed: %s", err)
    if tilt is None:
        # Last-resort constant offset kept from the earlier implementation.
        L.warning("LTAS tilt unavailable; substituting slope + 5.5")
        tilt = slope + 5.5

    # Shimmer, measured on a periodic point process (50-400 Hz).
    pointprocess = call(concatenated, "To PointProcess (periodic, cc)", 50, 400)
    shimmer_raw = call(
        [concatenated, pointprocess],
        "Get shimmer (local)",
        0,
        0,
        0.0001,
        0.02,
        1.3,
        1.6,
    )
    # Scaled by 100 before use (presumably fraction -> percent).
    shim = shimmer_raw * 100
    shdb = call(
        [concatenated, pointprocess],
        "Get shimmer (local_dB)",
        0,
        0,
        0.0001,
        0.02,
        1.3,
        1.6,
    )

    # Harmonics-to-noise ratio, parsed out of the textual voice report.
    pitch = call(
        concatenated,
        "To Pitch (cc)",
        0,
        75,
        15,
        False,
        0.03,
        0.45,
        0.01,
        0.35,
        0.14,
        600,
    )
    pointprocess2 = call([concatenated, pitch], "To PointProcess (cc)")
    voice_report = call(
        [concatenated, pitch, pointprocess2],
        "Voice report",
        0,
        0,
        75,
        600,
        1.3,
        1.6,
        0.03,
        0.45,
    )
    hnr_match = re.search(
        r"Mean harmonics-to-noise ratio:\s*([-+]?\d*\.?\d+)", voice_report
    )
    if hnr_match:
        hnr = float(hnr_match.group(1))
    else:
        # Keep the 0.0 default, but make the missing value visible.
        L.warning("Mean harmonics-to-noise ratio not found in voice report; using 0.0")
        hnr = 0.0

    avqi = (
        4.152
        - (0.177 * cpps)
        - (0.006 * hnr)
        - (0.037 * shim)
        + (0.941 * shdb)
        + (0.01 * slope)
        + (0.093 * tilt)
    ) * 2.8902
    return avqi, {
        "cpps": cpps,
        "hnr": hnr,
        "shimmer_local": shim,
        "shimmer_local_db": shdb,
        "slope": slope,
        "tilt": tilt,
    }
|
|
201
|
+
|
|
202
|
+
def analyze(self, cs_file: str, sv_file: str, output_file: str, lang: str = 'eng', **kwargs) -> Dict:
    """
    Analyze audio files, calculate AVQI and write a plain-text report.

    Parameters
    ----------
    cs_file : str
        Path to continuous speech audio file
    sv_file : str
        Path to sustained vowel audio file
    output_file : str
        Path to output file; overwritten with one "Name: value" line per
        measure (three decimals) followed by a "Language" line
    lang : str
        Language code (default: 'eng'); only echoed into the report
    **kwargs
        Accepted and ignored.

    Returns
    -------
    Dict
        Dictionary with the keys 'avqi', 'cpps', 'hnr', 'shimmer_local',
        'shimmer_local_db', 'slope' and 'tilt'. If anything fails
        (computation or writing the report) the error is logged and every
        value is 0.0.
    """
    L.info(f"Calculating AVQI for CS: {cs_file}, SV: {sv_file}")

    try:
        avqi_score, features = self.calculate_avqi_features(cs_file, sv_file)

        results = {
            'avqi': avqi_score,
            'cpps': features['cpps'],
            'hnr': features['hnr'],
            'shimmer_local': features['shimmer_local'],
            'shimmer_local_db': features['shimmer_local_db'],
            'slope': features['slope'],
            'tilt': features['tilt'],
        }

        # Format every line before touching the file, so a formatting
        # failure cannot leave a truncated report behind.
        report_lines = [
            f"AVQI: {avqi_score:.3f}\n",
            f"CPPS: {features['cpps']:.3f}\n",
            f"HNR: {features['hnr']:.3f}\n",
            f"Shimmer Local: {features['shimmer_local']:.3f}\n",
            f"Shimmer Local dB: {features['shimmer_local_db']:.3f}\n",
            f"LTAS Slope: {features['slope']:.3f}\n",
            f"LTAS Tilt: {features['tilt']:.3f}\n",
            f"Language: {lang}\n",
        ]
        # Explicit encoding keeps the report independent of the locale.
        with open(output_file, 'w', encoding='utf-8') as f:
            f.writelines(report_lines)

        L.info(f"AVQI results written to: {output_file}")
        return results

    except Exception as e:
        # Boundary handler: callers always get a dict back. The traceback
        # is logged so the all-zero result can be traced to its cause.
        L.error(f"Error calculating AVQI: {e}", exc_info=True)
        return {
            'avqi': 0.0,
            'cpps': 0.0,
            'hnr': 0.0,
            'shimmer_local': 0.0,
            'shimmer_local_db': 0.0,
            'slope': 0.0,
            'tilt': 0.0,
        }
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .pyannote import PyannoteEngine
|