BatchalignHK 0.7.20.post14__tar.gz → 0.7.21__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/BatchalignHK.egg-info/PKG-INFO +2 -1
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/BatchalignHK.egg-info/SOURCES.txt +2 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/BatchalignHK.egg-info/requires.txt +1 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/PKG-INFO +2 -1
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/cli/cli.py +17 -10
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/__init__.py +3 -3
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/asr/__init__.py +1 -0
- batchalignhk-0.7.21/batchalign/pipelines/asr/funaudio.py +231 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/asr/utils.py +5 -2
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/dispatch.py +6 -2
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/morphosyntax/ud.py +1 -1
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/pipeline.py +2 -1
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/utr/__init__.py +1 -0
- batchalignhk-0.7.21/batchalign/pipelines/utr/funaudio_utr.py +76 -0
- batchalignhk-0.7.21/batchalign/version +3 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/setup.py +2 -1
- batchalignhk-0.7.20.post14/batchalign/version +0 -3
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/BatchalignHK.egg-info/dependency_links.txt +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/BatchalignHK.egg-info/entry_points.txt +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/BatchalignHK.egg-info/top_level.txt +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/LICENSE +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/MANIFEST.in +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/README.md +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/__init__.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/__main__.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/cli/__init__.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/cli/dispatch.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/constants.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/document.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/errors.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/__init__.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/core.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/exception.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/logging.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/realtime_meeting.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/speech_recognizer.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/speech_synthesizer.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/speech_transcriber.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/stream_input_tts.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/token.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/util.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/version.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/__init__.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/_abnf.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/_app.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/_cookiejar.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/_core.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/_exceptions.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/_handshake.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/_http.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/_logging.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/_socket.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/_ssl_compat.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/_url.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/_utils.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/tests/__init__.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/tests/echo-server.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/tests/test_abnf.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/tests/test_app.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/tests/test_cookiejar.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/tests/test_http.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/tests/test_url.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/tests/test_websocket.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/formats/__init__.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/formats/base.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/formats/chat/__init__.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/formats/chat/file.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/formats/chat/generator.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/formats/chat/lexer.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/formats/chat/parser.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/formats/chat/utils.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/formats/textgrid/file.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/models/__init__.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/models/resolve.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/models/speaker/__init__.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/models/speaker/config.yaml +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/models/speaker/infer.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/models/speaker/utils.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/models/training/__init__.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/models/training/run.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/models/training/utils.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/models/utils.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/models/utterance/__init__.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/models/utterance/cantonese_infer.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/models/utterance/dataset.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/models/utterance/execute.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/models/utterance/infer.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/models/utterance/prep.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/models/utterance/train.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/models/wave2vec/__init__.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/models/wave2vec/infer_fa.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/models/whisper/__init__.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/asr/aliyun.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/asr/num2chinese.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/asr/oai_whisper.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/asr/tencent.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/base.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/translate/__init__.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/translate/gtrans.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/translate/seamless.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/translate/utils.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/utr/tencent_utr.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/__init__.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/conftest.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/test_document.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/utils/__init__.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/utils/abbrev.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/utils/compounds.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/utils/config.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/utils/dp.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/utils/names.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/utils/utils.py +0 -0
- {batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: BatchalignHK
|
|
3
|
-
Version: 0.7.
|
|
3
|
+
Version: 0.7.21
|
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
|
@@ -41,6 +41,7 @@ Requires-Dist: googletrans
|
|
|
41
41
|
Requires-Dist: aliyun-python-sdk-core>=2.13.3
|
|
42
42
|
Requires-Dist: oss2
|
|
43
43
|
Requires-Dist: openai-whisper>=20240930
|
|
44
|
+
Requires-Dist: funasr
|
|
44
45
|
Provides-Extra: dev
|
|
45
46
|
Requires-Dist: pytest; extra == "dev"
|
|
46
47
|
Provides-Extra: train
|
|
@@ -92,6 +92,7 @@ batchalign/pipelines/analysis/__init__.py
|
|
|
92
92
|
batchalign/pipelines/analysis/eval.py
|
|
93
93
|
batchalign/pipelines/asr/__init__.py
|
|
94
94
|
batchalign/pipelines/asr/aliyun.py
|
|
95
|
+
batchalign/pipelines/asr/funaudio.py
|
|
95
96
|
batchalign/pipelines/asr/num2chinese.py
|
|
96
97
|
batchalign/pipelines/asr/oai_whisper.py
|
|
97
98
|
batchalign/pipelines/asr/rev.py
|
|
@@ -125,6 +126,7 @@ batchalign/pipelines/translate/gtrans.py
|
|
|
125
126
|
batchalign/pipelines/translate/seamless.py
|
|
126
127
|
batchalign/pipelines/translate/utils.py
|
|
127
128
|
batchalign/pipelines/utr/__init__.py
|
|
129
|
+
batchalign/pipelines/utr/funaudio_utr.py
|
|
128
130
|
batchalign/pipelines/utr/rev_utr.py
|
|
129
131
|
batchalign/pipelines/utr/tencent_utr.py
|
|
130
132
|
batchalign/pipelines/utr/utils.py
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: BatchalignHK
|
|
3
|
-
Version: 0.7.
|
|
3
|
+
Version: 0.7.21
|
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
|
@@ -41,6 +41,7 @@ Requires-Dist: googletrans
|
|
|
41
41
|
Requires-Dist: aliyun-python-sdk-core>=2.13.3
|
|
42
42
|
Requires-Dist: oss2
|
|
43
43
|
Requires-Dist: openai-whisper>=20240930
|
|
44
|
+
Requires-Dist: funasr
|
|
44
45
|
Provides-Extra: dev
|
|
45
46
|
Requires-Dist: pytest; extra == "dev"
|
|
46
47
|
Provides-Extra: train
|
|
@@ -112,13 +112,15 @@ batchalign.add_command(train, "models")
|
|
|
112
112
|
default=True, help="Use Whisper instead of Wav2Vec for English (defaults for Whisper for non-English)")
|
|
113
113
|
@click.option("--tencent/--rev",
|
|
114
114
|
default=False, help="Use Tencent instead of Rev.AI (default).")
|
|
115
|
+
@click.option("--funaudio/--rev",
|
|
116
|
+
default=False, help="Use FunAudio instead of Rev.AI (default).")
|
|
115
117
|
@click.option("--pauses", type=bool, default=False, help="Should we try to bullet each word or should we try to add pauses in between words by grouping them? Default: no pauses.", is_flag=True)
|
|
116
118
|
@click.option("--wor/--nowor",
|
|
117
119
|
default=True, help="Should we write word level alignment line? Default to yes.")
|
|
118
120
|
@click.option("--data",
|
|
119
121
|
help="the URL of the data", type=str)
|
|
120
122
|
@click.pass_context
|
|
121
|
-
def align(ctx, in_dir, out_dir, whisper, wav2vec, tencent, **kwargs):
|
|
123
|
+
def align(ctx, in_dir, out_dir, whisper, wav2vec, tencent, funaudio, **kwargs):
|
|
122
124
|
"""Align transcripts against corresponding media files."""
|
|
123
125
|
def loader(file):
|
|
124
126
|
return (
|
|
@@ -135,8 +137,9 @@ def align(ctx, in_dir, out_dir, whisper, wav2vec, tencent, **kwargs):
|
|
|
135
137
|
in_dir, out_dir,
|
|
136
138
|
loader, writer, C,
|
|
137
139
|
fa="whisper_fa",
|
|
138
|
-
utr=("whisper_utr" if whisper else
|
|
139
|
-
|
|
140
|
+
utr = ("whisper_utr" if whisper else
|
|
141
|
+
("tencent_utr" if tencent else
|
|
142
|
+
("funaudio_utr" if funaudio else "rev_utr"))),
|
|
140
143
|
**kwargs)
|
|
141
144
|
else:
|
|
142
145
|
_dispatch("align", "eng", 1,
|
|
@@ -144,8 +147,9 @@ def align(ctx, in_dir, out_dir, whisper, wav2vec, tencent, **kwargs):
|
|
|
144
147
|
in_dir, out_dir,
|
|
145
148
|
loader, writer, C,
|
|
146
149
|
fa="wav2vec_fa",
|
|
147
|
-
utr=("whisper_utr" if whisper else
|
|
148
|
-
|
|
150
|
+
utr = ("whisper_utr" if whisper else
|
|
151
|
+
("tencent_utr" if tencent else
|
|
152
|
+
("funaudio_utr" if funaudio else "rev_utr"))),
|
|
149
153
|
**kwargs)
|
|
150
154
|
|
|
151
155
|
#################### TRANSCRIBE ################################
|
|
@@ -162,6 +166,8 @@ def align(ctx, in_dir, out_dir, whisper, wav2vec, tencent, **kwargs):
|
|
|
162
166
|
default=False, help="Use WhisperX instead of Rev.AI (default). Superceeds --whisper.")
|
|
163
167
|
@click.option("--alibaba/--rev",
|
|
164
168
|
default=False, help="Use Alibaba instead of Rev.AI (default). Superceeds --whisper.")
|
|
169
|
+
@click.option("--funaudio/--rev",
|
|
170
|
+
default=False, help="Use FunAudio instead of Rev.AI (default). Superceeds --whisper.")
|
|
165
171
|
@click.option("--diarize/--nodiarize",
|
|
166
172
|
default=False, help="Perform speaker diarization (this flag is ignored with Rev.AI)")
|
|
167
173
|
@click.option("--wor/--nowor",
|
|
@@ -192,6 +198,8 @@ def transcribe(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
|
|
|
192
198
|
asr = "whisper_oai"
|
|
193
199
|
if kwargs["alibaba"]:
|
|
194
200
|
asr = "aliyun"
|
|
201
|
+
if kwargs["funaudio"]:
|
|
202
|
+
asr = "funaudio"
|
|
195
203
|
|
|
196
204
|
def writer(doc, output):
|
|
197
205
|
doc.content.insert(0, CustomLine(id="Comment", type=CustomLineType.INDEPENDENT,
|
|
@@ -337,6 +345,8 @@ def utseg(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
|
|
|
337
345
|
default=False, help="Use OpenAI Whisper (ASR) instead of Rev.AI (default).")
|
|
338
346
|
@click.option("--tencent/--rev",
|
|
339
347
|
default=False, help="Use Tencent instead of Rev.AI (default).")
|
|
348
|
+
@click.option("--funaudio/--rev",
|
|
349
|
+
default=False, help="Use Tencent instead of Rev.AI (default).")
|
|
340
350
|
@click.option("--lang",
|
|
341
351
|
help="sample language in three-letter ISO 3166-1 alpha-3 code",
|
|
342
352
|
show_default=True,
|
|
@@ -346,8 +356,6 @@ def utseg(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
|
|
|
346
356
|
help="the URL of the data",
|
|
347
357
|
type=str)
|
|
348
358
|
@click.option("-n", "--num_speakers", type=int, help="number of speakers in the language sample", default=2)
|
|
349
|
-
@click.option("--wor/--nowor",
|
|
350
|
-
default=False, help="Should we write word level alignment line? Default to no.")
|
|
351
359
|
@click.pass_context
|
|
352
360
|
def benchmark(ctx, in_dir, out_dir, lang, num_speakers, whisper, tencent, **kwargs):
|
|
353
361
|
"""Benchmark ASR utilities for their word accuracy"""
|
|
@@ -371,14 +379,13 @@ def benchmark(ctx, in_dir, out_dir, lang, num_speakers, whisper, tencent, **kwar
|
|
|
371
379
|
df.write(str(doc["wer"]))
|
|
372
380
|
with open(Path(output).with_suffix(".diff"), 'w') as df:
|
|
373
381
|
df.write(str(doc["diff"]))
|
|
374
|
-
CHATFile(doc=doc["doc"]).write(str(Path(output).with_suffix(".asr.cha"))
|
|
375
|
-
write_wor=kwargs.get("wor", False))
|
|
382
|
+
CHATFile(doc=doc["doc"]).write(str(Path(output).with_suffix(".asr.cha")))
|
|
376
383
|
|
|
377
384
|
|
|
378
385
|
_dispatch("benchmark", lang, num_speakers, ["mp3", "mp4", "wav"], ctx,
|
|
379
386
|
in_dir, out_dir,
|
|
380
387
|
loader, writer, C,
|
|
381
|
-
asr="whisper" if whisper else ("tencent" if tencent else "rev"), **kwargs)
|
|
388
|
+
asr="whisper" if whisper else ("funaudio" if funaudio else ("tencent" if tencent else "rev")), **kwargs)
|
|
382
389
|
|
|
383
390
|
|
|
384
391
|
#################### SETUP ################################
|
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
from .pipeline import BatchalignPipeline
|
|
2
2
|
from .base import BatchalignEngine
|
|
3
3
|
from .asr import (WhisperEngine, RevEngine, WhisperXEngine,
|
|
4
|
-
TencentEngine, OAIWhisperEngine, AliyunEngine)
|
|
4
|
+
TencentEngine, OAIWhisperEngine, AliyunEngine, FunAudioEngine)
|
|
5
5
|
|
|
6
6
|
from .morphosyntax import StanzaEngine, CorefEngine
|
|
7
7
|
from .cleanup import NgramRetraceEngine, DisfluencyReplacementEngine
|
|
8
8
|
from .speaker import NemoSpeakerEngine
|
|
9
9
|
|
|
10
10
|
from .fa import WhisperFAEngine, Wave2VecFAEngine
|
|
11
|
-
from .utr import WhisperUTREngine, RevUTREngine, TencentUTREngine
|
|
11
|
+
from .utr import WhisperUTREngine, RevUTREngine, TencentUTREngine, FunAudioUTREngine
|
|
12
12
|
|
|
13
13
|
from .analysis import EvaluationEngine
|
|
14
14
|
from .utterance import StanzaUtteranceEngine
|
|
15
15
|
|
|
16
|
-
from .translate import SeamlessTranslationModel, GoogleTranslateEngine
|
|
16
|
+
# from .translate import SeamlessTranslationModel, GoogleTranslateEngine
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
"""
|
|
2
|
+
funaudio.py
|
|
3
|
+
Support for FunAudio (FunASR SenseVoice), a locally run ASR model
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from batchalign.document import *
|
|
7
|
+
from batchalign.pipelines.base import *
|
|
8
|
+
from batchalign.pipelines.asr.utils import *
|
|
9
|
+
from batchalign.utils.config import config_read
|
|
10
|
+
|
|
11
|
+
from batchalign.errors import *
|
|
12
|
+
|
|
13
|
+
from batchalign.models import BertUtteranceModel, BertCantoneseUtteranceModel, resolve
|
|
14
|
+
|
|
15
|
+
from opencc import OpenCC
|
|
16
|
+
cc = OpenCC('s2hk')
|
|
17
|
+
|
|
18
|
+
import time
|
|
19
|
+
import pathlib
|
|
20
|
+
import tempfile
|
|
21
|
+
import pycountry
|
|
22
|
+
import numpy as np
|
|
23
|
+
import soundfile as sf
|
|
24
|
+
# from pydub import AudioSegment
|
|
25
|
+
# from pydub.effects import normalize
|
|
26
|
+
import base64
|
|
27
|
+
from tencentcloud.common.credential import Credential
|
|
28
|
+
from tencentcloud.asr.v20190614.asr_client import AsrClient, models
|
|
29
|
+
|
|
30
|
+
import asyncio
|
|
31
|
+
import tempfile
|
|
32
|
+
import os
|
|
33
|
+
# from pydub import AudioSegment
|
|
34
|
+
# from pydub.effects import normalize
|
|
35
|
+
# from pydub.exceptions import CouldntDecodeError
|
|
36
|
+
from funasr import AutoModel
|
|
37
|
+
from funasr.utils.postprocess_utils import rich_transcription_postprocess
|
|
38
|
+
|
|
39
|
+
import logging
|
|
40
|
+
L = logging.getLogger("batchalign")
|
|
41
|
+
|
|
42
|
+
class FunAudioEngine(BatchalignEngine):
    """ASR engine that transcribes audio with a FunASR ``AutoModel``.

    The default model is ``FunAudioLLM/SenseVoiceSmall``. Each result segment
    returned by the model becomes one monologue turn (speaker 0), which is
    then handed to ``process_generation`` to build the output document.

    NOTE(review): the transcription language is pinned to Cantonese ("yue")
    regardless of the ``lang`` argument; ``lang`` only selects which
    utterance-segmentation model is loaded. Confirm whether that is intended.
    """

    # Normalization applied after the OpenCC conversion. Keys are matched
    # longest-first in a single regex pass, so a multi-character key takes
    # precedence over the single-character keys it contains.
    _WORD_REPLACEMENTS = {
        "系": "係",
        "繫": "係",
        # Protect the correct spelling: without these identity/repair keys the
        # single-character rule above rewrites "聯繫" to "聯係", and the
        # "聯係" repair rule below never sees that output (re.sub is one pass).
        "聯繫": "聯繫",
        "聯系": "聯繫",
        "聯係": "聯繫",
        "系啊": "係啊",
        "真系": "真係",
        "唔系": "唔係",
        "呀": "啊",
        "噶": "㗎",
        "咧": "呢",
        "嗬": "喎",
        "只": "隻",
        "咯": "囉",
        "嚇": "吓",
        # NOTE(review): the next entry (and "食" below) maps a character to
        # itself as written here; possibly a variant glyph originally.
        "飲": "飲",
        "喐": "郁",
        "食": "食",
        "啫": "咋",
        "哇": "嘩",
        "着": "著",
        "中意": "鍾意",
        "嘞": "喇",
        "啵": "噃",
        "遊水": "游水",
        "羣組": "群組",
        "古仔": "故仔",
        "甕": "㧬",
        "牀": "床",
        "松": "鬆",
        "較剪": "鉸剪",
        "吵": "嘈",
        "衝涼": "沖涼",
        "分鍾": "分鐘",
        "重復": "重複",
    }

    # Characters removed from Cantonese output before it is split into
    # single-character tokens.
    _STRIPPED_PUNCTUATION = ("「", "」", "。", ",", "!", "?")

    @property
    def tasks(self):
        """Tasks this engine performs.

        Utterance segmentation is advertised only when an utterance model
        was loaded in ``__init__``.
        """
        if self.__engine:
            return [Task.ASR, Task.UTTERANCE_SEGMENTATION]
        return [Task.ASR]

    def __init__(self, model="FunAudioLLM/SenseVoiceSmall", lang="yue"):
        """Load the ASR model and, when available, an utterance model.

        :param model: model identifier passed to ``AutoModel``.
        :param lang: language code used to look up the utterance model.
        """
        self.model_dir = model
        # Transcription language is fixed to Cantonese (see class docstring).
        self.__lang = "yue"

        self.model = AutoModel(
            model=self.model_dir,
            output_timestamps=True,
            vad_model="fsmn-vad",
            vad_kwargs={"max_single_segment_time": 30000},
            device="cuda:0",  # GPU
            hub="hf",
            cache={},
            language=self.__lang,
            use_itn=True,
            batch_size_s=60,
            output_timestamp=True,
            ban_emo_unk=False,
            merge_vad=True,
            merge_length_s=15,
        )

        # Check availability for the language whose model is actually loaded,
        # so a missing model yields "no utterance engine" instead of handing
        # None to the model constructor.
        utterance_model = resolve("utterance", lang)
        if utterance_model is None:
            self.__engine = None
            return

        L.debug("Initializing utterance model...")
        if lang != "yue":
            self.__engine = BertUtteranceModel(utterance_model)
        else:
            # we have special inference procedure for cantonese
            self.__engine = BertCantoneseUtteranceModel(utterance_model)
        L.debug("Done.")

    def replace_cantonese_words(self, text):
        """Return ``text`` with the table's Cantonese substitutions applied.

        All keys are combined into one alternation, longest key first, and
        applied in a single left-to-right pass.
        """
        import re

        table = FunAudioEngine._WORD_REPLACEMENTS
        ordered_keys = sorted(table, key=len, reverse=True)
        pattern = re.compile("|".join(re.escape(key) for key in ordered_keys))
        return pattern.sub(lambda m: table.get(m.group(0), m.group(0)), text)

    @staticmethod
    def is_roman(x):
        """check if x contains only roman characters"""
        return all(c.isalpha() and ord(c) < 128 for c in x if not c.isspace())

    @staticmethod
    def _extract_content(text):
        """Join the text that follows each ``<|withitn|>`` tag in ``text``.

        The raw text is split on ``<|yue|>``; chunks without a
        ``<|withitn|>`` tag are dropped, as is everything before the tag.
        """
        pieces = []
        for chunk in text.split("<|yue|>"):
            chunk = chunk.strip()
            if not chunk:
                continue
            fields = chunk.split("<|withitn|>", 1)
            if len(fields) > 1:
                pieces.append(fields[1].strip())
        return "".join(pieces)

    def _tokenize(self, transcript):
        """Split a cleaned transcript into the items that receive timestamps."""
        if self.__lang != "yue":
            return transcript.split()

        # process Cantonese differently: convert with OpenCC, normalize
        # wording, drop punctuation, then treat every character as one item.
        content = cc.convert(transcript)
        content = self.replace_cantonese_words(content)
        for mark in FunAudioEngine._STRIPPED_PUNCTUATION:
            content = content.replace(mark, "")
        L.debug("Processed Cantonese content: %s", content)
        return list(content)

    @staticmethod
    def _build_turn(items, timestamps):
        """Pair each item with its ``[start, end]`` timestamp.

        Timestamps are divided by 1000 before being stored (presumably
        milliseconds to seconds - confirm against the model output). Items
        beyond the end of ``timestamps`` are kept with ``None`` times rather
        than raising ``IndexError``.
        NOTE(review): confirm downstream merging tolerates ``None`` times.
        """
        if len(items) > len(timestamps):
            L.warning("FunAudio returned %d timestamps for %d items; "
                      "trailing items are left untimed.",
                      len(timestamps), len(items))

        turn = []
        for index, item in enumerate(items):
            if index < len(timestamps):
                item_start, item_end = timestamps[index]
                start, end = item_start / 1000, item_end / 1000
            else:
                start = end = None
            turn.append({
                "type": "text",
                "ts": start,
                "end_ts": end,
                "value": item,
            })
        return turn

    def generate(self, audio_file_path):
        """
        Generate transcription from an audio file using the FunAudio model.
        :param audio_file_path: Path to the audio file to be transcribed.
        :return: A Document object containing the transcription and metadata.
        """
        from pathlib import Path

        res = self.model.generate(
            input=audio_file_path,
            cache={},
            language=self.__lang,
            output_timestamps=True,
            vad_model="fsmn-vad",
            vad_kwargs={"max_single_segment_time": 60000},
            ban_emo_unk=False,
            use_itn=True,
            batch_size_s=60,
            merge_vad=True,
            merge_length_s=15,
            output_timestamp=True,
            spk_model="cam++",
        )

        turns = []
        for segment in res:  # segment is a dictionary with keys "text" and "timestamp"
            L.debug("FunAudio segment: %s", segment)
            transcript = self._extract_content(segment["text"])
            # A segment without timestamps yields untimed items, not a KeyError.
            timestamps = segment.get("timestamp") or []
            items = self._tokenize(transcript)
            turns.append({
                "elements": self._build_turn(items, timestamps),
                "speaker": 0,
            })

        L.debug(f"Funaudio done.")

        doc = process_generation({"monologues": turns},
                                 self.__lang,
                                 utterance_engine=self.__engine)
        doc.media = Media(type=MediaType.AUDIO,
                          name=Path(audio_file_path).stem,
                          url=audio_file_path)
        return doc
|
|
230
|
+
|
|
231
|
+
|
|
@@ -163,14 +163,16 @@ def process_generation(output, lang="eng", utterance_engine=None):
|
|
|
163
163
|
for utterance in output["monologues"]:
|
|
164
164
|
# get a list of words
|
|
165
165
|
words = merge_on_wordlist(utterance["elements"])
|
|
166
|
+
# words = utterance["elements"]
|
|
166
167
|
# coallate words (not punct) into the shape we expect
|
|
167
168
|
# which is ['word', [start_ms, end_ms]]. Yes, this would
|
|
168
169
|
# involve multiplying by 1000 to s => ms
|
|
169
170
|
words = [[i["value"], [round(i["ts"]*1000) if i.get("ts") != None else None,
|
|
170
|
-
|
|
171
|
+
round(i["end_ts"]*1000) if i.get("end_ts") != None else None]] # the shape
|
|
171
172
|
for i in words # for each word
|
|
172
173
|
if i["value"].strip() != "" and
|
|
173
|
-
|
|
174
|
+
not re.match(r'<.*>', i["value"])] # if its text (i.e. not "pause")
|
|
175
|
+
|
|
174
176
|
|
|
175
177
|
# sometimes, the system outputs two forms with a space as one single
|
|
176
178
|
# word. we need to interpolate the space between them
|
|
@@ -188,6 +190,7 @@ def process_generation(output, lang="eng", utterance_engine=None):
|
|
|
188
190
|
# if we only have one part, we don't interpolate
|
|
189
191
|
if len(word_parts) == 1:
|
|
190
192
|
final_words.append([word, [i,o]])
|
|
193
|
+
words = merge_on_wordlist(utterance["elements"])
|
|
191
194
|
continue
|
|
192
195
|
# otherwise, we interpolate the itme
|
|
193
196
|
cur = i
|
|
@@ -6,8 +6,8 @@ Tabulate default packages and options.
|
|
|
6
6
|
from batchalign import (WhisperEngine, WhisperFAEngine, StanzaEngine, RevEngine,
|
|
7
7
|
NgramRetraceEngine, DisfluencyReplacementEngine, WhisperUTREngine,
|
|
8
8
|
RevUTREngine, EvaluationEngine, WhisperXEngine, NemoSpeakerEngine,
|
|
9
|
-
StanzaUtteranceEngine, CorefEngine, Wave2VecFAEngine,
|
|
10
|
-
|
|
9
|
+
StanzaUtteranceEngine, CorefEngine, Wave2VecFAEngine, TencentEngine,
|
|
10
|
+
OAIWhisperEngine, TencentUTREngine, AliyunEngine, FunAudioEngine, FunAudioUTREngine)
|
|
11
11
|
|
|
12
12
|
from batchalign import BatchalignPipeline
|
|
13
13
|
from batchalign.models import resolve
|
|
@@ -144,6 +144,10 @@ def dispatch_pipeline(pkg_str, lang, num_speakers=None, **arg_overrides):
|
|
|
144
144
|
engines.append(OAIWhisperEngine())
|
|
145
145
|
elif engine == "aliyun":
|
|
146
146
|
engines.append(AliyunEngine())
|
|
147
|
+
elif engine == "funaudio":
|
|
148
|
+
engines.append(FunAudioEngine())
|
|
149
|
+
elif engine == "funaudio_utr":
|
|
150
|
+
engines.append(FunAudioUTREngine(lang=lang))
|
|
147
151
|
|
|
148
152
|
|
|
149
153
|
L.debug(f"Done initalizing packages.")
|
|
@@ -884,7 +884,7 @@ def morphoanalyze(doc: Document, retokenize:bool, skipmultilang:bool, status_hoo
|
|
|
884
884
|
|
|
885
885
|
# parse the stanza output
|
|
886
886
|
mor, gra = parse_sentence(sents[0], ending, special_forms_cleaned, lang[0])
|
|
887
|
-
mor =
|
|
887
|
+
mor = re.sub(r"~part\|s verb\|(\w+)-Ger-S", r"~aux|is verb|\1-Part-Pres-S", mor)
|
|
888
888
|
# breakpoint()
|
|
889
889
|
|
|
890
890
|
if mor.strip() == "" or mor.strip() in ENDING_PUNCT:
|
|
@@ -109,7 +109,8 @@ class BatchalignPipeline:
|
|
|
109
109
|
L.debug(f"Calling generator: {self.__generator}")
|
|
110
110
|
if callback:
|
|
111
111
|
callback(0,total_tasks, self.__generator.tasks)
|
|
112
|
-
|
|
112
|
+
|
|
113
|
+
doc = self.__generator.generate(doc.media.url)
|
|
113
114
|
if callback:
|
|
114
115
|
callback(1,total_tasks, self.__generator.tasks)
|
|
115
116
|
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from batchalign.document import *
|
|
3
|
+
from batchalign.pipelines.base import *
|
|
4
|
+
from batchalign.pipelines.asr.utils import *
|
|
5
|
+
from batchalign.pipelines.utr.utils import bulletize_doc
|
|
6
|
+
from batchalign.pipelines.asr.funaudio import FunAudioEngine
|
|
7
|
+
|
|
8
|
+
from opencc import OpenCC
|
|
9
|
+
cc = OpenCC('s2hk')
|
|
10
|
+
|
|
11
|
+
import warnings
|
|
12
|
+
|
|
13
|
+
import pycountry
|
|
14
|
+
|
|
15
|
+
import logging
|
|
16
|
+
L = logging.getLogger("batchalign")
|
|
17
|
+
|
|
18
|
+
class FunAudioUTREngine(BatchalignEngine):
    """Utterance timing recovery engine driven by FunAudio ASR output.

    The engine transcribes the document's audio with a ``FunAudioEngine``,
    extracts ``(text, start, end)`` triples from the resulting transcript and
    hands them, together with the original document, to ``bulletize_doc``.
    """

    tasks = [ Task.UTTERANCE_TIMING_RECOVERY ]

    def __init__(self, model=None, lang="yue"):
        """Create the underlying FunAudio ASR engine.

        Parameters
        ----------
        model : str, optional
            Model identifier handed to ``FunAudioEngine``. Falls back to
            ``"FunAudioLLM/SenseVoiceSmall"`` when not given.
        lang : str
            Language code stored on the instance.
        """
        # honour an explicitly requested model instead of silently discarding it
        if not model:
            model = "FunAudioLLM/SenseVoiceSmall"

        # NOTE(review): the ASR engine is always constructed with lang="yue",
        # independent of `lang` -- confirm whether this is intentional before
        # forwarding the caller's language here.
        self.__funaudio = FunAudioEngine(model, lang="yue")
        self.__lang = lang

    def process(self, doc, **kwargs):
        """Recover rough utterance timings for ``doc``.

        Parameters
        ----------
        doc : Document
            Document to add timings to. Returned untouched (with a warning)
            when any of its utterances already carries an alignment.
        **kwargs
            May contain ``extra_info={"extra_input": <path>}``; when present
            and truthy, that value is used as the audio source instead of
            ``doc.media.url``.

        Returns
        -------
        The result of ``bulletize_doc`` applied to the ASR timings and ``doc``,
        or ``doc`` itself when it already has utterance timings.

        Raises
        ------
        AssertionError
            If no ``extra_input`` is supplied and the document has no media URL.
        """
        # check and if there are existing utterance timings, warn
        if any(i.alignment for i in doc.content if isinstance(i, Utterance)):
            warnings.warn(f"We found existing utterance timings in the document with {doc.media.url}! Skipping rough utterance alignment.")
            return doc

        # an explicit override of the audio source takes precedence over the
        # document's media; tolerate extra_info being passed as None
        f = (kwargs.get("extra_info") or {}).get("extra_input")

        if not f:
            assert doc.media is not None and doc.media.url is not None, f"We cannot add utterance timings to something that doesn't have a media path! Provided media tier='{doc.media}'"
            f = doc.media.url

        # transcribe the resolved source (previously the override was computed
        # but doc.media.url was always used)
        res = self.__funaudio.generate(audio_file_path=f)

        # the text and millisecond time span are read from the string form of
        # the first element of each transcript item
        # NOTE(review): only item[0] is inspected, and items whose string form
        # does not match the pattern are skipped silently -- confirm that this
        # is the intended granularity.
        form_pattern = re.compile(r"text='(.*?)'.*time=\((\d+),\s*(\d+)\)")

        res_content = []
        for item in res.content:
            match = form_pattern.search(str(item[0]))
            if not match:
                continue

            # convert the matched millisecond values to seconds
            res_content.append({
                "value": match.group(1),
                "ts": int(match.group(2)) / 1000.0,
                "end_ts": int(match.group(3)) / 1000.0
            })

        # everything is reported as one monologue by an unknown speaker
        turns = [{
            "elements": res_content,
            "speaker": "unknown"
        }]

        return bulletize_doc({"monologues": turns}, doc)
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/BatchalignHK.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/realtime_meeting.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/speech_recognizer.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/speech_synthesizer.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/speech_transcriber.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/stream_input_tts.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/_cookiejar.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/_exceptions.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/_handshake.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/_logging.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/_socket.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/_ssl_compat.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/_utils.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/tests/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/tests/test_app.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/extern/nls/websocket/tests/test_url.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/models/utterance/cantonese_infer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/analysis/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/cleanup/disfluencies.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/cleanup/parse_support.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/cleanup/support/test.test
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/morphosyntax/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/morphosyntax/coref.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/morphosyntax/en/irr.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/morphosyntax/fr/apm.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/morphosyntax/fr/apmn.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/morphosyntax/fr/case.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/morphosyntax/ja/verbforms.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/speaker/nemo_speaker.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/translate/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/translate/seamless.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/utterance/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/pipelines/utterance/ud_utterance.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/formats/chat/test_chat_file.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/formats/chat/test_chat_lexer.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/formats/chat/test_chat_parser.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/formats/chat/test_chat_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/pipelines/analysis/test_eval.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/pipelines/asr/test_asr_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/pipelines/fa/test_fa_pipeline.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post14 → batchalignhk-0.7.21}/batchalign/tests/pipelines/test_pipeline.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|