BatchalignHK 0.7.19.post19__tar.gz → 0.7.19.post21__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/BatchalignHK.egg-info/PKG-INFO +3 -1
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/BatchalignHK.egg-info/SOURCES.txt +34 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/BatchalignHK.egg-info/requires.txt +2 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/PKG-INFO +3 -1
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/cli/cli.py +4 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/__init__.py +10 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/core.py +183 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/exception.py +31 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/logging.py +65 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/realtime_meeting.py +321 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/speech_recognizer.py +315 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/speech_synthesizer.py +288 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/speech_transcriber.py +375 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/stream_input_tts.py +439 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/token.py +49 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/util.py +44 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/version.py +2 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/websocket/__init__.py +26 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/websocket/_abnf.py +423 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/websocket/_app.py +426 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/websocket/_cookiejar.py +67 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/websocket/_core.py +607 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/websocket/_exceptions.py +84 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/websocket/_handshake.py +200 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/websocket/_http.py +335 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/websocket/_logging.py +90 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/websocket/_socket.py +182 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/websocket/_ssl_compat.py +44 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/websocket/_url.py +176 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/websocket/_utils.py +104 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/websocket/tests/echo-server.py +21 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/websocket/tests/test_abnf.py +89 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/websocket/tests/test_app.py +179 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/websocket/tests/test_cookiejar.py +119 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/websocket/tests/test_http.py +177 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/websocket/tests/test_url.py +301 -0
- batchalignhk-0.7.19.post21/batchalign/extern/nls/websocket/tests/test_websocket.py +458 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/__init__.py +2 -1
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/asr/__init__.py +1 -0
- batchalignhk-0.7.19.post21/batchalign/pipelines/asr/aliyun.py +253 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/dispatch.py +4 -1
- batchalignhk-0.7.19.post21/batchalign/tests/__init__.py +0 -0
- batchalignhk-0.7.19.post21/batchalign/version +3 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/setup.py +2 -0
- batchalignhk-0.7.19.post19/batchalign/version +0 -3
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/BatchalignHK.egg-info/dependency_links.txt +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/BatchalignHK.egg-info/entry_points.txt +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/BatchalignHK.egg-info/top_level.txt +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/LICENSE +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/MANIFEST.in +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/README.md +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/__init__.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/__main__.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/cli/__init__.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/cli/dispatch.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/constants.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/document.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/errors.py +0 -0
- {batchalignhk-0.7.19.post19/batchalign → batchalignhk-0.7.19.post21/batchalign/extern/nls/websocket}/tests/__init__.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/formats/__init__.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/formats/base.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/formats/chat/__init__.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/formats/chat/file.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/formats/chat/generator.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/formats/chat/lexer.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/formats/chat/parser.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/formats/chat/utils.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/formats/textgrid/file.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/models/__init__.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/models/resolve.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/models/speaker/__init__.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/models/speaker/config.yaml +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/models/speaker/infer.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/models/speaker/utils.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/models/training/__init__.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/models/training/run.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/models/training/utils.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/models/utils.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/models/utterance/__init__.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/models/utterance/cantonese_infer.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/models/utterance/dataset.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/models/utterance/execute.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/models/utterance/infer.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/models/utterance/prep.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/models/utterance/train.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/models/wave2vec/__init__.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/models/wave2vec/infer_fa.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/models/whisper/__init__.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/asr/num2chinese.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/asr/oai_whisper.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/asr/tencent.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/base.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/morphosyntax/ud.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/pipeline.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/translate/__init__.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/translate/gtrans.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/translate/seamless.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/translate/utils.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/utr/tencent_utr.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/tests/conftest.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/tests/test_document.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/utils/__init__.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/utils/abbrev.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/utils/config.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/utils/dp.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/utils/names.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/batchalign/utils/utils.py +0 -0
- {batchalignhk-0.7.19.post19 → batchalignhk-0.7.19.post21}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: BatchalignHK
|
|
3
|
-
Version: 0.7.19.post19
|
|
3
|
+
Version: 0.7.19.post21
|
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
|
@@ -37,6 +37,8 @@ Requires-Dist: sentencepiece
|
|
|
37
37
|
Requires-Dist: tencentcloud-sdk-python-common
|
|
38
38
|
Requires-Dist: tencentcloud-sdk-python-asr
|
|
39
39
|
Requires-Dist: googletrans
|
|
40
|
+
Requires-Dist: aliyun-python-sdk-core>=2.13.3
|
|
41
|
+
Requires-Dist: oss2
|
|
40
42
|
Requires-Dist: openai-whisper>=20240930
|
|
41
43
|
Provides-Extra: dev
|
|
42
44
|
Requires-Dist: pytest; extra == "dev"
|
|
@@ -17,6 +17,39 @@ batchalign/version
|
|
|
17
17
|
batchalign/cli/__init__.py
|
|
18
18
|
batchalign/cli/cli.py
|
|
19
19
|
batchalign/cli/dispatch.py
|
|
20
|
+
batchalign/extern/nls/__init__.py
|
|
21
|
+
batchalign/extern/nls/core.py
|
|
22
|
+
batchalign/extern/nls/exception.py
|
|
23
|
+
batchalign/extern/nls/logging.py
|
|
24
|
+
batchalign/extern/nls/realtime_meeting.py
|
|
25
|
+
batchalign/extern/nls/speech_recognizer.py
|
|
26
|
+
batchalign/extern/nls/speech_synthesizer.py
|
|
27
|
+
batchalign/extern/nls/speech_transcriber.py
|
|
28
|
+
batchalign/extern/nls/stream_input_tts.py
|
|
29
|
+
batchalign/extern/nls/token.py
|
|
30
|
+
batchalign/extern/nls/util.py
|
|
31
|
+
batchalign/extern/nls/version.py
|
|
32
|
+
batchalign/extern/nls/websocket/__init__.py
|
|
33
|
+
batchalign/extern/nls/websocket/_abnf.py
|
|
34
|
+
batchalign/extern/nls/websocket/_app.py
|
|
35
|
+
batchalign/extern/nls/websocket/_cookiejar.py
|
|
36
|
+
batchalign/extern/nls/websocket/_core.py
|
|
37
|
+
batchalign/extern/nls/websocket/_exceptions.py
|
|
38
|
+
batchalign/extern/nls/websocket/_handshake.py
|
|
39
|
+
batchalign/extern/nls/websocket/_http.py
|
|
40
|
+
batchalign/extern/nls/websocket/_logging.py
|
|
41
|
+
batchalign/extern/nls/websocket/_socket.py
|
|
42
|
+
batchalign/extern/nls/websocket/_ssl_compat.py
|
|
43
|
+
batchalign/extern/nls/websocket/_url.py
|
|
44
|
+
batchalign/extern/nls/websocket/_utils.py
|
|
45
|
+
batchalign/extern/nls/websocket/tests/__init__.py
|
|
46
|
+
batchalign/extern/nls/websocket/tests/echo-server.py
|
|
47
|
+
batchalign/extern/nls/websocket/tests/test_abnf.py
|
|
48
|
+
batchalign/extern/nls/websocket/tests/test_app.py
|
|
49
|
+
batchalign/extern/nls/websocket/tests/test_cookiejar.py
|
|
50
|
+
batchalign/extern/nls/websocket/tests/test_http.py
|
|
51
|
+
batchalign/extern/nls/websocket/tests/test_url.py
|
|
52
|
+
batchalign/extern/nls/websocket/tests/test_websocket.py
|
|
20
53
|
batchalign/formats/__init__.py
|
|
21
54
|
batchalign/formats/base.py
|
|
22
55
|
batchalign/formats/chat/__init__.py
|
|
@@ -58,6 +91,7 @@ batchalign/pipelines/pipeline.py
|
|
|
58
91
|
batchalign/pipelines/analysis/__init__.py
|
|
59
92
|
batchalign/pipelines/analysis/eval.py
|
|
60
93
|
batchalign/pipelines/asr/__init__.py
|
|
94
|
+
batchalign/pipelines/asr/aliyun.py
|
|
61
95
|
batchalign/pipelines/asr/num2chinese.py
|
|
62
96
|
batchalign/pipelines/asr/oai_whisper.py
|
|
63
97
|
batchalign/pipelines/asr/rev.py
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: BatchalignHK
|
|
3
|
-
Version: 0.7.19.post19
|
|
3
|
+
Version: 0.7.19.post21
|
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
|
@@ -37,6 +37,8 @@ Requires-Dist: sentencepiece
|
|
|
37
37
|
Requires-Dist: tencentcloud-sdk-python-common
|
|
38
38
|
Requires-Dist: tencentcloud-sdk-python-asr
|
|
39
39
|
Requires-Dist: googletrans
|
|
40
|
+
Requires-Dist: aliyun-python-sdk-core>=2.13.3
|
|
41
|
+
Requires-Dist: oss2
|
|
40
42
|
Requires-Dist: openai-whisper>=20240930
|
|
41
43
|
Provides-Extra: dev
|
|
42
44
|
Requires-Dist: pytest; extra == "dev"
|
|
@@ -160,6 +160,8 @@ def align(ctx, in_dir, out_dir, whisper, wav2vec, tencent, **kwargs):
|
|
|
160
160
|
default=False, help="Use Tencent instead of Rev.AI (default).")
|
|
161
161
|
@click.option("--whisperx/--rev",
|
|
162
162
|
default=False, help="Use WhisperX instead of Rev.AI (default). Superceeds --whisper.")
|
|
163
|
+
@click.option("--alibaba/--rev",
|
|
164
|
+
default=False, help="Use Alibaba instead of Rev.AI (default). Superceeds --whisper.")
|
|
163
165
|
@click.option("--diarize/--nodiarize",
|
|
164
166
|
default=False, help="Perform speaker diarization (this flag is ignored with Rev.AI)")
|
|
165
167
|
@click.option("--wor/--nowor",
|
|
@@ -188,6 +190,8 @@ def transcribe(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
|
|
|
188
190
|
asr = "tencent"
|
|
189
191
|
if kwargs["whisper_oai"]:
|
|
190
192
|
asr = "whisper_oai"
|
|
193
|
+
if kwargs["alibaba"]:
|
|
194
|
+
asr = "aliyun"
|
|
191
195
|
|
|
192
196
|
def writer(doc, output):
|
|
193
197
|
doc.content.insert(0, CustomLine(id="Comment", type=CustomLineType.INDEPENDENT,
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
2
|
+
|
|
3
|
+
from .logging import *
|
|
4
|
+
from .speech_recognizer import *
|
|
5
|
+
from .speech_transcriber import *
|
|
6
|
+
from .speech_synthesizer import *
|
|
7
|
+
from .stream_input_tts import *
|
|
8
|
+
from .realtime_meeting import *
|
|
9
|
+
from .util import *
|
|
10
|
+
from .version import __version__
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import threading
|
|
5
|
+
|
|
6
|
+
from enum import Enum, unique
|
|
7
|
+
from queue import Queue
|
|
8
|
+
|
|
9
|
+
from . import logging, token, websocket
|
|
10
|
+
from .exception import InvalidParameter, ConnectionTimeout, ConnectionUnavailable
|
|
11
|
+
|
|
12
|
+
__URL__ = 'wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1'
|
|
13
|
+
__HEADER__ = [
|
|
14
|
+
'Sec-WebSocket-Key: x3JJHMbDL1EzLkh9GBhXDw==',
|
|
15
|
+
'Sec-WebSocket-Version: 13',
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
__FORMAT__ = '%(asctime)s - %(levelname)s - %(message)s'
|
|
19
|
+
#__all__ = ['NlsCore']
|
|
20
|
+
|
|
21
|
+
def core_on_msg(ws, message, args):
    """
    WebSocket message handler: relay a message to the owning NlsCore.

    Parameters
    ----------
    ws:
        the websocket object that produced the event (unused here)
    message:
        the received message, passed on unchanged
    args: sequence
        callback arguments; the first element is the NlsCore instance
        whose 'on_message' callback is issued
    """
    logging.debug('core_on_msg:{}'.format(message))
    if args:
        owner = args[0]
        # Name-mangled private method of NlsCore, reached from module scope.
        owner._NlsCore__issue_callback('on_message', [message])
    else:
        logging.error('callback core_on_msg with null args')
|
|
28
|
+
|
|
29
|
+
def core_on_error(ws, message, args):
    """
    WebSocket error handler: relay an error to the owning NlsCore.

    Parameters
    ----------
    ws:
        the websocket object that produced the event (unused here)
    message:
        the error payload, passed on unchanged
    args: sequence
        callback arguments; the first element is the NlsCore instance
        whose 'on_error' callback is issued
    """
    logging.debug('core_on_error:{}'.format(message))
    if args:
        owner = args[0]
        owner._NlsCore__issue_callback('on_error', [message])
    else:
        logging.error('callback core_on_error with null args')
|
|
36
|
+
|
|
37
|
+
def core_on_close(ws, close_status_code, close_msg, args):
    """
    WebSocket close handler: tell the owning NlsCore the socket closed.

    Parameters
    ----------
    ws:
        the websocket object that produced the event (unused here)
    close_status_code, close_msg:
        close details supplied by the websocket layer; they are not
        forwarded to the 'on_close' callback
    args: sequence
        callback arguments; the first element is the NlsCore instance
        whose 'on_close' callback is issued
    """
    logging.debug('core_on_close')
    if args:
        owner = args[0]
        owner._NlsCore__issue_callback('on_close')
    else:
        logging.error('callback core_on_close with null args')
|
|
44
|
+
|
|
45
|
+
def core_on_open(ws, args):
    """
    WebSocket open handler: mark the NlsCore connected and send its
    pending start message.

    Parameters
    ----------
    ws:
        the websocket object that was opened; it is closed again when
        args is unusable
    args: sequence
        expected to hold exactly two items: the NlsCore instance and the
        message to send once the connection is open

    When args is empty or does not have exactly two items the socket is
    closed and the handler returns without touching args. Previously the
    code fell through after ws.close() and indexed args anyway.
    """
    logging.debug('core_on_open:{}'.format(args))
    if not args:
        logging.debug('callback with null args')
        ws.close()
        return
    if len(args) != 2:
        logging.debug('callback args not 2')
        ws.close()
        return
    nls = args[0]
    # Order matters: flag the connection as open first so that start()
    # takes its "already connected" branch and sends args[1] directly.
    nls._NlsCore__notify_on_open()
    nls.start(args[1], nls._NlsCore__ping_interval, nls._NlsCore__ping_timeout)
    nls._NlsCore__issue_callback('on_open')
|
|
57
|
+
|
|
58
|
+
def core_on_data(ws, data, opcode, flag, args):
    """
    WebSocket data handler: relay a raw frame to the owning NlsCore.

    Parameters
    ----------
    ws:
        the websocket object that produced the event (unused here)
    data, opcode, flag:
        frame details supplied by the websocket layer; forwarded in this
        order to the 'on_data' callback
    args: sequence
        callback arguments; the first element is the NlsCore instance
        whose 'on_data' callback is issued
    """
    logging.debug('core_on_data opcode={}'.format(opcode))
    if args:
        owner = args[0]
        owner._NlsCore__issue_callback('on_data', [data, opcode, flag])
    else:
        logging.error('callback core_on_data with null args')
|
|
65
|
+
|
|
66
|
+
@unique
class NlsConnectionStatus(Enum):
    """
    Connection state tracked by NlsCore.
    """
    # No open websocket; NlsCore.start() connects first in this state.
    Disconnected = 0
    # The websocket on_open handler has fired.
    Connected = 1
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class NlsCore:
    """
    Wrapper around a websocket.WebSocketApp that connects on demand,
    tracks whether the connection is open, and dispatches websocket
    events to user-supplied callbacks.

    The module-level core_on_* functions are installed as the websocket
    handlers and call back into this class through its name-mangled
    private methods.
    """
    def __init__(self,
                 url=__URL__,
                 token=None,
                 on_open=None, on_message=None, on_close=None,
                 on_error=None, on_data=None, asynch=False, callback_args=None):
        """
        Parameters
        ----------
        url: str
            websocket endpoint to connect to
        token: str
            access token sent in the 'X-NLS-Token' header; required
        on_open, on_message, on_close, on_error, on_data: callable
            user callbacks; each is called with the event arguments
            followed by the items of callback_args
        asynch: bool
            when true, start() does not wait for the connection to open
        callback_args: list
            extra arguments appended to every user callback invocation;
            defaults to an empty list

        Raises
        ------
        InvalidParameter
            if token is missing, or none of on_open, on_message, on_close
            and on_error is given
        """
        self.__url = url
        self.__async = asynch
        if not token:
            raise InvalidParameter('Must provide a valid token!')
        self.__token = token
        self.__callbacks = {}
        for name, callback in (('on_open', on_open),
                               ('on_message', on_message),
                               ('on_close', on_close),
                               ('on_error', on_error),
                               ('on_data', on_data)):
            if callback:
                self.__callbacks[name] = callback
        # on_data on its own does not satisfy this check (kept as in the
        # original contract).
        if not on_open and not on_message and not on_close and not on_error:
            raise InvalidParameter('Must provide at least one callback')
        # A None default replaces the former shared mutable default list.
        if callback_args is None:
            callback_args = []
        logging.debug('callback args:{}'.format(callback_args))
        self.__callback_args = callback_args
        self.__header = __HEADER__ + ['X-NLS-Token: {}'.format(self.__token)]
        # NOTE(review): websocket tracing is switched on for every
        # instance -- confirm this is intended outside of debugging.
        websocket.enableTrace(True)
        self.__ws = websocket.WebSocketApp(self.__url,
                                           self.__header,
                                           on_message=core_on_msg,
                                           on_data=core_on_data,
                                           on_error=core_on_error,
                                           on_close=core_on_close,
                                           callback_args=[self])
        self.__ws.on_open = core_on_open
        # NOTE(review): the connection status is read under __lock in
        # start()/send()/__run() but written under __cond elsewhere.
        self.__lock = threading.Lock()
        self.__cond = threading.Condition()
        self.__connection_status = NlsConnectionStatus.Disconnected

    def start(self, msg, ping_interval, ping_timeout):
        """
        Send msg, connecting first if the socket is not open yet.

        When disconnected, msg is stored as a websocket callback argument
        and the connection is started; core_on_open then calls start()
        again to send it. When connected, msg is sent directly.
        """
        # "with" releases the lock even if update_args raises.
        with self.__lock:
            self.__ping_interval = ping_interval
            self.__ping_timeout = ping_timeout
            need_connect = (self.__connection_status ==
                            NlsConnectionStatus.Disconnected)
            if need_connect:
                self.__ws.update_args(self, msg)
        # Connect/send outside the lock, as the original did.
        if need_connect:
            self.__connect_before_start(ping_interval, ping_timeout)
        else:
            self.__ws.send(msg)

    def __notify_on_open(self):
        """Mark the connection open and wake a thread waiting in start()."""
        logging.debug('notify on open')
        with self.__cond:
            self.__connection_status = NlsConnectionStatus.Connected
            self.__cond.notify()

    def __issue_callback(self, which, exargs=None):
        """
        Invoke the user callback registered under `which`.

        exargs (event arguments) are passed first, followed by the
        callback_args given at construction. An unregistered callback is
        logged and skipped.
        """
        if which == 'on_close':
            # Record the disconnect before the registry check so the
            # state is updated even when no on_close callback exists.
            with self.__cond:
                self.__connection_status = NlsConnectionStatus.Disconnected
                self.__cond.notify()
        if which not in self.__callbacks:
            logging.error('no such callback:{}'.format(which))
            return
        args = list(exargs or []) + self.__callback_args
        self.__callbacks[which](*args)

    def send(self, msg, binary):
        """
        Send msg over the open connection.

        Parameters
        ----------
        msg:
            payload to send
        binary: bool
            when true the payload is sent with the binary opcode

        Raises
        ------
        ConnectionUnavailable
            if the connection is not open
        """
        with self.__lock:
            disconnected = (self.__connection_status ==
                            NlsConnectionStatus.Disconnected)
        if disconnected:
            logging.error('start before send')
            raise ConnectionUnavailable('Must call start before send!')
        if binary:
            self.__ws.send(msg, opcode=websocket.ABNF.OPCODE_BINARY)
        else:
            logging.debug('send {}'.format(msg))
            self.__ws.send(msg)

    def shutdown(self):
        """Close the underlying websocket."""
        self.__ws.close()

    def __run(self, ping_interval, ping_timeout):
        """Thread target: run the websocket loop, then mark disconnected."""
        logging.debug('ws run...')
        self.__ws.run_forever(ping_interval=ping_interval,
                              ping_timeout=ping_timeout)
        with self.__lock:
            self.__connection_status = NlsConnectionStatus.Disconnected
        logging.debug('ws exit...')

    def __connect_before_start(self, ping_interval, ping_timeout):
        """
        Start the websocket thread and, unless asynchronous, wait up to
        10 seconds for the connection to open.

        Returns whether the connection is open after a wakeup; returns
        None in asynchronous mode.

        Raises
        ------
        ConnectionTimeout
            if no wakeup arrives within the timeout
        """
        with self.__cond:
            # The thread is started while holding the condition, so a
            # notify cannot be issued before wait() below releases it.
            self.__th = threading.Thread(target=self.__run,
                                         args=[ping_interval, ping_timeout])
            self.__th.start()
            if self.__connection_status == NlsConnectionStatus.Disconnected:
                logging.debug('wait cond wakeup')
                if not self.__async:
                    if self.__cond.wait(timeout=10):
                        logging.debug('wakeup without timeout')
                        return self.__connection_status == NlsConnectionStatus.Connected
                    else:
                        logging.debug('wakeup with timeout')
                        raise ConnectionTimeout('Wait response timeout! Please check local network!')
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class InvalidParameter(Exception):
    """Raised by NlsCore when the token or all event callbacks are missing."""
|
|
6
|
+
|
|
7
|
+
# Token
|
|
8
|
+
class GetTokenFailed(Exception):
    """Token error; presumably raised when fetching a token fails (raise site not visible here)."""
|
|
10
|
+
|
|
11
|
+
# Connection
|
|
12
|
+
class ConnectionTimeout(Exception):
    """Raised by NlsCore when waiting for the connection to open times out."""
|
|
14
|
+
|
|
15
|
+
class ConnectionUnavailable(Exception):
    """Raised by NlsCore.send when the connection is not open."""
|
|
17
|
+
|
|
18
|
+
class StartTimeoutException(Exception):
    """Presumably signals that a start request timed out (raise site not visible here)."""
|
|
20
|
+
|
|
21
|
+
class StopTimeoutException(Exception):
    """Presumably signals that a stop request timed out (raise site not visible here)."""
|
|
23
|
+
|
|
24
|
+
class NotStartException(Exception):
    """Presumably signals an operation attempted before start (raise site not visible here)."""
|
|
26
|
+
|
|
27
|
+
class CompleteTimeoutException(Exception):
    """Presumably signals that waiting for completion timed out (raise site not visible here)."""
|
|
29
|
+
|
|
30
|
+
class WrongStateException(Exception):
    """Presumably signals a call made in the wrong state (raise site not visible here)."""
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
_logger = logging.getLogger('nls')
|
|
6
|
+
|
|
7
|
+
try:
|
|
8
|
+
from logging import NullHandler
|
|
9
|
+
except ImportError:
|
|
10
|
+
class NullHandler(logging.Handler):
|
|
11
|
+
def emit(self, record):
|
|
12
|
+
pass
|
|
13
|
+
|
|
14
|
+
_logger.addHandler(NullHandler())
|
|
15
|
+
_traceEnabled = False
|
|
16
|
+
__LOG_FORMAT__ = '%(asctime)s - %(levelname)s - %(message)s'
|
|
17
|
+
|
|
18
|
+
__all__=['enableTrace', 'dump', 'error', 'warning', 'debug', 'trace',
|
|
19
|
+
'isEnabledForError', 'isEnabledForDebug', 'isEnabledForTrace']
|
|
20
|
+
|
|
21
|
+
def enableTrace(traceable, handler=logging.StreamHandler()):
    """
    Switch trace output for the 'nls' logger on or off.

    Parameters
    ----------
    traceable: bool
        stored in the module flag read by dump(), trace() and
        isEnabledForTrace(); when true the logger level is also set to
        logging.DEBUG
    handler: Handler object
        attached to the logger and given the module log format when
        traceable is true; the default StreamHandler is created once,
        when this function is defined
    """
    global _traceEnabled
    _traceEnabled = traceable
    if not traceable:
        # Switching off only clears the flag; a handler and level set by
        # an earlier call are left in place.
        return
    _logger.addHandler(handler)
    _logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter(__LOG_FORMAT__)
    handler.setFormatter(formatter)
|
|
38
|
+
|
|
39
|
+
def dump(title, message):
    """
    Log a titled message between banner lines at debug level.

    Does nothing unless tracing has been enabled with enableTrace().
    """
    if not _traceEnabled:
        return
    banner = '### ' + title + ' ###'
    for line in (banner, message, '########################################'):
        _logger.debug(line)
|
|
44
|
+
|
|
45
|
+
def error(msg):
    """Log msg on the 'nls' logger at error level."""
    _logger.error(msg)
|
|
47
|
+
|
|
48
|
+
def warning(msg):
    """Log msg on the 'nls' logger at warning level."""
    _logger.warning(msg)
|
|
50
|
+
|
|
51
|
+
def debug(msg):
    """Log msg on the 'nls' logger at debug level."""
    _logger.debug(msg)
|
|
53
|
+
|
|
54
|
+
def trace(msg):
    """Log msg at debug level, but only while tracing is enabled."""
    if not _traceEnabled:
        return
    _logger.debug(msg)
|
|
57
|
+
|
|
58
|
+
def isEnabledForError():
    """Return whether the 'nls' logger would emit an error-level record."""
    enabled = _logger.isEnabledFor(logging.ERROR)
    return enabled
|
|
60
|
+
|
|
61
|
+
def isEnabledForDebug():
    """Return whether the 'nls' logger would emit a debug-level record."""
    # The level constant is logging.DEBUG; the former logging.Debug does
    # not exist and raised AttributeError on every call.
    return _logger.isEnabledFor(logging.DEBUG)
|
|
63
|
+
|
|
64
|
+
def isEnabledForTrace():
    """Return the trace flag last set by enableTrace()."""
    return _traceEnabled
|