BatchalignHK 0.7.19.post18__tar.gz → 0.7.19.post20__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/BatchalignHK.egg-info/PKG-INFO +4 -2
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/BatchalignHK.egg-info/SOURCES.txt +34 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/BatchalignHK.egg-info/requires.txt +2 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/PKG-INFO +4 -2
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/README.md +1 -1
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/cli/cli.py +4 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/__init__.py +10 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/core.py +183 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/exception.py +31 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/logging.py +65 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/realtime_meeting.py +321 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/speech_recognizer.py +315 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/speech_synthesizer.py +288 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/speech_transcriber.py +375 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/stream_input_tts.py +439 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/token.py +49 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/util.py +44 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/version.py +2 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/__init__.py +26 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/_abnf.py +423 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/_app.py +426 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/_cookiejar.py +67 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/_core.py +607 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/_exceptions.py +84 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/_handshake.py +200 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/_http.py +335 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/_logging.py +90 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/_socket.py +182 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/_ssl_compat.py +44 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/_url.py +176 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/_utils.py +104 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/tests/echo-server.py +21 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/tests/test_abnf.py +89 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/tests/test_app.py +179 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/tests/test_cookiejar.py +119 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/tests/test_http.py +177 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/tests/test_url.py +301 -0
- batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket/tests/test_websocket.py +458 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/__init__.py +2 -1
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/asr/__init__.py +1 -0
- batchalignhk-0.7.19.post20/batchalign/pipelines/asr/aliyun.py +254 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/asr/utils.py +1 -1
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/dispatch.py +4 -1
- batchalignhk-0.7.19.post20/batchalign/tests/__init__.py +0 -0
- batchalignhk-0.7.19.post20/batchalign/version +3 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/setup.py +2 -0
- batchalignhk-0.7.19.post18/batchalign/version +0 -3
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/BatchalignHK.egg-info/dependency_links.txt +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/BatchalignHK.egg-info/entry_points.txt +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/BatchalignHK.egg-info/top_level.txt +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/LICENSE +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/MANIFEST.in +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/__init__.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/__main__.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/cli/__init__.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/cli/dispatch.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/constants.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/document.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/errors.py +0 -0
- {batchalignhk-0.7.19.post18/batchalign → batchalignhk-0.7.19.post20/batchalign/extern/nls/websocket}/tests/__init__.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/formats/__init__.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/formats/base.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/formats/chat/__init__.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/formats/chat/file.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/formats/chat/generator.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/formats/chat/lexer.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/formats/chat/parser.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/formats/chat/utils.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/formats/textgrid/file.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/__init__.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/resolve.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/speaker/__init__.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/speaker/config.yaml +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/speaker/infer.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/speaker/utils.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/training/__init__.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/training/run.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/training/utils.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/utils.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/utterance/__init__.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/utterance/cantonese_infer.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/utterance/dataset.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/utterance/execute.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/utterance/infer.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/utterance/prep.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/utterance/train.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/wave2vec/__init__.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/wave2vec/infer_fa.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/whisper/__init__.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/asr/num2chinese.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/asr/oai_whisper.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/asr/tencent.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/base.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/morphosyntax/ud.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/pipeline.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/translate/__init__.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/translate/gtrans.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/translate/seamless.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/translate/utils.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/utr/tencent_utr.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/conftest.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/tests/test_document.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/utils/__init__.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/utils/abbrev.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/utils/config.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/utils/dp.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/utils/names.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/batchalign/utils/utils.py +0 -0
- {batchalignhk-0.7.19.post18 → batchalignhk-0.7.19.post20}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: BatchalignHK
|
|
3
|
-
Version: 0.7.19.post18
|
|
3
|
+
Version: 0.7.19.post20
|
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
|
@@ -37,6 +37,8 @@ Requires-Dist: sentencepiece
|
|
|
37
37
|
Requires-Dist: tencentcloud-sdk-python-common
|
|
38
38
|
Requires-Dist: tencentcloud-sdk-python-asr
|
|
39
39
|
Requires-Dist: googletrans
|
|
40
|
+
Requires-Dist: aliyun-python-sdk-core>=2.13.3
|
|
41
|
+
Requires-Dist: oss2
|
|
40
42
|
Requires-Dist: openai-whisper>=20240930
|
|
41
43
|
Provides-Extra: dev
|
|
42
44
|
Requires-Dist: pytest; extra == "dev"
|
|
@@ -64,7 +66,7 @@ The TalkBank Project, of which Batchalign is a part, is supported by NIH grant H
|
|
|
64
66
|
|
|
65
67
|
## Quick Start
|
|
66
68
|
|
|
67
|
-
The following instructions provide a quick start to installing Batchalign. For most users aiming to process CHAT and audio with Batchalign, we recommend more detailed usage instructions: for [usage](https://talkbank.org/
|
|
69
|
+
The following instructions provide a quick start to installing Batchalign. For most users aiming to process CHAT and audio with Batchalign, we recommend more detailed usage instructions: for [usage](https://talkbank.org/0info/BA2-usage.pdf) and [human transcript cleanup](https://talkbank.org/0info/BA2-cleanup.pdf). The following provides a quick start guide for the program.
|
|
68
70
|
|
|
69
71
|
### Install and Update the Package
|
|
70
72
|
Batchalign is on PyPi (as `batchalign`). We recommend the use of UV to install Batchalign:
|
|
@@ -17,6 +17,39 @@ batchalign/version
|
|
|
17
17
|
batchalign/cli/__init__.py
|
|
18
18
|
batchalign/cli/cli.py
|
|
19
19
|
batchalign/cli/dispatch.py
|
|
20
|
+
batchalign/extern/nls/__init__.py
|
|
21
|
+
batchalign/extern/nls/core.py
|
|
22
|
+
batchalign/extern/nls/exception.py
|
|
23
|
+
batchalign/extern/nls/logging.py
|
|
24
|
+
batchalign/extern/nls/realtime_meeting.py
|
|
25
|
+
batchalign/extern/nls/speech_recognizer.py
|
|
26
|
+
batchalign/extern/nls/speech_synthesizer.py
|
|
27
|
+
batchalign/extern/nls/speech_transcriber.py
|
|
28
|
+
batchalign/extern/nls/stream_input_tts.py
|
|
29
|
+
batchalign/extern/nls/token.py
|
|
30
|
+
batchalign/extern/nls/util.py
|
|
31
|
+
batchalign/extern/nls/version.py
|
|
32
|
+
batchalign/extern/nls/websocket/__init__.py
|
|
33
|
+
batchalign/extern/nls/websocket/_abnf.py
|
|
34
|
+
batchalign/extern/nls/websocket/_app.py
|
|
35
|
+
batchalign/extern/nls/websocket/_cookiejar.py
|
|
36
|
+
batchalign/extern/nls/websocket/_core.py
|
|
37
|
+
batchalign/extern/nls/websocket/_exceptions.py
|
|
38
|
+
batchalign/extern/nls/websocket/_handshake.py
|
|
39
|
+
batchalign/extern/nls/websocket/_http.py
|
|
40
|
+
batchalign/extern/nls/websocket/_logging.py
|
|
41
|
+
batchalign/extern/nls/websocket/_socket.py
|
|
42
|
+
batchalign/extern/nls/websocket/_ssl_compat.py
|
|
43
|
+
batchalign/extern/nls/websocket/_url.py
|
|
44
|
+
batchalign/extern/nls/websocket/_utils.py
|
|
45
|
+
batchalign/extern/nls/websocket/tests/__init__.py
|
|
46
|
+
batchalign/extern/nls/websocket/tests/echo-server.py
|
|
47
|
+
batchalign/extern/nls/websocket/tests/test_abnf.py
|
|
48
|
+
batchalign/extern/nls/websocket/tests/test_app.py
|
|
49
|
+
batchalign/extern/nls/websocket/tests/test_cookiejar.py
|
|
50
|
+
batchalign/extern/nls/websocket/tests/test_http.py
|
|
51
|
+
batchalign/extern/nls/websocket/tests/test_url.py
|
|
52
|
+
batchalign/extern/nls/websocket/tests/test_websocket.py
|
|
20
53
|
batchalign/formats/__init__.py
|
|
21
54
|
batchalign/formats/base.py
|
|
22
55
|
batchalign/formats/chat/__init__.py
|
|
@@ -58,6 +91,7 @@ batchalign/pipelines/pipeline.py
|
|
|
58
91
|
batchalign/pipelines/analysis/__init__.py
|
|
59
92
|
batchalign/pipelines/analysis/eval.py
|
|
60
93
|
batchalign/pipelines/asr/__init__.py
|
|
94
|
+
batchalign/pipelines/asr/aliyun.py
|
|
61
95
|
batchalign/pipelines/asr/num2chinese.py
|
|
62
96
|
batchalign/pipelines/asr/oai_whisper.py
|
|
63
97
|
batchalign/pipelines/asr/rev.py
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: BatchalignHK
|
|
3
|
-
Version: 0.7.19.post18
|
|
3
|
+
Version: 0.7.19.post20
|
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
|
@@ -37,6 +37,8 @@ Requires-Dist: sentencepiece
|
|
|
37
37
|
Requires-Dist: tencentcloud-sdk-python-common
|
|
38
38
|
Requires-Dist: tencentcloud-sdk-python-asr
|
|
39
39
|
Requires-Dist: googletrans
|
|
40
|
+
Requires-Dist: aliyun-python-sdk-core>=2.13.3
|
|
41
|
+
Requires-Dist: oss2
|
|
40
42
|
Requires-Dist: openai-whisper>=20240930
|
|
41
43
|
Provides-Extra: dev
|
|
42
44
|
Requires-Dist: pytest; extra == "dev"
|
|
@@ -64,7 +66,7 @@ The TalkBank Project, of which Batchalign is a part, is supported by NIH grant H
|
|
|
64
66
|
|
|
65
67
|
## Quick Start
|
|
66
68
|
|
|
67
|
-
The following instructions provide a quick start to installing Batchalign. For most users aiming to process CHAT and audio with Batchalign, we recommend more detailed usage instructions: for [usage](https://talkbank.org/
|
|
69
|
+
The following instructions provide a quick start to installing Batchalign. For most users aiming to process CHAT and audio with Batchalign, we recommend more detailed usage instructions: for [usage](https://talkbank.org/0info/BA2-usage.pdf) and [human transcript cleanup](https://talkbank.org/0info/BA2-cleanup.pdf). The following provides a quick start guide for the program.
|
|
68
70
|
|
|
69
71
|
### Install and Update the Package
|
|
70
72
|
Batchalign is on PyPi (as `batchalign`). We recommend the use of UV to install Batchalign:
|
|
@@ -8,7 +8,7 @@ The TalkBank Project, of which Batchalign is a part, is supported by NIH grant H
|
|
|
8
8
|
|
|
9
9
|
## Quick Start
|
|
10
10
|
|
|
11
|
-
The following instructions provide a quick start to installing Batchalign. For most users aiming to process CHAT and audio with Batchalign, we recommend more detailed usage instructions: for [usage](https://talkbank.org/
|
|
11
|
+
The following instructions provide a quick start to installing Batchalign. For most users aiming to process CHAT and audio with Batchalign, we recommend more detailed usage instructions: for [usage](https://talkbank.org/0info/BA2-usage.pdf) and [human transcript cleanup](https://talkbank.org/0info/BA2-cleanup.pdf). The following provides a quick start guide for the program.
|
|
12
12
|
|
|
13
13
|
### Install and Update the Package
|
|
14
14
|
Batchalign is on PyPi (as `batchalign`). We recommend the use of UV to install Batchalign:
|
|
@@ -160,6 +160,8 @@ def align(ctx, in_dir, out_dir, whisper, wav2vec, tencent, **kwargs):
|
|
|
160
160
|
default=False, help="Use Tencent instead of Rev.AI (default).")
|
|
161
161
|
@click.option("--whisperx/--rev",
|
|
162
162
|
default=False, help="Use WhisperX instead of Rev.AI (default). Superceeds --whisper.")
|
|
163
|
+
@click.option("--alibaba/--rev",
|
|
164
|
+
default=False, help="Use Alibaba instead of Rev.AI (default). Superceeds --whisper.")
|
|
163
165
|
@click.option("--diarize/--nodiarize",
|
|
164
166
|
default=False, help="Perform speaker diarization (this flag is ignored with Rev.AI)")
|
|
165
167
|
@click.option("--wor/--nowor",
|
|
@@ -188,6 +190,8 @@ def transcribe(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
|
|
|
188
190
|
asr = "tencent"
|
|
189
191
|
if kwargs["whisper_oai"]:
|
|
190
192
|
asr = "whisper_oai"
|
|
193
|
+
if kwargs["alibaba"]:
|
|
194
|
+
asr = "aliyun"
|
|
191
195
|
|
|
192
196
|
def writer(doc, output):
|
|
193
197
|
doc.content.insert(0, CustomLine(id="Comment", type=CustomLineType.INDEPENDENT,
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
2
|
+
|
|
3
|
+
from .logging import *
|
|
4
|
+
from .speech_recognizer import *
|
|
5
|
+
from .speech_transcriber import *
|
|
6
|
+
from .speech_synthesizer import *
|
|
7
|
+
from .stream_input_tts import *
|
|
8
|
+
from .realtime_meeting import *
|
|
9
|
+
from .util import *
|
|
10
|
+
from .version import __version__
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import threading
|
|
5
|
+
|
|
6
|
+
from enum import Enum, unique
|
|
7
|
+
from queue import Queue
|
|
8
|
+
|
|
9
|
+
from . import logging, token, websocket
|
|
10
|
+
from .exception import InvalidParameter, ConnectionTimeout, ConnectionUnavailable
|
|
11
|
+
|
|
12
|
+
__URL__ = 'wss://nls-gateway.cn-shanghai.aliyuncs.com/ws/v1'
|
|
13
|
+
__HEADER__ = [
|
|
14
|
+
'Sec-WebSocket-Key: x3JJHMbDL1EzLkh9GBhXDw==',
|
|
15
|
+
'Sec-WebSocket-Version: 13',
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
__FORMAT__ = '%(asctime)s - %(levelname)s - %(message)s'
|
|
19
|
+
#__all__ = ['NlsCore']
|
|
20
|
+
|
|
21
|
+
def core_on_msg(ws, message, args):
    """Relay a websocket message to the owning NlsCore's 'on_message' callback.

    Parameters
    ----------
    ws:
        The websocket object that received the message (unused here).
    message:
        The message payload; forwarded unchanged as the only extra argument.
    args:
        Callback arguments supplied by the websocket app; ``args[0]`` is the
        object whose ``_NlsCore__issue_callback`` is invoked.
    """
    logging.debug('core_on_msg:{}'.format(message))
    if args:
        owner = args[0]
        owner._NlsCore__issue_callback('on_message', [message])
        return
    # Without an owner there is nobody to deliver the message to.
    logging.error('callback core_on_msg with null args')
|
|
28
|
+
|
|
29
|
+
def core_on_error(ws, message, args):
    """Relay a websocket error to the owning NlsCore's 'on_error' callback.

    Parameters
    ----------
    ws:
        The websocket object reporting the error (unused here).
    message:
        The error payload; forwarded unchanged as the only extra argument.
    args:
        Callback arguments supplied by the websocket app; ``args[0]`` is the
        object whose ``_NlsCore__issue_callback`` is invoked.
    """
    logging.debug('core_on_error:{}'.format(message))
    if args:
        owner = args[0]
        owner._NlsCore__issue_callback('on_error', [message])
        return
    # Without an owner the error cannot be reported to user code.
    logging.error('callback core_on_error with null args')
|
|
36
|
+
|
|
37
|
+
def core_on_close(ws, close_status_code, close_msg, args):
    """Relay a websocket close event to the owning NlsCore's 'on_close' callback.

    Parameters
    ----------
    ws:
        The websocket object that was closed (unused here).
    close_status_code, close_msg:
        Close details from the websocket layer; they are accepted but not
        forwarded to the callback.
    args:
        Callback arguments supplied by the websocket app; ``args[0]`` is the
        object whose ``_NlsCore__issue_callback`` is invoked.
    """
    logging.debug('core_on_close')
    if args:
        owner = args[0]
        owner._NlsCore__issue_callback('on_close')
        return
    # Without an owner the close event cannot be delivered.
    logging.error('callback core_on_close with null args')
|
|
44
|
+
|
|
45
|
+
def core_on_open(ws, args):
    """Handle the websocket 'open' event for an NlsCore connection.

    On a well-formed call the owner is marked connected, the pending start
    message is sent through ``owner.start`` and the user's 'on_open' callback
    is issued, in that order.

    Parameters
    ----------
    ws:
        The websocket object that was just opened; it is closed again when
        ``args`` is unusable.
    args:
        Expected to be ``[owner, start_message]``: the object exposing the
        NlsCore private API and the message to send once connected.
    """
    logging.debug('core_on_open:{}'.format(args))
    if not args:
        logging.debug('callback with null args')
        ws.close()
        # Nothing to notify: bail out instead of indexing into empty args.
        return
    if len(args) != 2:
        logging.debug('callback args not 2')
        ws.close()
        # The socket was just closed; do not start a session on it.
        return
    nls = args[0]
    nls._NlsCore__notify_on_open()
    nls.start(args[1], nls._NlsCore__ping_interval, nls._NlsCore__ping_timeout)
    nls._NlsCore__issue_callback('on_open')
|
|
57
|
+
|
|
58
|
+
def core_on_data(ws, data, opcode, flag, args):
    """Relay a websocket data frame to the owning NlsCore's 'on_data' callback.

    Parameters
    ----------
    ws:
        The websocket object that received the frame (unused here).
    data, opcode, flag:
        Frame details from the websocket layer; forwarded to the callback in
        this order.
    args:
        Callback arguments supplied by the websocket app; ``args[0]`` is the
        object whose ``_NlsCore__issue_callback`` is invoked.
    """
    logging.debug('core_on_data opcode={}'.format(opcode))
    if args:
        owner = args[0]
        frame_details = [data, opcode, flag]
        owner._NlsCore__issue_callback('on_data', frame_details)
        return
    # Without an owner the frame cannot be delivered.
    logging.error('callback core_on_data with null args')
|
|
65
|
+
|
|
66
|
+
@unique
class NlsConnectionStatus(Enum):
    """Connection state tracked by NlsCore; values are kept distinct by @unique."""

    # No websocket session is currently established.
    Disconnected = 0
    # The websocket 'open' event has been received.
    Connected = 1
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class NlsCore:
    """
    NlsCore

    Owns one ``websocket.WebSocketApp`` and dispatches its events to the
    user-supplied callbacks. The module-level ``core_on_*`` functions are
    registered with the websocket app and call back into this object through
    its name-mangled private methods.
    """
    def __init__(self,
                 url=__URL__,
                 token=None,
                 on_open=None, on_message=None, on_close=None,
                 on_error=None, on_data=None, asynch=False, callback_args=None):
        """Create the websocket app; no connection is made until ``start``.

        Parameters
        ----------
        url:
            Websocket endpoint to connect to.
        token:
            Access token, sent as the ``X-NLS-Token`` header. Required.
        on_open, on_message, on_close, on_error, on_data:
            Optional user callbacks; at least one must be given. Each is
            called with the event's own arguments followed by
            ``callback_args``.
        asynch:
            When true, ``start`` does not wait for the connection to open.
        callback_args:
            List of extra arguments appended to every callback invocation.
            Defaults to a fresh empty list per instance.

        Raises
        ------
        InvalidParameter
            If ``token`` is missing or no callback is supplied.
        """
        self.__url = url
        self.__async = asynch
        if not token:
            raise InvalidParameter('Must provide a valid token!')
        self.__token = token

        supplied = {
            'on_open': on_open,
            'on_message': on_message,
            'on_close': on_close,
            'on_error': on_error,
            'on_data': on_data,
        }
        self.__callbacks = {name: cb for name, cb in supplied.items() if cb}
        # An on_data-only client counts as having supplied a callback.
        if not self.__callbacks:
            raise InvalidParameter('Must provide at least one callback')

        # A None default avoids sharing one list between all instances.
        if callback_args is None:
            callback_args = []
        logging.debug('callback args:{}'.format(callback_args))
        self.__callback_args = callback_args
        self.__header = __HEADER__ + ['X-NLS-Token: {}'.format(self.__token)]
        websocket.enableTrace(True)
        # callback_args=[self] is what the core_on_* functions read as args[0].
        self.__ws = websocket.WebSocketApp(self.__url,
                                           self.__header,
                                           on_message=core_on_msg,
                                           on_data=core_on_data,
                                           on_error=core_on_error,
                                           on_close=core_on_close,
                                           callback_args=[self])
        self.__ws.on_open = core_on_open
        # NOTE(review): the connection status is read/written under __lock in
        # start/send/__run but under __cond in the notify paths -- confirm
        # that using two different locks for the same field is intended.
        self.__lock = threading.Lock()
        self.__cond = threading.Condition()
        self.__connection_status = NlsConnectionStatus.Disconnected

    def start(self, msg, ping_interval, ping_timeout):
        """Send ``msg``, connecting first if currently disconnected.

        When disconnected, ``msg`` is stored on the websocket app (it is sent
        by ``core_on_open`` once the socket opens) and the connection thread
        is started. When already connected, ``msg`` is sent immediately.

        Raises
        ------
        ConnectionTimeout
            In synchronous mode, if the connection wait times out.
        """
        # The context manager releases the lock even if update_args raises.
        with self.__lock:
            self.__ping_interval = ping_interval
            self.__ping_timeout = ping_timeout
            must_connect = (self.__connection_status
                            == NlsConnectionStatus.Disconnected)
            if must_connect:
                self.__ws.update_args(self, msg)
        # Network work happens outside the lock, as in the original flow.
        if must_connect:
            self.__connect_before_start(ping_interval, ping_timeout)
        else:
            self.__ws.send(msg)

    def __notify_on_open(self):
        """Mark the connection as established and wake a waiting ``start``."""
        logging.debug('notify on open')
        with self.__cond:
            self.__connection_status = NlsConnectionStatus.Connected
            self.__cond.notify()

    def __issue_callback(self, which, exargs=None):
        """Invoke the user callback registered under ``which``.

        ``exargs`` (event-specific arguments) are passed first, followed by
        the instance's ``callback_args``. An unregistered callback is logged
        and skipped; in that case the 'on_close' status update below is
        skipped too.
        """
        if which not in self.__callbacks:
            logging.error('no such callback:{}'.format(which))
            return
        if which == 'on_close':
            with self.__cond:
                self.__connection_status = NlsConnectionStatus.Disconnected
                self.__cond.notify()
        args = ([] if exargs is None else exargs) + self.__callback_args
        self.__callbacks[which](*args)

    def send(self, msg, binary):
        """Send ``msg`` over the open websocket.

        Parameters
        ----------
        msg:
            Payload to send.
        binary:
            When true the payload is sent with the binary opcode; otherwise
            it is logged and sent with the websocket's default opcode.

        Raises
        ------
        ConnectionUnavailable
            If the connection is not established.
        """
        with self.__lock:
            disconnected = (self.__connection_status
                            == NlsConnectionStatus.Disconnected)
        if disconnected:
            logging.error('start before send')
            raise ConnectionUnavailable('Must call start before send!')
        if binary:
            self.__ws.send(msg, opcode=websocket.ABNF.OPCODE_BINARY)
        else:
            logging.debug('send {}'.format(msg))
            self.__ws.send(msg)

    def shutdown(self):
        """Close the underlying websocket."""
        self.__ws.close()

    def __run(self, ping_interval, ping_timeout):
        """Thread body: run the websocket loop, then mark as disconnected."""
        logging.debug('ws run...')
        self.__ws.run_forever(ping_interval=ping_interval,
                              ping_timeout=ping_timeout)
        with self.__lock:
            self.__connection_status = NlsConnectionStatus.Disconnected
        logging.debug('ws exit...')

    def __connect_before_start(self, ping_interval, ping_timeout):
        """Start the websocket thread and, in synchronous mode, await the open.

        Returns
        -------
        bool or None
            In synchronous mode, whether the status is Connected after the
            wake-up (False when a close arrived first). None in asynchronous
            mode or when the status is already Connected.

        Raises
        ------
        ConnectionTimeout
            If nothing signals the condition within 10 seconds.
        """
        with self.__cond:
            self.__th = threading.Thread(target=self.__run,
                                         args=[ping_interval, ping_timeout])
            self.__th.start()
            if self.__connection_status != NlsConnectionStatus.Disconnected:
                return None
            logging.debug('wait cond wakeup')
            if self.__async:
                return None
            if self.__cond.wait(timeout=10):
                logging.debug('wakeup without timeout')
                return self.__connection_status == NlsConnectionStatus.Connected
            logging.debug('wakeup with timeout')
            raise ConnectionTimeout('Wait response timeout! Please check local network!')
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class InvalidParameter(Exception):
    """Signals that a caller supplied a missing or unusable argument."""
|
|
6
|
+
|
|
7
|
+
# Token
|
|
8
|
+
class GetTokenFailed(Exception):
    """Token-related failure (presumably raised when a token cannot be obtained -- verify against the token module)."""
|
|
10
|
+
|
|
11
|
+
# Connection
|
|
12
|
+
class ConnectionTimeout(Exception):
    """Signals that waiting for a connection response timed out."""
|
|
14
|
+
|
|
15
|
+
class ConnectionUnavailable(Exception):
    """Signals that an operation needed a connection that is not established."""
|
|
17
|
+
|
|
18
|
+
class StartTimeoutException(Exception):
    """Start-timeout error (presumably a start request that got no timely reply -- verify against callers)."""
|
|
20
|
+
|
|
21
|
+
class StopTimeoutException(Exception):
    """Stop-timeout error (presumably a stop request that got no timely reply -- verify against callers)."""
|
|
23
|
+
|
|
24
|
+
class NotStartException(Exception):
    """Not-started error (presumably an operation attempted before start -- verify against callers)."""
|
|
26
|
+
|
|
27
|
+
class CompleteTimeoutException(Exception):
    """Completion-timeout error (presumably a wait for completion that expired -- verify against callers)."""
|
|
29
|
+
|
|
30
|
+
class WrongStateException(Exception):
    """Wrong-state error (presumably an operation invalid in the current state -- verify against callers)."""
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
# Package-wide logger; every helper in this module writes through it.
_logger = logging.getLogger('nls')

# Use the stdlib NullHandler when it can be imported, otherwise fall back to
# an equivalent local handler that discards every record.
try:
    from logging import NullHandler
except ImportError:
    class NullHandler(logging.Handler):
        """Handler whose emit() does nothing."""

        def emit(self, record):
            pass

# Attach a do-nothing handler to the package logger.
_logger.addHandler(NullHandler())
|
|
15
|
+
_traceEnabled = False
|
|
16
|
+
__LOG_FORMAT__ = '%(asctime)s - %(levelname)s - %(message)s'
|
|
17
|
+
|
|
18
|
+
__all__=['enableTrace', 'dump', 'error', 'warning', 'debug', 'trace',
|
|
19
|
+
'isEnabledForError', 'isEnabledForDebug', 'isEnabledForTrace']
|
|
20
|
+
|
|
21
|
+
def enableTrace(traceable, handler=logging.StreamHandler()):
    """
    Switch trace logging on or off.

    Parameters
    ----------
    traceable: bool
        New value of the module-level trace flag. When true, ``handler`` is
        attached to the package logger, the logger level is set to
        logging.DEBUG and the handler receives the module log format.
        When false only the flag is updated; handlers and level are left
        untouched.
    handler: Handler object
        Destination for log records. The default is a single
        logging.StreamHandler created once, when this function is defined.
    """
    global _traceEnabled
    _traceEnabled = traceable
    if not traceable:
        return
    _logger.addHandler(handler)
    _logger.setLevel(logging.DEBUG)
    handler.setFormatter(logging.Formatter(__LOG_FORMAT__))
|
|
38
|
+
|
|
39
|
+
def dump(title, message):
    """Log ``message`` at DEBUG level between a title banner and a footer.

    Nothing is logged unless tracing has been enabled.
    """
    if not _traceEnabled:
        return
    banner = '### ' + title + ' ###'
    _logger.debug(banner)
    _logger.debug(message)
    _logger.debug('########################################')
|
|
44
|
+
|
|
45
|
+
def error(msg):
    """Log ``msg`` on the package logger at ERROR level."""
    _logger.error(msg)
|
|
47
|
+
|
|
48
|
+
def warning(msg):
    """Log ``msg`` on the package logger at WARNING level."""
    _logger.warning(msg)
|
|
50
|
+
|
|
51
|
+
def debug(msg):
    """Log ``msg`` on the package logger at DEBUG level."""
    _logger.debug(msg)
|
|
53
|
+
|
|
54
|
+
def trace(msg):
    """Log ``msg`` at DEBUG level, but only while tracing is enabled."""
    if not _traceEnabled:
        return
    _logger.debug(msg)
|
|
57
|
+
|
|
58
|
+
def isEnabledForError():
    """Return whether the package logger would emit ERROR-level records."""
    level = logging.ERROR
    return _logger.isEnabledFor(level)
|
|
60
|
+
|
|
61
|
+
def isEnabledForDebug():
    """Return whether the package logger would emit DEBUG-level records."""
    # The stdlib constant is logging.DEBUG; `logging.Debug` does not exist
    # and raised AttributeError on every call.
    return _logger.isEnabledFor(logging.DEBUG)
|
|
63
|
+
|
|
64
|
+
def isEnabledForTrace():
    """Return the current value of the module-level trace flag."""
    trace_flag = _traceEnabled
    return trace_flag
|