BatchalignHK 0.7.22.post6__tar.gz → 0.7.22.post7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/BatchalignHK.egg-info/PKG-INFO +1 -1
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/PKG-INFO +1 -1
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/formats/chat/file.py +4 -2
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/formats/chat/generator.py +4 -1
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/asr/tencent.py +4 -28
- batchalignhk-0.7.22.post7/batchalign/version +3 -0
- batchalignhk-0.7.22.post6/batchalign/version +0 -3
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/BatchalignHK.egg-info/SOURCES.txt +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/BatchalignHK.egg-info/dependency_links.txt +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/BatchalignHK.egg-info/entry_points.txt +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/BatchalignHK.egg-info/requires.txt +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/BatchalignHK.egg-info/top_level.txt +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/LICENSE +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/MANIFEST.in +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/README.md +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/__main__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/cli/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/cli/cli.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/cli/dispatch.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/constants.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/document.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/errors.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/core.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/exception.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/logging.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/realtime_meeting.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/speech_recognizer.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/speech_synthesizer.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/speech_transcriber.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/stream_input_tts.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/token.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/util.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/version.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_abnf.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_app.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_cookiejar.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_core.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_exceptions.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_handshake.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_http.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_logging.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_socket.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_ssl_compat.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_url.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_utils.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/tests/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/tests/echo-server.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/tests/test_abnf.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/tests/test_app.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/tests/test_cookiejar.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/tests/test_http.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/tests/test_url.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/tests/test_websocket.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/formats/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/formats/base.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/formats/chat/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/formats/chat/lexer.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/formats/chat/parser.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/formats/chat/utils.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/formats/textgrid/file.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/resolve.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/speaker/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/speaker/config.yaml +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/speaker/infer.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/speaker/utils.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/training/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/training/run.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/training/utils.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/utils.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/utterance/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/utterance/cantonese_infer.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/utterance/dataset.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/utterance/execute.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/utterance/infer.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/utterance/prep.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/utterance/train.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/wave2vec/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/wave2vec/infer_fa.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/whisper/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/asr/aliyun.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/asr/funaudio.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/asr/num2chinese.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/asr/oai_whisper.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/avqi/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/avqi/engine.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/base.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/diarization/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/diarization/pyannote.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/dispatch.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/morphosyntax/ud.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/pipeline.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/translate/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/translate/gtrans.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/translate/seamless.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/translate/utils.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/utr/funaudio_utr.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/utr/tencent_utr.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/tests/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/tests/conftest.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/tests/test_document.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/utils/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/utils/abbrev.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/utils/compounds.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/utils/config.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/utils/dp.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/utils/names.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/utils/utils.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/setup.cfg +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/setup.py +0 -0
|
@@ -127,8 +127,10 @@ class CHATFile(BaseFormat):
|
|
|
127
127
|
elif str(i).strip() in ENDING_PUNCT:
|
|
128
128
|
continue
|
|
129
129
|
else:
|
|
130
|
-
main.append(generate_chat_utterance(i,
|
|
131
|
-
|
|
130
|
+
main.append(generate_chat_utterance(i,
|
|
131
|
+
special and doc.langs[0] == "eng",
|
|
132
|
+
write_wor=write_wor,
|
|
133
|
+
merge_letters="yue" in doc.langs))
|
|
132
134
|
main.append("@End\n")
|
|
133
135
|
|
|
134
136
|
raw = "\n".join(main)
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/formats/chat/generator.py
RENAMED
|
@@ -11,7 +11,7 @@ import warnings
|
|
|
11
11
|
# document[3].text = None
|
|
12
12
|
# document[3].model_dump()
|
|
13
13
|
|
|
14
|
-
def generate_chat_utterance(utterance: Utterance, special_mor=False, write_wor=True):
|
|
14
|
+
def generate_chat_utterance(utterance: Utterance, special_mor=False, write_wor=True, merge_letters=False):
|
|
15
15
|
"""Converts at Utterance to a CHAT string.
|
|
16
16
|
|
|
17
17
|
Parameters
|
|
@@ -30,6 +30,9 @@ def generate_chat_utterance(utterance: Utterance, special_mor=False, write_wor=T
|
|
|
30
30
|
main_line = str(utterance)
|
|
31
31
|
# last minut ecorrections
|
|
32
32
|
# main_line = re.sub(r"<([\w ]+) \[\/", r"<\1> [/", main_line)
|
|
33
|
+
if merge_letters:
|
|
34
|
+
main_line = re.sub(r"([a-z]) ", r"\1", main_line)
|
|
35
|
+
main_line = re.sub(r"([a-z])([^a-z])", r"\1 \2", main_line)
|
|
33
36
|
main_line = re.sub(r"«", "“", main_line)
|
|
34
37
|
main_line = re.sub(r"»", "”", main_line)
|
|
35
38
|
main_line = re.sub(r"—", "-", main_line)
|
|
@@ -189,40 +189,17 @@ class TencentEngine(BatchalignEngine):
|
|
|
189
189
|
roman_cache = ""
|
|
190
190
|
roman_cache_start = i.StartMs
|
|
191
191
|
roman_cache_end = i.StartMs
|
|
192
|
+
|
|
192
193
|
for j in i.Words:
|
|
193
194
|
word = j.Word
|
|
194
195
|
if self.__lang == "yue":
|
|
195
196
|
word = cc.convert(word)
|
|
196
|
-
|
|
197
197
|
word = self.replace_cantonese_words(word)
|
|
198
|
-
|
|
199
|
-
if self.is_roman(word):
|
|
200
|
-
if roman_cache == "":
|
|
201
|
-
roman_cache_start = (j.OffsetStartMs + start)
|
|
202
|
-
roman_cache = roman_cache + word
|
|
203
|
-
roman_cache_end = (j.OffsetEndMs + start)
|
|
204
|
-
else:
|
|
205
|
-
if roman_cache != "":
|
|
206
|
-
turn.append({
|
|
207
|
-
"type": "text",
|
|
208
|
-
"ts": roman_cache_start / 1000,
|
|
209
|
-
"end_ts": roman_cache_end / 1000,
|
|
210
|
-
"value": roman_cache
|
|
211
|
-
})
|
|
212
|
-
roman_cache = ""
|
|
213
|
-
turn.append({
|
|
214
|
-
"type": "text",
|
|
215
|
-
"ts": (j.OffsetStartMs + start) / 1000,
|
|
216
|
-
"end_ts": (j.OffsetEndMs + start) / 1000,
|
|
217
|
-
"value": word
|
|
218
|
-
})
|
|
219
|
-
|
|
220
|
-
if roman_cache != "":
|
|
221
198
|
turn.append({
|
|
222
199
|
"type": "text",
|
|
223
|
-
"ts":
|
|
224
|
-
"end_ts":
|
|
225
|
-
"value":
|
|
200
|
+
"ts": (j.OffsetStartMs + start) / 1000,
|
|
201
|
+
"end_ts": (j.OffsetEndMs + start) / 1000,
|
|
202
|
+
"value": word
|
|
226
203
|
})
|
|
227
204
|
|
|
228
205
|
turns.append({
|
|
@@ -232,7 +209,6 @@ class TencentEngine(BatchalignEngine):
|
|
|
232
209
|
L.debug(f"Tencent done.")
|
|
233
210
|
|
|
234
211
|
# Extract the text from the small volume parts for translation
|
|
235
|
-
|
|
236
212
|
doc = process_generation({"monologues": turns},
|
|
237
213
|
self.__lang_code,
|
|
238
214
|
utterance_engine=self.__engine)
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/BatchalignHK.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/BatchalignHK.egg-info/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/realtime_meeting.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/speech_recognizer.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/speech_synthesizer.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/speech_transcriber.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/stream_input_tts.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_abnf.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_app.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_core.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_http.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_logging.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_socket.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_url.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/extern/nls/websocket/_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/formats/textgrid/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/formats/textgrid/generator.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/formats/textgrid/parser.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/speaker/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/speaker/config.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/training/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/utterance/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/utterance/dataset.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/utterance/execute.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/utterance/infer.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/utterance/train.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/wave2vec/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/wave2vec/infer_fa.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/whisper/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/whisper/infer_asr.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/models/whisper/infer_fa.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/analysis/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/analysis/eval.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/asr/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/asr/funaudio.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/asr/num2chinese.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/asr/oai_whisper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/asr/whisperx.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/avqi/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/cleanup/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/cleanup/cleanup.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/cleanup/disfluencies.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/cleanup/retrace.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/diarization/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/diarization/pyannote.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/fa/wave2vec_fa.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/fa/whisper_fa.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/morphosyntax/coref.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/morphosyntax/en/irr.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/morphosyntax/fr/apm.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/morphosyntax/fr/apmn.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/morphosyntax/fr/case.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/morphosyntax/ud.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/speaker/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/speaker/nemo_speaker.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/translate/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/translate/gtrans.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/translate/seamless.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/translate/utils.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/utr/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/utr/funaudio_utr.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/utr/tencent_utr.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/utr/whisper_utr.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/pipelines/utterance/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/tests/pipelines/fixures.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post7}/batchalign/tests/pipelines/test_pipeline.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|