BatchalignHK 0.7.22.post6__tar.gz → 0.7.22.post8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/BatchalignHK.egg-info/PKG-INFO +1 -1
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/PKG-INFO +1 -1
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/formats/chat/file.py +4 -2
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/formats/chat/generator.py +4 -1
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/formats/chat/utils.py +1 -1
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/analysis/eval.py +5 -1
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/asr/tencent.py +4 -28
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/asr/utils.py +8 -2
- batchalignhk-0.7.22.post8/batchalign/version +3 -0
- batchalignhk-0.7.22.post6/batchalign/version +0 -3
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/BatchalignHK.egg-info/SOURCES.txt +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/BatchalignHK.egg-info/dependency_links.txt +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/BatchalignHK.egg-info/entry_points.txt +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/BatchalignHK.egg-info/requires.txt +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/BatchalignHK.egg-info/top_level.txt +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/LICENSE +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/MANIFEST.in +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/README.md +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/__main__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/cli/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/cli/cli.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/cli/dispatch.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/constants.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/document.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/errors.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/core.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/exception.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/logging.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/realtime_meeting.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/speech_recognizer.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/speech_synthesizer.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/speech_transcriber.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/stream_input_tts.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/token.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/util.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/version.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/_abnf.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/_app.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/_cookiejar.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/_core.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/_exceptions.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/_handshake.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/_http.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/_logging.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/_socket.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/_ssl_compat.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/_url.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/_utils.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/tests/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/tests/echo-server.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/tests/test_abnf.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/tests/test_app.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/tests/test_cookiejar.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/tests/test_http.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/tests/test_url.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/tests/test_websocket.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/formats/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/formats/base.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/formats/chat/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/formats/chat/lexer.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/formats/chat/parser.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/formats/textgrid/file.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/resolve.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/speaker/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/speaker/config.yaml +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/speaker/infer.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/speaker/utils.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/training/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/training/run.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/training/utils.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/utils.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/utterance/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/utterance/cantonese_infer.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/utterance/dataset.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/utterance/execute.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/utterance/infer.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/utterance/prep.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/utterance/train.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/wave2vec/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/wave2vec/infer_fa.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/whisper/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/asr/aliyun.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/asr/funaudio.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/asr/num2chinese.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/asr/oai_whisper.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/avqi/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/avqi/engine.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/base.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/diarization/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/diarization/pyannote.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/dispatch.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/morphosyntax/ud.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/pipeline.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/translate/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/translate/gtrans.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/translate/seamless.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/translate/utils.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/utr/funaudio_utr.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/utr/tencent_utr.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/tests/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/tests/conftest.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/tests/test_document.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/utils/__init__.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/utils/abbrev.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/utils/compounds.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/utils/config.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/utils/dp.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/utils/names.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/utils/utils.py +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/setup.cfg +0 -0
- {batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/setup.py +0 -0
|
@@ -127,8 +127,10 @@ class CHATFile(BaseFormat):
|
|
|
127
127
|
elif str(i).strip() in ENDING_PUNCT:
|
|
128
128
|
continue
|
|
129
129
|
else:
|
|
130
|
-
main.append(generate_chat_utterance(i,
|
|
131
|
-
|
|
130
|
+
main.append(generate_chat_utterance(i,
|
|
131
|
+
special and doc.langs[0] == "eng",
|
|
132
|
+
write_wor=write_wor,
|
|
133
|
+
merge_letters="yue" in doc.langs))
|
|
132
134
|
main.append("@End\n")
|
|
133
135
|
|
|
134
136
|
raw = "\n".join(main)
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/formats/chat/generator.py
RENAMED
|
@@ -11,7 +11,7 @@ import warnings
|
|
|
11
11
|
# document[3].text = None
|
|
12
12
|
# document[3].model_dump()
|
|
13
13
|
|
|
14
|
-
def generate_chat_utterance(utterance: Utterance, special_mor=False, write_wor=True):
|
|
14
|
+
def generate_chat_utterance(utterance: Utterance, special_mor=False, write_wor=True, merge_letters=False):
|
|
15
15
|
"""Converts at Utterance to a CHAT string.
|
|
16
16
|
|
|
17
17
|
Parameters
|
|
@@ -30,6 +30,9 @@ def generate_chat_utterance(utterance: Utterance, special_mor=False, write_wor=T
|
|
|
30
30
|
main_line = str(utterance)
|
|
31
31
|
# last minut ecorrections
|
|
32
32
|
# main_line = re.sub(r"<([\w ]+) \[\/", r"<\1> [/", main_line)
|
|
33
|
+
if merge_letters:
|
|
34
|
+
main_line = re.sub(r"([a-z]) ", r"\1", main_line)
|
|
35
|
+
main_line = re.sub(r"([a-z])([^a-z])", r"\1 \2", main_line)
|
|
33
36
|
main_line = re.sub(r"«", "“", main_line)
|
|
34
37
|
main_line = re.sub(r"»", "”", main_line)
|
|
35
38
|
main_line = re.sub(r"—", "-", main_line)
|
|
@@ -56,7 +56,7 @@ def chat_parse_mor(mor_str):
|
|
|
56
56
|
lemmas, feats = zip(*[(i[0], "-".join(i[1:])) for i in feats])
|
|
57
57
|
pos = [i[0] for i in mors]
|
|
58
58
|
except:
|
|
59
|
-
raise CHATValidationException(f"mor parser
|
|
59
|
+
raise CHATValidationException(f"mor parser received invalid mor string: '{mor_str}'")
|
|
60
60
|
|
|
61
61
|
|
|
62
62
|
mors = []
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/analysis/eval.py
RENAMED
|
@@ -106,11 +106,15 @@ def conform(x):
|
|
|
106
106
|
elif "hadta" == i.strip():
|
|
107
107
|
result.append("had")
|
|
108
108
|
result.append("to")
|
|
109
|
-
elif "eh" == i.strip():
|
|
109
|
+
elif "eh" == i.strip()j:
|
|
110
110
|
result.append("uh")
|
|
111
111
|
elif "kinda" == i.strip():
|
|
112
112
|
result.append("kind")
|
|
113
113
|
result.append("of")
|
|
114
|
+
elif "ed" == i.strip():
|
|
115
|
+
result.append("education")
|
|
116
|
+
elif "til" == i.strip():
|
|
117
|
+
result.append("until")
|
|
114
118
|
elif "gonna" == i.strip():
|
|
115
119
|
result.append("going")
|
|
116
120
|
result.append("to")
|
|
@@ -189,40 +189,17 @@ class TencentEngine(BatchalignEngine):
|
|
|
189
189
|
roman_cache = ""
|
|
190
190
|
roman_cache_start = i.StartMs
|
|
191
191
|
roman_cache_end = i.StartMs
|
|
192
|
+
|
|
192
193
|
for j in i.Words:
|
|
193
194
|
word = j.Word
|
|
194
195
|
if self.__lang == "yue":
|
|
195
196
|
word = cc.convert(word)
|
|
196
|
-
|
|
197
197
|
word = self.replace_cantonese_words(word)
|
|
198
|
-
|
|
199
|
-
if self.is_roman(word):
|
|
200
|
-
if roman_cache == "":
|
|
201
|
-
roman_cache_start = (j.OffsetStartMs + start)
|
|
202
|
-
roman_cache = roman_cache + word
|
|
203
|
-
roman_cache_end = (j.OffsetEndMs + start)
|
|
204
|
-
else:
|
|
205
|
-
if roman_cache != "":
|
|
206
|
-
turn.append({
|
|
207
|
-
"type": "text",
|
|
208
|
-
"ts": roman_cache_start / 1000,
|
|
209
|
-
"end_ts": roman_cache_end / 1000,
|
|
210
|
-
"value": roman_cache
|
|
211
|
-
})
|
|
212
|
-
roman_cache = ""
|
|
213
|
-
turn.append({
|
|
214
|
-
"type": "text",
|
|
215
|
-
"ts": (j.OffsetStartMs + start) / 1000,
|
|
216
|
-
"end_ts": (j.OffsetEndMs + start) / 1000,
|
|
217
|
-
"value": word
|
|
218
|
-
})
|
|
219
|
-
|
|
220
|
-
if roman_cache != "":
|
|
221
198
|
turn.append({
|
|
222
199
|
"type": "text",
|
|
223
|
-
"ts": roman_cache_start / 1000,
|
|
224
|
-
"end_ts": roman_cache_end / 1000,
|
|
225
|
-
"value": roman_cache
|
|
200
|
+
"ts": (j.OffsetStartMs + start) / 1000,
|
|
201
|
+
"end_ts": (j.OffsetEndMs + start) / 1000,
|
|
202
|
+
"value": word
|
|
226
203
|
})
|
|
227
204
|
|
|
228
205
|
turns.append({
|
|
@@ -232,7 +209,6 @@ class TencentEngine(BatchalignEngine):
|
|
|
232
209
|
L.debug(f"Tencent done.")
|
|
233
210
|
|
|
234
211
|
# Extract the text from the small volume parts for translation
|
|
235
|
-
|
|
236
212
|
doc = process_generation({"monologues": turns},
|
|
237
213
|
self.__lang_code,
|
|
238
214
|
utterance_engine=self.__engine)
|
|
@@ -275,8 +275,14 @@ def process_generation(output, lang="eng", utterance_engine=None):
|
|
|
275
275
|
continue
|
|
276
276
|
if word not in ENDING_PUNCT+MOR_PUNCT:
|
|
277
277
|
word_replaced = word
|
|
278
|
-
if word_replaced.strip() == "
|
|
279
|
-
word_replaced = "
|
|
278
|
+
if word_replaced.strip() == "؟":
|
|
279
|
+
word_replaced = "?"
|
|
280
|
+
elif word_replaced.strip() == "۔":
|
|
281
|
+
word_replaced = "."
|
|
282
|
+
elif word_replaced.strip() == "،":
|
|
283
|
+
word_replaced = ","
|
|
284
|
+
elif word_replaced.strip() == "؛":
|
|
285
|
+
word_replaced = ";"
|
|
280
286
|
|
|
281
287
|
if start == None or end == None:
|
|
282
288
|
words.append(Form(text=word_replaced, time=None))
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/BatchalignHK.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/BatchalignHK.egg-info/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/realtime_meeting.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/speech_recognizer.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/speech_synthesizer.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/speech_transcriber.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/stream_input_tts.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/_abnf.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/_app.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/_core.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/_http.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/_logging.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/_socket.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/_url.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/extern/nls/websocket/_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/formats/textgrid/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/formats/textgrid/generator.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/formats/textgrid/parser.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/speaker/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/speaker/config.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/training/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/utterance/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/utterance/dataset.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/utterance/execute.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/utterance/infer.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/utterance/train.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/wave2vec/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/wave2vec/infer_fa.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/whisper/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/whisper/infer_asr.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/models/whisper/infer_fa.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/analysis/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/asr/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/asr/funaudio.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/asr/num2chinese.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/asr/oai_whisper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/asr/whisperx.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/avqi/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/cleanup/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/cleanup/cleanup.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/cleanup/disfluencies.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/cleanup/retrace.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/diarization/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/diarization/pyannote.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/fa/wave2vec_fa.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/fa/whisper_fa.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/morphosyntax/coref.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/morphosyntax/en/irr.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/morphosyntax/fr/apm.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/morphosyntax/fr/apmn.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/morphosyntax/fr/case.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/morphosyntax/ud.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/speaker/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/speaker/nemo_speaker.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/translate/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/translate/gtrans.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/translate/seamless.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/translate/utils.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/utr/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/utr/funaudio_utr.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/utr/tencent_utr.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/utr/whisper_utr.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/pipelines/utterance/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/tests/pipelines/fixures.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post6 → batchalignhk-0.7.22.post8}/batchalign/tests/pipelines/test_pipeline.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|