BatchalignHK 0.7.22.post10__tar.gz → 0.7.22.post11__tar.gz
This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/BatchalignHK.egg-info/PKG-INFO +1 -1
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/PKG-INFO +1 -1
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/asr/utils.py +16 -15
- batchalignhk-0.7.22.post11/batchalign/version +3 -0
- batchalignhk-0.7.22.post10/batchalign/version +0 -3
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/BatchalignHK.egg-info/SOURCES.txt +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/BatchalignHK.egg-info/dependency_links.txt +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/BatchalignHK.egg-info/entry_points.txt +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/BatchalignHK.egg-info/requires.txt +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/BatchalignHK.egg-info/top_level.txt +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/LICENSE +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/MANIFEST.in +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/README.md +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/__main__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/cli/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/cli/cli.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/cli/dispatch.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/constants.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/document.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/errors.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/core.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/exception.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/logging.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/realtime_meeting.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/speech_recognizer.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/speech_synthesizer.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/speech_transcriber.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/stream_input_tts.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/token.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/util.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/version.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/_abnf.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/_app.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/_cookiejar.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/_core.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/_exceptions.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/_handshake.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/_http.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/_logging.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/_socket.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/_ssl_compat.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/_url.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/_utils.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/tests/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/tests/echo-server.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/tests/test_abnf.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/tests/test_app.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/tests/test_cookiejar.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/tests/test_http.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/tests/test_url.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/tests/test_websocket.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/formats/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/formats/base.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/formats/chat/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/formats/chat/file.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/formats/chat/generator.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/formats/chat/lexer.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/formats/chat/parser.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/formats/chat/utils.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/formats/textgrid/file.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/resolve.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/speaker/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/speaker/config.yaml +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/speaker/infer.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/speaker/utils.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/training/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/training/run.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/training/utils.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/utils.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/utterance/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/utterance/cantonese_infer.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/utterance/dataset.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/utterance/execute.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/utterance/infer.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/utterance/prep.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/utterance/train.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/wave2vec/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/wave2vec/infer_fa.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/whisper/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/asr/aliyun.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/asr/funaudio.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/asr/num2chinese.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/asr/oai_whisper.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/asr/tencent.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/avqi/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/avqi/engine.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/base.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/diarization/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/diarization/pyannote.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/dispatch.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/morphosyntax/ud.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/pipeline.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/translate/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/translate/gtrans.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/translate/seamless.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/translate/utils.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/utr/funaudio_utr.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/utr/tencent_utr.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/tests/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/tests/conftest.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/tests/test_document.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/utils/__init__.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/utils/abbrev.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/utils/compounds.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/utils/config.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/utils/dp.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/utils/names.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/utils/utils.py +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/setup.cfg +0 -0
- {batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/setup.py +0 -0
|
@@ -60,10 +60,11 @@ def retokenize(intermediate_output):
|
|
|
60
60
|
word = word.replace("。", ".")
|
|
61
61
|
word = word.replace("¿", " ").replace("¡", " ")
|
|
62
62
|
tmp.append((word, bullet))
|
|
63
|
-
if len(word) > 0 and (word in ENDING_PUNCT
|
|
64
|
-
|
|
63
|
+
if len(word) > 0 and (word in ENDING_PUNCT+["؟", "۔", "،", "؛"]
|
|
64
|
+
or word[-1] in ENDING_PUNCT+["؟", "۔", "،", "؛"]):
|
|
65
|
+
if word in ENDING_PUNCT+["؟", "۔", "،", "؛"]:
|
|
65
66
|
final_outputs.append((speaker, tmp))
|
|
66
|
-
elif word[-1] in ENDING_PUNCT:
|
|
67
|
+
elif word[-1] in ENDING_PUNCT+["؟", "۔", "،", "؛"]:
|
|
67
68
|
# we want to separate the ending punct out
|
|
68
69
|
final, time = tmp.pop(-1)
|
|
69
70
|
tmp.append((final[:-1], time))
|
|
@@ -102,7 +103,7 @@ def retokenize_with_engine(intermediate_output, engine):
|
|
|
102
103
|
# because we are using an utterance engine, we need
|
|
103
104
|
# to get rid of all the preexisting punctuation
|
|
104
105
|
for i in utterance:
|
|
105
|
-
for j in MOR_PUNCT+ENDING_PUNCT:
|
|
106
|
+
for j in MOR_PUNCT+ENDING_PUNCT+["؟", "۔", "،", "؛"]:
|
|
106
107
|
i[0] = i[0].strip(j).lower()
|
|
107
108
|
|
|
108
109
|
# remove everything that's now blank
|
|
@@ -118,7 +119,7 @@ def retokenize_with_engine(intermediate_output, engine):
|
|
|
118
119
|
# align the utterance against original splits and generate final outputs
|
|
119
120
|
for i in split:
|
|
120
121
|
# Check if the split has ending punctuation
|
|
121
|
-
if i[-1] in ENDING_PUNCT:
|
|
122
|
+
if i[-1] in ENDING_PUNCT+["؟", "۔", "،", "؛"]:
|
|
122
123
|
new_ut, delim = (i[:-1].split(" "), i[-1])
|
|
123
124
|
else:
|
|
124
125
|
new_ut, delim = (i.split(" "), ".")
|
|
@@ -273,16 +274,8 @@ def process_generation(output, lang="eng", utterance_engine=None):
|
|
|
273
274
|
seen_word = False
|
|
274
275
|
if word.strip() == "":
|
|
275
276
|
continue
|
|
276
|
-
if word not in ENDING_PUNCT+MOR_PUNCT:
|
|
277
|
+
if word not in ENDING_PUNCT+MOR_PUNCT+["؟", "۔", "،", "؛"]:
|
|
277
278
|
word_replaced = word
|
|
278
|
-
if word_replaced.strip() == "؟":
|
|
279
|
-
word_replaced = "?"
|
|
280
|
-
elif word_replaced.strip() == "۔":
|
|
281
|
-
word_replaced = "."
|
|
282
|
-
elif word_replaced.strip() == "،":
|
|
283
|
-
word_replaced = ","
|
|
284
|
-
elif word_replaced.strip() == "؛":
|
|
285
|
-
word_replaced = ";"
|
|
286
279
|
|
|
287
280
|
if start == None or end == None:
|
|
288
281
|
words.append(Form(text=word_replaced, time=None))
|
|
@@ -290,7 +283,15 @@ def process_generation(output, lang="eng", utterance_engine=None):
|
|
|
290
283
|
seen_word = True
|
|
291
284
|
words.append(Form(text=word_replaced, time=(int(start), int(end))))
|
|
292
285
|
else:
|
|
293
|
-
|
|
286
|
+
if word.strip() == "؟":
|
|
287
|
+
word = "?"
|
|
288
|
+
elif word.strip() == "۔":
|
|
289
|
+
word = "."
|
|
290
|
+
elif word.strip() == "،":
|
|
291
|
+
word = ","
|
|
292
|
+
elif word.strip() == "؛":
|
|
293
|
+
word = ";"
|
|
294
|
+
words.append(Form(text=word, time=None))
|
|
294
295
|
|
|
295
296
|
final_utterances.append(Utterance(
|
|
296
297
|
tier=participant,
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/BatchalignHK.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/BatchalignHK.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/BatchalignHK.egg-info/requires.txt
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/BatchalignHK.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/exception.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/realtime_meeting.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/speech_recognizer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/stream_input_tts.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/_abnf.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/_app.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/_core.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/_http.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/_socket.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/_url.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/extern/nls/websocket/_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/formats/chat/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/formats/chat/generator.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/formats/textgrid/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/formats/textgrid/file.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/formats/textgrid/generator.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/formats/textgrid/parser.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/speaker/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/speaker/config.yaml
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/speaker/infer.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/speaker/utils.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/training/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/training/utils.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/utterance/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/utterance/dataset.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/utterance/execute.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/utterance/infer.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/utterance/prep.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/utterance/train.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/wave2vec/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/wave2vec/infer_fa.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/whisper/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/whisper/infer_asr.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/models/whisper/infer_fa.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/analysis/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/analysis/eval.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/asr/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/asr/aliyun.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/asr/funaudio.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/asr/num2chinese.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/asr/oai_whisper.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/asr/tencent.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/asr/whisper.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/asr/whisperx.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/avqi/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/avqi/engine.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/cleanup/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/cleanup/cleanup.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/cleanup/retrace.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/fa/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/fa/wave2vec_fa.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/fa/whisper_fa.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/morphosyntax/coref.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/morphosyntax/ud.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/speaker/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/translate/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/translate/gtrans.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/translate/seamless.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/translate/utils.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/utr/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/utr/funaudio_utr.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/utr/rev_utr.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/utr/tencent_utr.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/utr/whisper_utr.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/pipelines/utterance/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.22.post10 → batchalignhk-0.7.22.post11}/batchalign/tests/pipelines/fixures.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|