BatchalignHK 0.7.20.post11__tar.gz → 0.7.20.post12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/BatchalignHK.egg-info/PKG-INFO +1 -1
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/PKG-INFO +1 -1
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/document.py +5 -2
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/formats/chat/lexer.py +10 -8
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/resolve.py +1 -1
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/whisper/infer_asr.py +2 -2
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/morphosyntax/ud.py +4 -1
- batchalignhk-0.7.20.post12/batchalign/version +3 -0
- batchalignhk-0.7.20.post11/batchalign/version +0 -3
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/BatchalignHK.egg-info/SOURCES.txt +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/BatchalignHK.egg-info/dependency_links.txt +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/BatchalignHK.egg-info/entry_points.txt +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/BatchalignHK.egg-info/requires.txt +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/BatchalignHK.egg-info/top_level.txt +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/LICENSE +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/MANIFEST.in +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/README.md +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/__main__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/cli/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/cli/cli.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/cli/dispatch.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/constants.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/errors.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/core.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/exception.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/logging.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/realtime_meeting.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/speech_recognizer.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/speech_synthesizer.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/speech_transcriber.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/stream_input_tts.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/token.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/util.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/version.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/_abnf.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/_app.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/_cookiejar.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/_core.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/_exceptions.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/_handshake.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/_http.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/_logging.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/_socket.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/_ssl_compat.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/_url.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/_utils.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/tests/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/tests/echo-server.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/tests/test_abnf.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/tests/test_app.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/tests/test_cookiejar.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/tests/test_http.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/tests/test_url.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/tests/test_websocket.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/formats/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/formats/base.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/formats/chat/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/formats/chat/file.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/formats/chat/generator.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/formats/chat/parser.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/formats/chat/utils.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/formats/textgrid/file.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/speaker/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/speaker/config.yaml +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/speaker/infer.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/speaker/utils.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/training/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/training/run.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/training/utils.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/utils.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/utterance/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/utterance/cantonese_infer.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/utterance/dataset.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/utterance/execute.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/utterance/infer.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/utterance/prep.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/utterance/train.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/wave2vec/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/wave2vec/infer_fa.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/whisper/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/asr/aliyun.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/asr/num2chinese.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/asr/oai_whisper.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/asr/tencent.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/base.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/dispatch.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/pipeline.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/translate/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/translate/gtrans.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/translate/seamless.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/translate/utils.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/utr/tencent_utr.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/tests/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/tests/conftest.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/tests/test_document.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/utils/__init__.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/utils/abbrev.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/utils/compounds.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/utils/config.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/utils/dp.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/utils/names.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/utils/utils.py +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/setup.cfg +0 -0
- {batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/setup.py +0 -0
|
@@ -206,8 +206,8 @@ class Utterance(BaseModel):
|
|
|
206
206
|
def __len__(self):
|
|
207
207
|
return len(self.content)
|
|
208
208
|
|
|
209
|
-
def
|
|
210
|
-
if self.text != None:
|
|
209
|
+
def tostring(self, always_detokenize=False):
|
|
210
|
+
if self.text != None and not always_detokenize:
|
|
211
211
|
t = self.text
|
|
212
212
|
else:
|
|
213
213
|
t = self._detokenize()
|
|
@@ -231,6 +231,9 @@ class Utterance(BaseModel):
|
|
|
231
231
|
|
|
232
232
|
return t
|
|
233
233
|
|
|
234
|
+
def __str__(self):
|
|
235
|
+
return self.tostring()
|
|
236
|
+
|
|
234
237
|
def __repr__(self):
|
|
235
238
|
return str(self)
|
|
236
239
|
|
|
@@ -79,14 +79,16 @@ class UtteranceLexer:
|
|
|
79
79
|
# self.__clauses.append((form.strip(), TokenType.FEAT))
|
|
80
80
|
elif form.strip() in NORMAL_GROUP_MARKS:
|
|
81
81
|
# basically ignore the form
|
|
82
|
-
|
|
83
|
-
if
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
82
|
+
o = self.__clauses.pop(-1)
|
|
83
|
+
if len(o) <= 1 or o[1] != TokenType.FP:
|
|
84
|
+
popped = o[0]
|
|
85
|
+
if not isinstance(popped, str):
|
|
86
|
+
for i in popped:
|
|
87
|
+
if i[0] not in CHAT_IGNORE and i[0] != "&":
|
|
88
|
+
self.__clauses.append(i)
|
|
89
|
+
else:
|
|
90
|
+
if popped not in CHAT_IGNORE and popped[0] != "&":
|
|
91
|
+
self.__clauses.append((popped, TokenType.REGULAR))
|
|
90
92
|
# if isinstance(popped, str) and :
|
|
91
93
|
# pass
|
|
92
94
|
# self.__clauses.append((form.strip(), TokenType.FEAT))
|
|
@@ -11,7 +11,7 @@ resolver = {
|
|
|
11
11
|
"yue": "PolyU-AngelChanLab/Cantonese-Utterance-Segmentation",
|
|
12
12
|
},
|
|
13
13
|
"whisper": {
|
|
14
|
-
'eng': ("talkbank/CHATWhisper-en
|
|
14
|
+
'eng': ("talkbank/CHATWhisper-en", "openai/whisper-large-v2"),
|
|
15
15
|
'yue': ("alvanlii/whisper-small-cantonese", "alvanlii/whisper-small-cantonese"),
|
|
16
16
|
"heb": ("ivrit-ai/whisper-large-v3", "ivrit-ai/whisper-large-v3")
|
|
17
17
|
}
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/whisper/infer_asr.py
RENAMED
|
@@ -93,7 +93,7 @@ class WhisperASRModel(object):
|
|
|
93
93
|
stride_length_s=3,
|
|
94
94
|
device=DEVICE,
|
|
95
95
|
torch_dtype=torch.bfloat16,
|
|
96
|
-
return_timestamps=
|
|
96
|
+
return_timestamps=True,
|
|
97
97
|
)
|
|
98
98
|
except TypeError:
|
|
99
99
|
self.pipe = pipeline(
|
|
@@ -104,7 +104,7 @@ class WhisperASRModel(object):
|
|
|
104
104
|
stride_length_s=3,
|
|
105
105
|
device=DEVICE,
|
|
106
106
|
torch_dtype=torch.float16,
|
|
107
|
-
return_timestamps=
|
|
107
|
+
return_timestamps=True,
|
|
108
108
|
)
|
|
109
109
|
L.debug("Done, initalizing processor and config...")
|
|
110
110
|
processor = WhisperProcessor.from_pretrained(base)
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/morphosyntax/ud.py
RENAMED
|
@@ -729,7 +729,7 @@ def morphoanalyze(doc: Document, retokenize:bool, skipmultilang:bool, status_hoo
|
|
|
729
729
|
pass
|
|
730
730
|
|
|
731
731
|
|
|
732
|
-
# pycountry.languages.get(alpha_3=i).alpha_2 for i in lang
|
|
732
|
+
# pycountry.languages.get(alpha_3=i).alpha_2 for i in lang
|
|
733
733
|
|
|
734
734
|
config = {"processors": {"tokenize": "default",
|
|
735
735
|
"pos": "default",
|
|
@@ -813,6 +813,9 @@ def morphoanalyze(doc: Document, retokenize:bool, skipmultilang:bool, status_hoo
|
|
|
813
813
|
line_cut = i.strip(join_with_spaces=True)
|
|
814
814
|
else:
|
|
815
815
|
line_cut = i.strip(join_with_spaces=True)[:-len(ending)].strip()
|
|
816
|
+
|
|
817
|
+
# import ipdb
|
|
818
|
+
# ipdb.set_trace()
|
|
816
819
|
# ending = ending.replace("+//", "")
|
|
817
820
|
|
|
818
821
|
# if we don't have anything in line cut, just take the original
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/BatchalignHK.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/BatchalignHK.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/BatchalignHK.egg-info/requires.txt
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/BatchalignHK.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/exception.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/realtime_meeting.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/speech_recognizer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/stream_input_tts.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/_abnf.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/_app.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/_core.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/_http.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/_socket.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/_url.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/extern/nls/websocket/_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/formats/chat/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/formats/chat/generator.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/formats/textgrid/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/formats/textgrid/file.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/formats/textgrid/generator.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/formats/textgrid/parser.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/speaker/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/speaker/config.yaml
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/speaker/infer.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/speaker/utils.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/training/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/training/utils.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/utterance/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/utterance/dataset.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/utterance/execute.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/utterance/infer.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/utterance/prep.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/utterance/train.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/wave2vec/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/wave2vec/infer_fa.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/whisper/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/models/whisper/infer_fa.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/analysis/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/analysis/eval.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/asr/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/asr/aliyun.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/asr/num2chinese.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/asr/oai_whisper.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/asr/tencent.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/asr/whisper.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/asr/whisperx.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/cleanup/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/cleanup/cleanup.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/cleanup/retrace.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/fa/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/fa/wave2vec_fa.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/fa/whisper_fa.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/morphosyntax/coref.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/speaker/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/translate/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/translate/gtrans.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/translate/seamless.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/translate/utils.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/utr/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/utr/rev_utr.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/utr/tencent_utr.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/utr/whisper_utr.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/pipelines/utterance/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.20.post11 → batchalignhk-0.7.20.post12}/batchalign/tests/pipelines/fixures.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|