BatchalignHK 0.7.17.post22__tar.gz → 0.7.18__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/BatchalignHK.egg-info/PKG-INFO +2 -2
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/BatchalignHK.egg-info/SOURCES.txt +2 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/BatchalignHK.egg-info/requires.txt +1 -1
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/PKG-INFO +2 -2
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/cli/cli.py +4 -2
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/formats/chat/generator.py +2 -2
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/__init__.py +1 -1
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/dispatch.py +6 -2
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/translate/__init__.py +1 -0
- batchalignhk-0.7.18/batchalign/pipelines/translate/gtrans.py +58 -0
- batchalignhk-0.7.18/batchalign/pipelines/translate/utils.py +35 -0
- batchalignhk-0.7.18/batchalign/version +3 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/setup.py +1 -0
- batchalignhk-0.7.17.post22/batchalign/version +0 -3
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/BatchalignHK.egg-info/dependency_links.txt +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/BatchalignHK.egg-info/entry_points.txt +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/BatchalignHK.egg-info/top_level.txt +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/LICENSE +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/MANIFEST.in +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/README.md +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/__init__.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/__main__.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/cli/__init__.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/cli/dispatch.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/constants.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/document.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/errors.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/formats/__init__.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/formats/base.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/formats/chat/__init__.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/formats/chat/file.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/formats/chat/lexer.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/formats/chat/parser.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/formats/chat/utils.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/formats/textgrid/file.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/models/__init__.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/models/resolve.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/models/speaker/__init__.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/models/speaker/config.yaml +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/models/speaker/infer.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/models/speaker/utils.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/models/training/__init__.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/models/training/run.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/models/training/utils.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/models/utils.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/models/utterance/__init__.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/models/utterance/cantonese_infer.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/models/utterance/dataset.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/models/utterance/execute.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/models/utterance/infer.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/models/utterance/prep.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/models/utterance/train.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/models/wave2vec/__init__.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/models/wave2vec/infer_fa.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/models/whisper/__init__.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/asr/num2chinese.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/asr/tencent.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/base.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/morphosyntax/ud.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/pipeline.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/translate/seamless.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/__init__.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/conftest.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/test_document.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/utils/__init__.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/utils/config.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/utils/dp.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/utils/utils.py +0 -0
- {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: BatchalignHK
|
|
3
|
-
Version: 0.7.17.post22
|
|
3
|
+
Version: 0.7.18
|
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
|
@@ -38,7 +38,7 @@ Requires-Dist: tiktoken
|
|
|
38
38
|
Requires-Dist: blobfile
|
|
39
39
|
Requires-Dist: sentencepiece
|
|
40
40
|
Requires-Dist: tencentcloud-sdk-python-common
|
|
41
|
-
Requires-Dist: tencentcloud-sdk-python-asr
|
|
41
|
+
Requires-Dist: tencentcloud-sdk-python-asrgoogletrans
|
|
42
42
|
Provides-Extra: dev
|
|
43
43
|
Requires-Dist: pytest; extra == "dev"
|
|
44
44
|
Provides-Extra: train
|
|
@@ -86,7 +86,9 @@ batchalign/pipelines/morphosyntax/ja/verbforms.py
|
|
|
86
86
|
batchalign/pipelines/speaker/__init__.py
|
|
87
87
|
batchalign/pipelines/speaker/nemo_speaker.py
|
|
88
88
|
batchalign/pipelines/translate/__init__.py
|
|
89
|
+
batchalign/pipelines/translate/gtrans.py
|
|
89
90
|
batchalign/pipelines/translate/seamless.py
|
|
91
|
+
batchalign/pipelines/translate/utils.py
|
|
90
92
|
batchalign/pipelines/utr/__init__.py
|
|
91
93
|
batchalign/pipelines/utr/rev_utr.py
|
|
92
94
|
batchalign/pipelines/utr/utils.py
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: BatchalignHK
|
|
3
|
-
Version: 0.7.17.post22
|
|
3
|
+
Version: 0.7.18
|
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
|
@@ -38,7 +38,7 @@ Requires-Dist: tiktoken
|
|
|
38
38
|
Requires-Dist: blobfile
|
|
39
39
|
Requires-Dist: sentencepiece
|
|
40
40
|
Requires-Dist: tencentcloud-sdk-python-common
|
|
41
|
-
Requires-Dist: tencentcloud-sdk-python-asr
|
|
41
|
+
Requires-Dist: tencentcloud-sdk-python-asrgoogletrans
|
|
42
42
|
Provides-Extra: dev
|
|
43
43
|
Requires-Dist: pytest; extra == "dev"
|
|
44
44
|
Provides-Extra: train
|
|
@@ -315,6 +315,8 @@ def utseg(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
|
|
|
315
315
|
@common_options
|
|
316
316
|
@click.option("--whisper/--rev",
|
|
317
317
|
default=False, help="Use OpenAI Whisper (ASR) instead of Rev.AI (default).")
|
|
318
|
+
@click.option("--tencent/--rev",
|
|
319
|
+
default=False, help="Use Tencent instead of Rev.AI (default).")
|
|
318
320
|
@click.option("--lang",
|
|
319
321
|
help="sample language in three-letter ISO 3166-1 alpha-3 code",
|
|
320
322
|
show_default=True,
|
|
@@ -322,7 +324,7 @@ def utseg(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
|
|
|
322
324
|
type=str)
|
|
323
325
|
@click.option("-n", "--num_speakers", type=int, help="number of speakers in the language sample", default=2)
|
|
324
326
|
@click.pass_context
|
|
325
|
-
def benchmark(ctx, in_dir, out_dir, lang, num_speakers, whisper, **kwargs):
|
|
327
|
+
def benchmark(ctx, in_dir, out_dir, lang, num_speakers, whisper, tencent, **kwargs):
|
|
326
328
|
"""Benchmark ASR utilities for their word accuracy"""
|
|
327
329
|
def loader(file):
|
|
328
330
|
# try to find a .cha in the same directory
|
|
@@ -348,7 +350,7 @@ def benchmark(ctx, in_dir, out_dir, lang, num_speakers, whisper, **kwargs):
|
|
|
348
350
|
_dispatch("benchmark", lang, num_speakers, ["mp3", "mp4", "wav"], ctx,
|
|
349
351
|
in_dir, out_dir,
|
|
350
352
|
loader, writer, C,
|
|
351
|
-
asr="whisper" if whisper else "rev", **kwargs)
|
|
353
|
+
asr="whisper" if whisper else ("tencent" if tencent else "rev"), **kwargs)
|
|
352
354
|
|
|
353
355
|
|
|
354
356
|
#################### SETUP ################################
|
|
@@ -30,8 +30,8 @@ def generate_chat_utterance(utterance: Utterance, special_mor=False, write_wor=T
|
|
|
30
30
|
main_line = str(utterance)
|
|
31
31
|
# last minut ecorrections
|
|
32
32
|
main_line = re.sub(r"<([\w ]+) \[\/", r"<\1> [/", main_line)
|
|
33
|
-
main_line = re.sub(r"«", "
|
|
34
|
-
main_line = re.sub(r"»", "
|
|
33
|
+
main_line = re.sub(r"«", "“", main_line)
|
|
34
|
+
main_line = re.sub(r"»", "”", main_line)
|
|
35
35
|
main_line = re.sub(r"—", "-", main_line)
|
|
36
36
|
main_line = re.sub(r"–", "-", main_line)
|
|
37
37
|
tier = utterance.tier
|
|
@@ -6,7 +6,9 @@ Tabulate default packages and options.
|
|
|
6
6
|
from batchalign import (WhisperEngine, WhisperFAEngine, StanzaEngine, RevEngine,
|
|
7
7
|
NgramRetraceEngine, DisfluencyReplacementEngine, WhisperUTREngine,
|
|
8
8
|
RevUTREngine, EvaluationEngine, WhisperXEngine, NemoSpeakerEngine,
|
|
9
|
-
StanzaUtteranceEngine, CorefEngine, Wave2VecFAEngine, SeamlessTranslationModel, TencentEngine
|
|
9
|
+
StanzaUtteranceEngine, CorefEngine, Wave2VecFAEngine, SeamlessTranslationModel, TencentEngine,
|
|
10
|
+
GoogleTranslateEngine)
|
|
11
|
+
|
|
10
12
|
from batchalign import BatchalignPipeline
|
|
11
13
|
from batchalign.models import resolve
|
|
12
14
|
|
|
@@ -28,7 +30,7 @@ DEFAULT_PACKAGES = {
|
|
|
28
30
|
"eval": "evaluation",
|
|
29
31
|
"utterance": "stanza_utt",
|
|
30
32
|
"coref": "stanza_coref",
|
|
31
|
-
"translate": "
|
|
33
|
+
"translate": "gtrans",
|
|
32
34
|
}
|
|
33
35
|
|
|
34
36
|
LANGUAGE_OVERRIDE_PACKAGES = {
|
|
@@ -134,6 +136,8 @@ def dispatch_pipeline(pkg_str, lang, num_speakers=None, **arg_overrides):
|
|
|
134
136
|
engines.append(SeamlessTranslationModel())
|
|
135
137
|
elif engine == "tencent":
|
|
136
138
|
engines.append(TencentEngine(lang=lang))
|
|
139
|
+
elif engine == "gtrans":
|
|
140
|
+
engines.append(GoogleTranslateEngine())
|
|
137
141
|
|
|
138
142
|
|
|
139
143
|
L.debug(f"Done initalizing packages.")
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
from batchalign.models import WhisperFAModel
|
|
2
|
+
from batchalign.document import *
|
|
3
|
+
from batchalign.pipelines.base import *
|
|
4
|
+
from batchalign.utils import *
|
|
5
|
+
from batchalign.utils.dp import *
|
|
6
|
+
from batchalign.constants import *
|
|
7
|
+
from batchalign.pipelines.translate.utils import run_coroutine_sync
|
|
8
|
+
|
|
9
|
+
from googletrans import Translator
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
L = logging.getLogger("batchalign")
|
|
13
|
+
|
|
14
|
+
import re
|
|
15
|
+
|
|
16
|
+
# !uv pip install sentencepiece
|
|
17
|
+
|
|
18
|
+
import pycountry
|
|
19
|
+
import warnings
|
|
20
|
+
import time
|
|
21
|
+
|
|
22
|
+
import asyncio
|
|
23
|
+
|
|
24
|
+
class GoogleTranslateEngine(BatchalignEngine):
|
|
25
|
+
tasks = [ Task.TRANSLATE ]
|
|
26
|
+
|
|
27
|
+
def _hook_status(self, status_hook):
|
|
28
|
+
self.status_hook = status_hook
|
|
29
|
+
|
|
30
|
+
def __init__(self):
|
|
31
|
+
self.status_hook = None
|
|
32
|
+
|
|
33
|
+
async def translate(self, text):
|
|
34
|
+
translator = Translator()
|
|
35
|
+
return await translator.translate(text)
|
|
36
|
+
|
|
37
|
+
def process(self, doc:Document, **kwargs):
|
|
38
|
+
|
|
39
|
+
for indx, i in enumerate(doc.content):
|
|
40
|
+
if not isinstance(i, Utterance):
|
|
41
|
+
continue
|
|
42
|
+
if i.translation:
|
|
43
|
+
continue
|
|
44
|
+
|
|
45
|
+
text = i.strip(join_with_spaces=False, include_retrace=True, include_fp=True)
|
|
46
|
+
translated_text_from_text = run_coroutine_sync(self.translate(text)).text
|
|
47
|
+
|
|
48
|
+
i.translation = translated_text_from_text
|
|
49
|
+
for j in MOR_PUNCT + ENDING_PUNCT:
|
|
50
|
+
i.translation = i.translation.replace(j, " "+j)
|
|
51
|
+
|
|
52
|
+
if self.status_hook != None:
|
|
53
|
+
self.status_hook(indx+1, len(doc.content))
|
|
54
|
+
time.sleep(1.5)
|
|
55
|
+
|
|
56
|
+
return doc
|
|
57
|
+
|
|
58
|
+
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import threading
|
|
3
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
4
|
+
from typing import Any, Coroutine, TypeVar
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"run_coroutine_sync",
|
|
8
|
+
]
|
|
9
|
+
|
|
10
|
+
T = TypeVar("T")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def run_coroutine_sync(coroutine: Coroutine[Any, Any, T], timeout: float = 30) -> T:
|
|
14
|
+
def run_in_new_loop():
|
|
15
|
+
new_loop = asyncio.new_event_loop()
|
|
16
|
+
asyncio.set_event_loop(new_loop)
|
|
17
|
+
try:
|
|
18
|
+
return new_loop.run_until_complete(coroutine)
|
|
19
|
+
finally:
|
|
20
|
+
new_loop.close()
|
|
21
|
+
|
|
22
|
+
try:
|
|
23
|
+
loop = asyncio.get_running_loop()
|
|
24
|
+
except RuntimeError:
|
|
25
|
+
return asyncio.run(coroutine)
|
|
26
|
+
|
|
27
|
+
if threading.current_thread() is threading.main_thread():
|
|
28
|
+
if not loop.is_running():
|
|
29
|
+
return loop.run_until_complete(coroutine)
|
|
30
|
+
else:
|
|
31
|
+
with ThreadPoolExecutor() as pool:
|
|
32
|
+
future = pool.submit(run_in_new_loop)
|
|
33
|
+
return future.result(timeout=timeout)
|
|
34
|
+
else:
|
|
35
|
+
return asyncio.run_coroutine_threadsafe(coroutine, loop).result()
|
{batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/BatchalignHK.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/models/utterance/cantonese_infer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/analysis/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/cleanup/disfluencies.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/cleanup/parse_support.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/cleanup/support/test.test
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/morphosyntax/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/morphosyntax/coref.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/morphosyntax/en/irr.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/morphosyntax/fr/apm.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/morphosyntax/fr/apmn.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/morphosyntax/fr/case.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/morphosyntax/ja/verbforms.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/speaker/nemo_speaker.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/translate/seamless.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/utterance/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/pipelines/utterance/ud_utterance.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/formats/chat/test_chat_file.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/formats/chat/test_chat_lexer.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/formats/chat/test_chat_parser.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/formats/chat/test_chat_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/pipelines/analysis/test_eval.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/pipelines/asr/test_asr_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/pipelines/fa/test_fa_pipeline.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post22 → batchalignhk-0.7.18}/batchalign/tests/pipelines/test_pipeline.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|