BatchalignHK 0.7.17.post22__tar.gz → 0.7.18b0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/BatchalignHK.egg-info/PKG-INFO +2 -2
  2. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/BatchalignHK.egg-info/SOURCES.txt +2 -0
  3. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/BatchalignHK.egg-info/requires.txt +1 -1
  4. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/PKG-INFO +2 -2
  5. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/cli/cli.py +4 -2
  6. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/formats/chat/generator.py +2 -2
  7. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/__init__.py +1 -1
  8. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/dispatch.py +6 -2
  9. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/translate/__init__.py +1 -0
  10. batchalignhk-0.7.18b0/batchalign/pipelines/translate/gtrans.py +58 -0
  11. batchalignhk-0.7.18b0/batchalign/pipelines/translate/utils.py +35 -0
  12. batchalignhk-0.7.18b0/batchalign/version +3 -0
  13. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/setup.py +1 -0
  14. batchalignhk-0.7.17.post22/batchalign/version +0 -3
  15. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/BatchalignHK.egg-info/dependency_links.txt +0 -0
  16. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/BatchalignHK.egg-info/entry_points.txt +0 -0
  17. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/BatchalignHK.egg-info/top_level.txt +0 -0
  18. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/LICENSE +0 -0
  19. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/MANIFEST.in +0 -0
  20. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/README.md +0 -0
  21. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/__init__.py +0 -0
  22. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/__main__.py +0 -0
  23. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/cli/__init__.py +0 -0
  24. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/cli/dispatch.py +0 -0
  25. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/constants.py +0 -0
  26. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/document.py +0 -0
  27. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/errors.py +0 -0
  28. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/formats/__init__.py +0 -0
  29. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/formats/base.py +0 -0
  30. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/formats/chat/__init__.py +0 -0
  31. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/formats/chat/file.py +0 -0
  32. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/formats/chat/lexer.py +0 -0
  33. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/formats/chat/parser.py +0 -0
  34. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/formats/chat/utils.py +0 -0
  35. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/formats/textgrid/__init__.py +0 -0
  36. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/formats/textgrid/file.py +0 -0
  37. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/formats/textgrid/generator.py +0 -0
  38. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/formats/textgrid/parser.py +0 -0
  39. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/models/__init__.py +0 -0
  40. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/models/resolve.py +0 -0
  41. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/models/speaker/__init__.py +0 -0
  42. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/models/speaker/config.yaml +0 -0
  43. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/models/speaker/infer.py +0 -0
  44. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/models/speaker/utils.py +0 -0
  45. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/models/training/__init__.py +0 -0
  46. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/models/training/run.py +0 -0
  47. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/models/training/utils.py +0 -0
  48. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/models/utils.py +0 -0
  49. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/models/utterance/__init__.py +0 -0
  50. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/models/utterance/cantonese_infer.py +0 -0
  51. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/models/utterance/dataset.py +0 -0
  52. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/models/utterance/execute.py +0 -0
  53. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/models/utterance/infer.py +0 -0
  54. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/models/utterance/prep.py +0 -0
  55. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/models/utterance/train.py +0 -0
  56. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/models/wave2vec/__init__.py +0 -0
  57. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/models/wave2vec/infer_fa.py +0 -0
  58. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/models/whisper/__init__.py +0 -0
  59. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/models/whisper/infer_asr.py +0 -0
  60. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/models/whisper/infer_fa.py +0 -0
  61. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/analysis/__init__.py +0 -0
  62. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/analysis/eval.py +0 -0
  63. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/asr/__init__.py +0 -0
  64. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/asr/num2chinese.py +0 -0
  65. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/asr/rev.py +0 -0
  66. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/asr/tencent.py +0 -0
  67. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/asr/utils.py +0 -0
  68. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/asr/whisper.py +0 -0
  69. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/asr/whisperx.py +0 -0
  70. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/base.py +0 -0
  71. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/cleanup/__init__.py +0 -0
  72. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  73. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  74. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  75. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/cleanup/retrace.py +0 -0
  76. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  77. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  78. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/cleanup/support/test.test +0 -0
  79. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/fa/__init__.py +0 -0
  80. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  81. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  82. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  83. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  84. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  85. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  86. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  87. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  88. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  89. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  90. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/pipeline.py +0 -0
  91. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/speaker/__init__.py +0 -0
  92. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  93. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/translate/seamless.py +0 -0
  94. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/utr/__init__.py +0 -0
  95. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/utr/rev_utr.py +0 -0
  96. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/utr/utils.py +0 -0
  97. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  98. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/utterance/__init__.py +0 -0
  99. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  100. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/tests/__init__.py +0 -0
  101. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/tests/conftest.py +0 -0
  102. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  103. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  104. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  105. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  106. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  107. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  108. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  109. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  110. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  111. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  112. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  113. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  114. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/tests/pipelines/fixures.py +0 -0
  115. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  116. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  117. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/tests/test_document.py +0 -0
  118. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/utils/__init__.py +0 -0
  119. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/utils/config.py +0 -0
  120. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/utils/dp.py +0 -0
  121. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/batchalign/utils/utils.py +0 -0
  122. {batchalignhk-0.7.17.post22 → batchalignhk-0.7.18b0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: BatchalignHK
3
- Version: 0.7.17.post22
3
+ Version: 0.7.18b0
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -38,7 +38,7 @@ Requires-Dist: tiktoken
38
38
  Requires-Dist: blobfile
39
39
  Requires-Dist: sentencepiece
40
40
  Requires-Dist: tencentcloud-sdk-python-common
41
- Requires-Dist: tencentcloud-sdk-python-asr
41
+ Requires-Dist: tencentcloud-sdk-python-asrgoogletrans
42
42
  Provides-Extra: dev
43
43
  Requires-Dist: pytest; extra == "dev"
44
44
  Provides-Extra: train
@@ -86,7 +86,9 @@ batchalign/pipelines/morphosyntax/ja/verbforms.py
86
86
  batchalign/pipelines/speaker/__init__.py
87
87
  batchalign/pipelines/speaker/nemo_speaker.py
88
88
  batchalign/pipelines/translate/__init__.py
89
+ batchalign/pipelines/translate/gtrans.py
89
90
  batchalign/pipelines/translate/seamless.py
91
+ batchalign/pipelines/translate/utils.py
90
92
  batchalign/pipelines/utr/__init__.py
91
93
  batchalign/pipelines/utr/rev_utr.py
92
94
  batchalign/pipelines/utr/utils.py
@@ -28,7 +28,7 @@ tiktoken
28
28
  blobfile
29
29
  sentencepiece
30
30
  tencentcloud-sdk-python-common
31
- tencentcloud-sdk-python-asr
31
+ tencentcloud-sdk-python-asrgoogletrans
32
32
 
33
33
  [dev]
34
34
  pytest
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: BatchalignHK
3
- Version: 0.7.17.post22
3
+ Version: 0.7.18b0
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -38,7 +38,7 @@ Requires-Dist: tiktoken
38
38
  Requires-Dist: blobfile
39
39
  Requires-Dist: sentencepiece
40
40
  Requires-Dist: tencentcloud-sdk-python-common
41
- Requires-Dist: tencentcloud-sdk-python-asr
41
+ Requires-Dist: tencentcloud-sdk-python-asrgoogletrans
42
42
  Provides-Extra: dev
43
43
  Requires-Dist: pytest; extra == "dev"
44
44
  Provides-Extra: train
@@ -315,6 +315,8 @@ def utseg(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
315
315
  @common_options
316
316
  @click.option("--whisper/--rev",
317
317
  default=False, help="Use OpenAI Whisper (ASR) instead of Rev.AI (default).")
318
+ @click.option("--tencent/--rev",
319
+ default=False, help="Use Tencent instead of Rev.AI (default).")
318
320
  @click.option("--lang",
319
321
  help="sample language in three-letter ISO 3166-1 alpha-3 code",
320
322
  show_default=True,
@@ -322,7 +324,7 @@ def utseg(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
322
324
  type=str)
323
325
  @click.option("-n", "--num_speakers", type=int, help="number of speakers in the language sample", default=2)
324
326
  @click.pass_context
325
- def benchmark(ctx, in_dir, out_dir, lang, num_speakers, whisper, **kwargs):
327
+ def benchmark(ctx, in_dir, out_dir, lang, num_speakers, whisper, tencent, **kwargs):
326
328
  """Benchmark ASR utilities for their word accuracy"""
327
329
  def loader(file):
328
330
  # try to find a .cha in the same directory
@@ -348,7 +350,7 @@ def benchmark(ctx, in_dir, out_dir, lang, num_speakers, whisper, **kwargs):
348
350
  _dispatch("benchmark", lang, num_speakers, ["mp3", "mp4", "wav"], ctx,
349
351
  in_dir, out_dir,
350
352
  loader, writer, C,
351
- asr="whisper" if whisper else "rev", **kwargs)
353
+ asr="whisper" if whisper else ("tencent" if tencent else "rev"), **kwargs)
352
354
 
353
355
 
354
356
  #################### SETUP ################################
@@ -30,8 +30,8 @@ def generate_chat_utterance(utterance: Utterance, special_mor=False, write_wor=T
30
30
  main_line = str(utterance)
31
31
  # last-minute corrections
32
32
  main_line = re.sub(r"<([\w ]+) \[\/", r"<\1> [/", main_line)
33
- main_line = re.sub(r"«", "\"", main_line)
34
- main_line = re.sub(r"»", "\"", main_line)
33
+ main_line = re.sub(r"«", "", main_line)
34
+ main_line = re.sub(r"»", "", main_line)
35
35
  main_line = re.sub(r"—", "-", main_line)
36
36
  main_line = re.sub(r"–", "-", main_line)
37
37
  tier = utterance.tier
@@ -12,4 +12,4 @@ from .utr import WhisperUTREngine, RevUTREngine
12
12
  from .analysis import EvaluationEngine
13
13
  from .utterance import StanzaUtteranceEngine
14
14
 
15
- from .translate import SeamlessTranslationModel
15
+ from .translate import SeamlessTranslationModel, GoogleTranslateEngine
@@ -6,7 +6,9 @@ Tabulate default packages and options.
6
6
  from batchalign import (WhisperEngine, WhisperFAEngine, StanzaEngine, RevEngine,
7
7
  NgramRetraceEngine, DisfluencyReplacementEngine, WhisperUTREngine,
8
8
  RevUTREngine, EvaluationEngine, WhisperXEngine, NemoSpeakerEngine,
9
- StanzaUtteranceEngine, CorefEngine, Wave2VecFAEngine, SeamlessTranslationModel, TencentEngine)
9
+ StanzaUtteranceEngine, CorefEngine, Wave2VecFAEngine, SeamlessTranslationModel, TencentEngine,
10
+ GoogleTranslateEngine)
11
+
10
12
  from batchalign import BatchalignPipeline
11
13
  from batchalign.models import resolve
12
14
 
@@ -28,7 +30,7 @@ DEFAULT_PACKAGES = {
28
30
  "eval": "evaluation",
29
31
  "utterance": "stanza_utt",
30
32
  "coref": "stanza_coref",
31
- "translate": "seamless_translate",
33
+ "translate": "gtrans",
32
34
  }
33
35
 
34
36
  LANGUAGE_OVERRIDE_PACKAGES = {
@@ -134,6 +136,8 @@ def dispatch_pipeline(pkg_str, lang, num_speakers=None, **arg_overrides):
134
136
  engines.append(SeamlessTranslationModel())
135
137
  elif engine == "tencent":
136
138
  engines.append(TencentEngine(lang=lang))
139
+ elif engine == "gtrans":
140
+ engines.append(GoogleTranslateEngine())
137
141
 
138
142
 
139
143
  L.debug(f"Done initalizing packages.")
@@ -1 +1,2 @@
1
1
  from .seamless import SeamlessTranslationModel
2
+ from .gtrans import GoogleTranslateEngine
@@ -0,0 +1,58 @@
1
+ from batchalign.models import WhisperFAModel
2
+ from batchalign.document import *
3
+ from batchalign.pipelines.base import *
4
+ from batchalign.utils import *
5
+ from batchalign.utils.dp import *
6
+ from batchalign.constants import *
7
+ from batchalign.pipelines.translate.utils import run_coroutine_sync
8
+
9
+ from googletrans import Translator
10
+
11
+ import logging
12
+ L = logging.getLogger("batchalign")
13
+
14
+ import re
15
+
16
+ # !uv pip install sentencepiece
17
+
18
+ import pycountry
19
+ import warnings
20
+ import time
21
+
22
+ import asyncio
23
+
24
class GoogleTranslateEngine(BatchalignEngine):
    """Translation engine that fills in utterance translations via googletrans.

    For every ``Utterance`` in a document that does not already carry a
    translation, the utterance text is sent to ``googletrans.Translator`` and
    the returned text is stored on ``utterance.translation``.

    No destination language is passed to the translator, so googletrans'
    own default target applies (presumably English -- confirm against the
    googletrans documentation).
    """

    tasks = [ Task.TRANSLATE ]

    def _hook_status(self, status_hook):
        """Register a progress callback.

        The callback is invoked from ``process`` as
        ``status_hook(position, total)`` after each translated utterance.
        """
        self.status_hook = status_hook

    def __init__(self, request_delay=1.5):
        """Create the engine.

        Parameters
        ----------
        request_delay : float, optional
            Seconds to sleep after each translation request. Defaults to
            1.5, the pause that was previously hard-coded.
        """
        self.status_hook = None
        self.request_delay = request_delay

    async def translate(self, text):
        """Translate ``text`` and return the googletrans result object.

        A fresh ``Translator`` is used per call because each call is driven
        by its own short-lived event loop; the async context manager closes
        the translator's HTTP client instead of leaking it.
        """
        async with Translator() as translator:
            return await translator.translate(text)

    def process(self, doc:Document, **kwargs):
        """Translate every untranslated utterance of ``doc`` in place.

        Parameters
        ----------
        doc : Document
            Document whose ``content`` is scanned for utterances.
        **kwargs
            Accepted for interface compatibility; unused here.

        Returns
        -------
        Document
            The same ``doc`` object, with ``translation`` set on each
            utterance that previously had none.
        """
        total = len(doc.content)

        for indx, i in enumerate(doc.content):
            # only utterances carry text to translate
            if not isinstance(i, Utterance):
                continue
            # never overwrite a translation that is already present
            if i.translation:
                continue

            text = i.strip(join_with_spaces=False, include_retrace=True, include_fp=True)
            translation = run_coroutine_sync(self.translate(text)).text

            # put a space in front of each punctuation mark so it stands as
            # its own token (MOR_PUNCT / ENDING_PUNCT come from the wildcard
            # imports -- presumably batchalign.constants)
            for j in MOR_PUNCT + ENDING_PUNCT:
                translation = translation.replace(j, " "+j)
            i.translation = translation

            if self.status_hook is not None:
                self.status_hook(indx+1, total)
            # pause between requests to throttle calls to the service
            time.sleep(self.request_delay)

        return doc
57
+
58
+
@@ -0,0 +1,35 @@
1
+ import asyncio
2
+ import threading
3
+ from concurrent.futures import ThreadPoolExecutor
4
+ from typing import Any, Coroutine, TypeVar
5
+
6
+ __all__ = [
7
+ "run_coroutine_sync",
8
+ ]
9
+
10
+ T = TypeVar("T")
11
+
12
+
13
def run_coroutine_sync(coroutine: Coroutine[Any, Any, T], timeout: float = 30) -> T:
    """Run ``coroutine`` to completion from synchronous code and return its result.

    Two situations are handled:

    * No event loop is running in the calling thread: the coroutine is run
      directly with ``asyncio.run``. ``timeout`` is not applied here.
    * An event loop is already running in the calling thread (main thread or
      not): the coroutine is run on a fresh event loop in a worker thread and
      the caller blocks for at most ``timeout`` seconds. Blocking on the
      caller's own loop instead would deadlock, because that loop cannot
      advance while its thread is waiting.

    Parameters
    ----------
    coroutine : Coroutine[Any, Any, T]
        The coroutine object to execute.
    timeout : float, optional
        Maximum number of seconds to wait when the coroutine is offloaded to
        a worker thread. Defaults to 30.

    Returns
    -------
    T
        Whatever the coroutine returns.

    Raises
    ------
    concurrent.futures.TimeoutError
        If the offloaded coroutine does not finish within ``timeout``. The
        worker is not cancelled; it is left to finish in the background.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread, so it is safe to start one here.
        return asyncio.run(coroutine)

    def run_in_new_loop() -> T:
        # asyncio.run creates, uses and closes a private loop for this thread.
        return asyncio.run(coroutine)

    pool = ThreadPoolExecutor(max_workers=1)
    try:
        return pool.submit(run_in_new_loop).result(timeout=timeout)
    finally:
        # wait=False so a timed-out call returns promptly instead of
        # blocking until the worker finishes.
        pool.shutdown(wait=False)
@@ -0,0 +1,3 @@
1
+ 0.7.18-beta.0
2
+ April 16th, 2025
3
+ google translate
@@ -58,6 +58,7 @@ setup(
58
58
  "sentencepiece",
59
59
  "tencentcloud-sdk-python-common",
60
60
  "tencentcloud-sdk-python-asr"
61
+ "googletrans"
61
62
  ],
62
63
  extras_require={
63
64
  'dev': [
@@ -1,3 +0,0 @@
1
- 0.7.17-post.22
2
- April 16th, 2025
3
- Various fixes for numbers