batchalign 0.7.13.post1__tar.gz → 0.7.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. {batchalign-0.7.13.post1/batchalign.egg-info → batchalign-0.7.14}/PKG-INFO +4 -1
  2. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/cli/cli.py +22 -0
  3. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/cli/dispatch.py +1 -0
  4. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/document.py +4 -0
  5. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/formats/chat/generator.py +2 -1
  6. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/formats/chat/parser.py +5 -1
  7. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/__init__.py +1 -0
  8. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/dispatch.py +4 -1
  9. batchalign-0.7.14/batchalign/pipelines/translate/__init__.py +1 -0
  10. batchalign-0.7.14/batchalign/pipelines/translate/seamless.py +53 -0
  11. batchalign-0.7.14/batchalign/version +3 -0
  12. {batchalign-0.7.13.post1 → batchalign-0.7.14/batchalign.egg-info}/PKG-INFO +4 -1
  13. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign.egg-info/SOURCES.txt +2 -0
  14. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign.egg-info/requires.txt +3 -0
  15. {batchalign-0.7.13.post1 → batchalign-0.7.14}/setup.py +3 -0
  16. batchalign-0.7.13.post1/batchalign/version +0 -3
  17. {batchalign-0.7.13.post1 → batchalign-0.7.14}/LICENSE +0 -0
  18. {batchalign-0.7.13.post1 → batchalign-0.7.14}/MANIFEST.in +0 -0
  19. {batchalign-0.7.13.post1 → batchalign-0.7.14}/README.md +0 -0
  20. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/__init__.py +0 -0
  21. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/__main__.py +0 -0
  22. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/cli/__init__.py +0 -0
  23. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/constants.py +0 -0
  24. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/errors.py +0 -0
  25. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/formats/__init__.py +0 -0
  26. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/formats/base.py +0 -0
  27. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/formats/chat/__init__.py +0 -0
  28. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/formats/chat/file.py +0 -0
  29. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/formats/chat/lexer.py +0 -0
  30. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/formats/chat/utils.py +0 -0
  31. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/formats/textgrid/__init__.py +0 -0
  32. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/formats/textgrid/file.py +0 -0
  33. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/formats/textgrid/generator.py +0 -0
  34. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/formats/textgrid/parser.py +0 -0
  35. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/models/__init__.py +0 -0
  36. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/models/resolve.py +0 -0
  37. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/models/speaker/__init__.py +0 -0
  38. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/models/speaker/config.yaml +0 -0
  39. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/models/speaker/infer.py +0 -0
  40. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/models/speaker/utils.py +0 -0
  41. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/models/training/__init__.py +0 -0
  42. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/models/training/run.py +0 -0
  43. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/models/training/utils.py +0 -0
  44. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/models/utils.py +0 -0
  45. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/models/utterance/__init__.py +0 -0
  46. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/models/utterance/dataset.py +0 -0
  47. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/models/utterance/execute.py +0 -0
  48. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/models/utterance/infer.py +0 -0
  49. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/models/utterance/prep.py +0 -0
  50. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/models/utterance/train.py +0 -0
  51. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/models/wave2vec/__init__.py +0 -0
  52. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/models/wave2vec/infer_fa.py +0 -0
  53. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/models/whisper/__init__.py +0 -0
  54. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/models/whisper/infer_asr.py +0 -0
  55. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/models/whisper/infer_fa.py +0 -0
  56. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/analysis/__init__.py +0 -0
  57. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/analysis/eval.py +0 -0
  58. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/asr/__init__.py +0 -0
  59. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/asr/num2chinese.py +0 -0
  60. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/asr/rev.py +0 -0
  61. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/asr/utils.py +0 -0
  62. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/asr/whisper.py +0 -0
  63. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/asr/whisperx.py +0 -0
  64. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/base.py +0 -0
  65. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/cleanup/__init__.py +0 -0
  66. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  67. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  68. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  69. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/cleanup/retrace.py +0 -0
  70. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  71. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  72. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/cleanup/support/test.test +0 -0
  73. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/fa/__init__.py +0 -0
  74. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  75. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  76. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  77. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  78. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  79. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  80. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  81. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  82. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  83. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  84. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/pipeline.py +0 -0
  85. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/speaker/__init__.py +0 -0
  86. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  87. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/utr/__init__.py +0 -0
  88. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/utr/rev_utr.py +0 -0
  89. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/utr/utils.py +0 -0
  90. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  91. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/utterance/__init__.py +0 -0
  92. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  93. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/tests/__init__.py +0 -0
  94. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/tests/conftest.py +0 -0
  95. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  96. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  97. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  98. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  99. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  100. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  101. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  102. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  103. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  104. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  105. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  106. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  107. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/tests/pipelines/fixures.py +0 -0
  108. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  109. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  110. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/tests/test_document.py +0 -0
  111. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/utils/__init__.py +0 -0
  112. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/utils/config.py +0 -0
  113. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/utils/dp.py +0 -0
  114. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign/utils/utils.py +0 -0
  115. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign.egg-info/dependency_links.txt +0 -0
  116. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign.egg-info/entry_points.txt +0 -0
  117. {batchalign-0.7.13.post1 → batchalign-0.7.14}/batchalign.egg-info/top_level.txt +0 -0
  118. {batchalign-0.7.13.post1 → batchalign-0.7.14}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.13.post1
3
+ Version: 0.7.14
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -33,6 +33,9 @@ Requires-Dist: soundfile~=0.12.0
33
33
  Requires-Dist: rich-click>=1.7.0
34
34
  Requires-Dist: typing-extensions
35
35
  Requires-Dist: num2words
36
+ Requires-Dist: tiktoken
37
+ Requires-Dist: blobfile
38
+ Requires-Dist: sentencepiece
36
39
  Provides-Extra: dev
37
40
  Requires-Dist: pytest; extra == "dev"
38
41
  Provides-Extra: train
@@ -196,6 +196,28 @@ def transcribe(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
196
196
  loader, writer, C,
197
197
  asr=asr, **kwargs)
198
198
 
199
+ #################### TRANSLATE ################################
200
+
201
+ @batchalign.command()
202
+ @common_options
203
+ @click.pass_context
204
+ def translate(ctx, in_dir, out_dir, **kwargs):
205
+ """Translate the transcript to English."""
206
+
207
+ def loader(file):
208
+ cf = CHATFile(path=os.path.abspath(file), special_mor_=True)
209
+ doc = cf.doc
210
+ # if str(cf).count("%mor") > 0:
211
+ # doc.ba_special_["special_mor_notation"] = True
212
+ return doc
213
+
214
+ def writer(doc, output):
215
+ CHATFile(doc=doc).write(output)
216
+
217
+ _dispatch("translate", "eng", 1, ["cha"], ctx,
218
+ in_dir, out_dir,
219
+ loader, writer, C)
220
+
199
221
  #################### MORPHOTAG ################################
200
222
 
201
223
  @batchalign.command()
@@ -48,6 +48,7 @@ Cmd2Task = {
48
48
  "benchmark": "asr,eval",
49
49
  "utseg": "utterance",
50
50
  "coref": "coref",
51
+ "translate": "translate",
51
52
  }
52
53
 
53
54
  # this is the main runner used by all functions
@@ -31,6 +31,7 @@ class Task(IntEnum):
31
31
  MORPHOSYNTAX = 11
32
32
  COREF = 12
33
33
  WER = 13
34
+ TRANSLATE = 14
34
35
 
35
36
 
36
37
  DEBUG__G = 0
@@ -54,6 +55,7 @@ TypeMap = {
54
55
  Task.DISFLUENCY_ANALYSIS: TaskType.PROCESSING,
55
56
  Task.COREF: TaskType.PROCESSING,
56
57
  Task.WER: TaskType.ANALYSIS,
58
+ Task.TRANSLATE: TaskType.PROCESSING,
57
59
 
58
60
  Task.DEBUG__G: TaskType.GENERATION,
59
61
  Task.DEBUG__P: TaskType.PROCESSING,
@@ -73,6 +75,7 @@ TaskFriendlyName = {
73
75
  Task.DISFLUENCY_ANALYSIS: "Disfluncy Analysis",
74
76
  Task.COREF: "Coreference Resolution",
75
77
  Task.WER: "Word Error Rate",
78
+ Task.TRANSLATE: "Translation",
76
79
  Task.DEBUG__G: "TEST_GENERATION",
77
80
  Task.DEBUG__P: "TEST_PROCESSING",
78
81
  Task.DEBUG__A: "TEST_ANALYSIS",
@@ -150,6 +153,7 @@ class Utterance(BaseModel):
150
153
  tier: Tier = Field(default=Tier())
151
154
  content: Sentence
152
155
  text: Optional[str] = Field(default=None)
156
+ translation: Optional[str] = Field(default=None)
153
157
  time: Optional[Tuple[int,int]] = Field(default=None)
154
158
  custom_dependencies: List[CustomLine] = Field(default=[])
155
159
 
@@ -95,7 +95,8 @@ def generate_chat_utterance(utterance: Utterance, special_mor=False, write_wor=T
95
95
  result.append("%wor:\t"+" ".join(wor_elems))
96
96
  if has_coref:
97
97
  result.append("%coref:\t"+" ".join(coref_elems))
98
-
98
+ if utterance.translation != None:
99
+ result.append("%xtra:\t"+utterance.translation)
99
100
 
100
101
 
101
102
  #### EXTRA LINE GENERATION ####
@@ -280,6 +280,7 @@ def chat_parse_doc(lines, special_mor=False):
280
280
  mor = None
281
281
  gra = None
282
282
  wor = None
283
+ translation = None
283
284
  additional = []
284
285
 
285
286
  while raw[0][0] == "%":
@@ -291,6 +292,8 @@ def chat_parse_doc(lines, special_mor=False):
291
292
  gra = line
292
293
  elif beg.strip() == "wor" or beg.strip() == "xwor":
293
294
  wor = line
295
+ elif beg.strip() == "xtra":
296
+ translation = line
294
297
  else:
295
298
  additional.append(CustomLine(id=beg.strip(),
296
299
  type=CustomLineType.DEPENDENT,
@@ -309,7 +312,8 @@ def chat_parse_doc(lines, special_mor=False):
309
312
  "content": parsed,
310
313
  "text": text,
311
314
  "delim": delim,
312
- "custom_dependencies": additional
315
+ "custom_dependencies": additional,
316
+ "translation": translation
313
317
  })
314
318
 
315
319
  timing = re.findall(rf"\x15(\d+)_(\d+)\x15", text)
@@ -12,3 +12,4 @@ from .utr import WhisperUTREngine, RevUTREngine
12
12
  from .analysis import EvaluationEngine
13
13
  from .utterance import StanzaUtteranceEngine
14
14
 
15
+ from .translate import SeamlessTranslationModel
@@ -6,7 +6,7 @@ Tabulate default packages and options.
6
6
  from batchalign import (WhisperEngine, WhisperFAEngine, StanzaEngine, RevEngine,
7
7
  NgramRetraceEngine, DisfluencyReplacementEngine, WhisperUTREngine,
8
8
  RevUTREngine, EvaluationEngine, WhisperXEngine, NemoSpeakerEngine,
9
- StanzaUtteranceEngine, CorefEngine, Wave2VecFAEngine)
9
+ StanzaUtteranceEngine, CorefEngine, Wave2VecFAEngine, SeamlessTranslationModel)
10
10
  from batchalign import BatchalignPipeline
11
11
  from batchalign.models import resolve
12
12
 
@@ -28,6 +28,7 @@ DEFAULT_PACKAGES = {
28
28
  "eval": "evaluation",
29
29
  "utterance": "stanza_utt",
30
30
  "coref": "stanza_coref",
31
+ "translate": "seamless_translate",
31
32
  }
32
33
 
33
34
  LANGUAGE_OVERRIDE_PACKAGES = {
@@ -129,6 +130,8 @@ def dispatch_pipeline(pkg_str, lang, num_speakers=None, **arg_overrides):
129
130
  engines.append(CorefEngine())
130
131
  elif engine == "wav2vec_fa":
131
132
  engines.append(Wave2VecFAEngine())
133
+ elif engine == "seamless_translate":
134
+ engines.append(SeamlessTranslationModel())
132
135
 
133
136
  L.debug(f"Done initalizing packages.")
134
137
  return BatchalignPipeline(*engines)
@@ -0,0 +1 @@
1
+ from .seamless import SeamlessTranslationModel
@@ -0,0 +1,53 @@
1
+ from batchalign.models import WhisperFAModel
2
+ from batchalign.document import *
3
+ from batchalign.pipelines.base import *
4
+ from batchalign.utils import *
5
+ from batchalign.utils.dp import *
6
+ from batchalign.constants import *
7
+
8
+ from transformers import AutoProcessor, SeamlessM4TModel
9
+
10
+ import logging
11
+ L = logging.getLogger("batchalign")
12
+
13
+ import re
14
+
15
+ # NOTE: needs the `sentencepiece` package installed (e.g. `uv pip install sentencepiece`)
16
+
17
+ import pycountry
18
+ import warnings
19
+
20
+ class SeamlessTranslationModel(BatchalignEngine):
21
+ tasks = [ Task.TRANSLATE ]
22
+
23
+ def _hook_status(self, status_hook):
24
+ self.status_hook = status_hook
25
+
26
+ def __init__(self):
27
+ self.status_hook = None
28
+ self.processor = AutoProcessor.from_pretrained("facebook/hf-seamless-m4t-medium")
29
+ self.model = SeamlessM4TModel.from_pretrained("facebook/hf-seamless-m4t-medium")
30
+
31
+ def process(self, doc:Document, **kwargs):
32
+
33
+ for indx, i in enumerate(doc.content):
34
+ if not isinstance(i, Utterance):
35
+ continue
36
+ if i.translation:
37
+ continue
38
+
39
+ text = i.strip(join_with_spaces=False, include_retrace=True, include_fp=True)
40
+ text_inputs = self.processor(text=text, src_lang=doc.langs[0] if doc.langs[0] != "zho" else "cmn", return_tensors="pt")
41
+ output_tokens = self.model.generate(**text_inputs, tgt_lang="eng", generate_speech=False)
42
+ translated_text_from_text = self.processor.decode(output_tokens[0].tolist()[0], skip_special_tokens=True)
43
+
44
+ i.translation = translated_text_from_text
45
+ for j in MOR_PUNCT + ENDING_PUNCT:
46
+ i.translation = i.translation.replace(j, " "+j)
47
+
48
+ if self.status_hook != None:
49
+ self.status_hook(indx+1, len(doc.content))
50
+
51
+ return doc
52
+
53
+
@@ -0,0 +1,3 @@
1
+ 0.7.14
2
+ February 19th, 2025
3
+ machine translation!
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.13.post1
3
+ Version: 0.7.14
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -33,6 +33,9 @@ Requires-Dist: soundfile~=0.12.0
33
33
  Requires-Dist: rich-click>=1.7.0
34
34
  Requires-Dist: typing-extensions
35
35
  Requires-Dist: num2words
36
+ Requires-Dist: tiktoken
37
+ Requires-Dist: blobfile
38
+ Requires-Dist: sentencepiece
36
39
  Provides-Extra: dev
37
40
  Requires-Dist: pytest; extra == "dev"
38
41
  Provides-Extra: train
@@ -83,6 +83,8 @@ batchalign/pipelines/morphosyntax/fr/case.py
83
83
  batchalign/pipelines/morphosyntax/ja/verbforms.py
84
84
  batchalign/pipelines/speaker/__init__.py
85
85
  batchalign/pipelines/speaker/nemo_speaker.py
86
+ batchalign/pipelines/translate/__init__.py
87
+ batchalign/pipelines/translate/seamless.py
86
88
  batchalign/pipelines/utr/__init__.py
87
89
  batchalign/pipelines/utr/rev_utr.py
88
90
  batchalign/pipelines/utr/utils.py
@@ -23,6 +23,9 @@ soundfile~=0.12.0
23
23
  rich-click>=1.7.0
24
24
  typing-extensions
25
25
  num2words
26
+ tiktoken
27
+ blobfile
28
+ sentencepiece
26
29
 
27
30
  [dev]
28
31
  pytest
@@ -52,6 +52,9 @@ setup(
52
52
  "rich-click>=1.7.0",
53
53
  "typing-extensions",
54
54
  "num2words",
55
+ "tiktoken",
56
+ "blobfile",
57
+ "sentencepiece"
55
58
  ],
56
59
  extras_require={
57
60
  'dev': [
@@ -1,3 +0,0 @@
1
- 0.7.13-post.1
2
- Feburary 14nd, 2025
3
- Remove hash sign.
File without changes
File without changes
File without changes