batchalign 0.7.13__tar.gz → 0.7.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalign-0.7.13/batchalign.egg-info → batchalign-0.7.14}/PKG-INFO +4 -1
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/cli/cli.py +22 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/cli/dispatch.py +1 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/document.py +4 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/formats/chat/generator.py +2 -1
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/formats/chat/parser.py +5 -1
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/__init__.py +1 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/dispatch.py +4 -1
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/morphosyntax/ud.py +1 -0
- batchalign-0.7.14/batchalign/pipelines/translate/__init__.py +1 -0
- batchalign-0.7.14/batchalign/pipelines/translate/seamless.py +53 -0
- batchalign-0.7.14/batchalign/version +3 -0
- {batchalign-0.7.13 → batchalign-0.7.14/batchalign.egg-info}/PKG-INFO +4 -1
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign.egg-info/SOURCES.txt +2 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign.egg-info/requires.txt +3 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/setup.py +3 -0
- batchalign-0.7.13/batchalign/version +0 -3
- {batchalign-0.7.13 → batchalign-0.7.14}/LICENSE +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/MANIFEST.in +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/README.md +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/__init__.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/__main__.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/constants.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/errors.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/formats/chat/lexer.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/formats/chat/utils.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/models/resolve.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/models/wave2vec/__init__.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/models/wave2vec/infer_fa.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/asr/num2chinese.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/utils/dp.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.13 → batchalign-0.7.14}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: batchalign
|
3
|
-
Version: 0.7.13
|
3
|
+
Version: 0.7.14
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
@@ -33,6 +33,9 @@ Requires-Dist: soundfile~=0.12.0
|
|
33
33
|
Requires-Dist: rich-click>=1.7.0
|
34
34
|
Requires-Dist: typing-extensions
|
35
35
|
Requires-Dist: num2words
|
36
|
+
Requires-Dist: tiktoken
|
37
|
+
Requires-Dist: blobfile
|
38
|
+
Requires-Dist: sentencepiece
|
36
39
|
Provides-Extra: dev
|
37
40
|
Requires-Dist: pytest; extra == "dev"
|
38
41
|
Provides-Extra: train
|
@@ -196,6 +196,28 @@ def transcribe(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
|
|
196
196
|
loader, writer, C,
|
197
197
|
asr=asr, **kwargs)
|
198
198
|
|
199
|
+
#################### TRANSLATE ################################
|
200
|
+
|
201
|
+
@batchalign.command()
|
202
|
+
@common_options
|
203
|
+
@click.pass_context
|
204
|
+
def translate(ctx, in_dir, out_dir, **kwargs):
|
205
|
+
"""Translate the transcript to English."""
|
206
|
+
|
207
|
+
def loader(file):
|
208
|
+
cf = CHATFile(path=os.path.abspath(file), special_mor_=True)
|
209
|
+
doc = cf.doc
|
210
|
+
# if str(cf).count("%mor") > 0:
|
211
|
+
# doc.ba_special_["special_mor_notation"] = True
|
212
|
+
return doc
|
213
|
+
|
214
|
+
def writer(doc, output):
|
215
|
+
CHATFile(doc=doc).write(output)
|
216
|
+
|
217
|
+
_dispatch("translate", "eng", 1, ["cha"], ctx,
|
218
|
+
in_dir, out_dir,
|
219
|
+
loader, writer, C)
|
220
|
+
|
199
221
|
#################### MORPHOTAG ################################
|
200
222
|
|
201
223
|
@batchalign.command()
|
@@ -31,6 +31,7 @@ class Task(IntEnum):
|
|
31
31
|
MORPHOSYNTAX = 11
|
32
32
|
COREF = 12
|
33
33
|
WER = 13
|
34
|
+
TRANSLATE = 14
|
34
35
|
|
35
36
|
|
36
37
|
DEBUG__G = 0
|
@@ -54,6 +55,7 @@ TypeMap = {
|
|
54
55
|
Task.DISFLUENCY_ANALYSIS: TaskType.PROCESSING,
|
55
56
|
Task.COREF: TaskType.PROCESSING,
|
56
57
|
Task.WER: TaskType.ANALYSIS,
|
58
|
+
Task.TRANSLATE: TaskType.PROCESSING,
|
57
59
|
|
58
60
|
Task.DEBUG__G: TaskType.GENERATION,
|
59
61
|
Task.DEBUG__P: TaskType.PROCESSING,
|
@@ -73,6 +75,7 @@ TaskFriendlyName = {
|
|
73
75
|
Task.DISFLUENCY_ANALYSIS: "Disfluncy Analysis",
|
74
76
|
Task.COREF: "Coreference Resolution",
|
75
77
|
Task.WER: "Word Error Rate",
|
78
|
+
Task.TRANSLATE: "Translation",
|
76
79
|
Task.DEBUG__G: "TEST_GENERATION",
|
77
80
|
Task.DEBUG__P: "TEST_PROCESSING",
|
78
81
|
Task.DEBUG__A: "TEST_ANALYSIS",
|
@@ -150,6 +153,7 @@ class Utterance(BaseModel):
|
|
150
153
|
tier: Tier = Field(default=Tier())
|
151
154
|
content: Sentence
|
152
155
|
text: Optional[str] = Field(default=None)
|
156
|
+
translation: Optional[str] = Field(default=None)
|
153
157
|
time: Optional[Tuple[int,int]] = Field(default=None)
|
154
158
|
custom_dependencies: List[CustomLine] = Field(default=[])
|
155
159
|
|
@@ -95,7 +95,8 @@ def generate_chat_utterance(utterance: Utterance, special_mor=False, write_wor=T
|
|
95
95
|
result.append("%wor:\t"+" ".join(wor_elems))
|
96
96
|
if has_coref:
|
97
97
|
result.append("%coref:\t"+" ".join(coref_elems))
|
98
|
-
|
98
|
+
if utterance.translation != None:
|
99
|
+
result.append("%xtra:\t"+utterance.translation)
|
99
100
|
|
100
101
|
|
101
102
|
#### EXTRA LINE GENERATION ####
|
@@ -280,6 +280,7 @@ def chat_parse_doc(lines, special_mor=False):
|
|
280
280
|
mor = None
|
281
281
|
gra = None
|
282
282
|
wor = None
|
283
|
+
translation = None
|
283
284
|
additional = []
|
284
285
|
|
285
286
|
while raw[0][0] == "%":
|
@@ -291,6 +292,8 @@ def chat_parse_doc(lines, special_mor=False):
|
|
291
292
|
gra = line
|
292
293
|
elif beg.strip() == "wor" or beg.strip() == "xwor":
|
293
294
|
wor = line
|
295
|
+
elif beg.strip() == "xtra":
|
296
|
+
translation = line
|
294
297
|
else:
|
295
298
|
additional.append(CustomLine(id=beg.strip(),
|
296
299
|
type=CustomLineType.DEPENDENT,
|
@@ -309,7 +312,8 @@ def chat_parse_doc(lines, special_mor=False):
|
|
309
312
|
"content": parsed,
|
310
313
|
"text": text,
|
311
314
|
"delim": delim,
|
312
|
-
"custom_dependencies": additional
|
315
|
+
"custom_dependencies": additional,
|
316
|
+
"translation": translation
|
313
317
|
})
|
314
318
|
|
315
319
|
timing = re.findall(rf"\x15(\d+)_(\d+)\x15", text)
|
@@ -6,7 +6,7 @@ Tabulate default packages and options.
|
|
6
6
|
from batchalign import (WhisperEngine, WhisperFAEngine, StanzaEngine, RevEngine,
|
7
7
|
NgramRetraceEngine, DisfluencyReplacementEngine, WhisperUTREngine,
|
8
8
|
RevUTREngine, EvaluationEngine, WhisperXEngine, NemoSpeakerEngine,
|
9
|
-
StanzaUtteranceEngine, CorefEngine, Wave2VecFAEngine)
|
9
|
+
StanzaUtteranceEngine, CorefEngine, Wave2VecFAEngine, SeamlessTranslationModel)
|
10
10
|
from batchalign import BatchalignPipeline
|
11
11
|
from batchalign.models import resolve
|
12
12
|
|
@@ -28,6 +28,7 @@ DEFAULT_PACKAGES = {
|
|
28
28
|
"eval": "evaluation",
|
29
29
|
"utterance": "stanza_utt",
|
30
30
|
"coref": "stanza_coref",
|
31
|
+
"translate": "seamless_translate",
|
31
32
|
}
|
32
33
|
|
33
34
|
LANGUAGE_OVERRIDE_PACKAGES = {
|
@@ -129,6 +130,8 @@ def dispatch_pipeline(pkg_str, lang, num_speakers=None, **arg_overrides):
|
|
129
130
|
engines.append(CorefEngine())
|
130
131
|
elif engine == "wav2vec_fa":
|
131
132
|
engines.append(Wave2VecFAEngine())
|
133
|
+
elif engine == "seamless_translate":
|
134
|
+
engines.append(SeamlessTranslationModel())
|
132
135
|
|
133
136
|
L.debug(f"Done initalizing packages.")
|
134
137
|
return BatchalignPipeline(*engines)
|
@@ -826,6 +826,7 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
|
|
826
826
|
line_cut = line_cut.replace("+//", "")
|
827
827
|
line_cut = line_cut.replace("+...", "")
|
828
828
|
line_cut = line_cut.replace("_", "")
|
829
|
+
line_cut = line_cut.replace("#", "")
|
829
830
|
|
830
831
|
# xbxxx is a sepecial xxx-class token to mark
|
831
832
|
# special form markers, used for processing later
|
@@ -0,0 +1 @@
|
|
1
|
+
from .seamless import SeamlessTranslationModel
|
@@ -0,0 +1,53 @@
|
|
1
|
+
from batchalign.models import WhisperFAModel
|
2
|
+
from batchalign.document import *
|
3
|
+
from batchalign.pipelines.base import *
|
4
|
+
from batchalign.utils import *
|
5
|
+
from batchalign.utils.dp import *
|
6
|
+
from batchalign.constants import *
|
7
|
+
|
8
|
+
from transformers import AutoProcessor, SeamlessM4TModel
|
9
|
+
|
10
|
+
import logging
|
11
|
+
L = logging.getLogger("batchalign")
|
12
|
+
|
13
|
+
import re
|
14
|
+
|
15
|
+
# !uv pip install sentencepiece
|
16
|
+
|
17
|
+
import pycountry
|
18
|
+
import warnings
|
19
|
+
|
20
|
+
class SeamlessTranslationModel(BatchalignEngine):
|
21
|
+
tasks = [ Task.TRANSLATE ]
|
22
|
+
|
23
|
+
def _hook_status(self, status_hook):
|
24
|
+
self.status_hook = status_hook
|
25
|
+
|
26
|
+
def __init__(self):
|
27
|
+
self.status_hook = None
|
28
|
+
self.processor = AutoProcessor.from_pretrained("facebook/hf-seamless-m4t-medium")
|
29
|
+
self.model = SeamlessM4TModel.from_pretrained("facebook/hf-seamless-m4t-medium")
|
30
|
+
|
31
|
+
def process(self, doc:Document, **kwargs):
|
32
|
+
|
33
|
+
for indx, i in enumerate(doc.content):
|
34
|
+
if not isinstance(i, Utterance):
|
35
|
+
continue
|
36
|
+
if i.translation:
|
37
|
+
continue
|
38
|
+
|
39
|
+
text = i.strip(join_with_spaces=False, include_retrace=True, include_fp=True)
|
40
|
+
text_inputs = self.processor(text=text, src_lang=doc.langs[0] if doc.langs[0] != "zho" else "cmn", return_tensors="pt")
|
41
|
+
output_tokens = self.model.generate(**text_inputs, tgt_lang="eng", generate_speech=False)
|
42
|
+
translated_text_from_text = self.processor.decode(output_tokens[0].tolist()[0], skip_special_tokens=True)
|
43
|
+
|
44
|
+
i.translation = translated_text_from_text
|
45
|
+
for j in MOR_PUNCT + ENDING_PUNCT:
|
46
|
+
i.translation = i.translation.replace(j, " "+j)
|
47
|
+
|
48
|
+
if self.status_hook != None:
|
49
|
+
self.status_hook(indx+1, len(doc.content))
|
50
|
+
|
51
|
+
return doc
|
52
|
+
|
53
|
+
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: batchalign
|
3
|
-
Version: 0.7.13
|
3
|
+
Version: 0.7.14
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
@@ -33,6 +33,9 @@ Requires-Dist: soundfile~=0.12.0
|
|
33
33
|
Requires-Dist: rich-click>=1.7.0
|
34
34
|
Requires-Dist: typing-extensions
|
35
35
|
Requires-Dist: num2words
|
36
|
+
Requires-Dist: tiktoken
|
37
|
+
Requires-Dist: blobfile
|
38
|
+
Requires-Dist: sentencepiece
|
36
39
|
Provides-Extra: dev
|
37
40
|
Requires-Dist: pytest; extra == "dev"
|
38
41
|
Provides-Extra: train
|
@@ -83,6 +83,8 @@ batchalign/pipelines/morphosyntax/fr/case.py
|
|
83
83
|
batchalign/pipelines/morphosyntax/ja/verbforms.py
|
84
84
|
batchalign/pipelines/speaker/__init__.py
|
85
85
|
batchalign/pipelines/speaker/nemo_speaker.py
|
86
|
+
batchalign/pipelines/translate/__init__.py
|
87
|
+
batchalign/pipelines/translate/seamless.py
|
86
88
|
batchalign/pipelines/utr/__init__.py
|
87
89
|
batchalign/pipelines/utr/rev_utr.py
|
88
90
|
batchalign/pipelines/utr/utils.py
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/cleanup/support/filled_pauses.eng
RENAMED
File without changes
|
{batchalign-0.7.13 → batchalign-0.7.14}/batchalign/pipelines/cleanup/support/replacements.eng
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.13 → batchalign-0.7.14}/batchalign/tests/formats/chat/test_chat_generator.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.13 → batchalign-0.7.14}/batchalign/tests/pipelines/cleanup/test_disfluency.py
RENAMED
File without changes
|
{batchalign-0.7.13 → batchalign-0.7.14}/batchalign/tests/pipelines/cleanup/test_parse_support.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|