BatchalignHK 0.7.19.post9__tar.gz → 0.7.19.post11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/BatchalignHK.egg-info/PKG-INFO +1 -1
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/PKG-INFO +1 -1
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/asr/tencent.py +86 -59
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/cleanup/retrace.py +1 -1
- batchalignhk-0.7.19.post11/batchalign/version +3 -0
- batchalignhk-0.7.19.post9/batchalign/version +0 -3
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/BatchalignHK.egg-info/SOURCES.txt +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/BatchalignHK.egg-info/dependency_links.txt +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/BatchalignHK.egg-info/entry_points.txt +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/BatchalignHK.egg-info/requires.txt +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/BatchalignHK.egg-info/top_level.txt +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/LICENSE +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/MANIFEST.in +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/README.md +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/__init__.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/__main__.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/cli/__init__.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/cli/cli.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/cli/dispatch.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/constants.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/document.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/errors.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/formats/__init__.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/formats/base.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/formats/chat/__init__.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/formats/chat/file.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/formats/chat/generator.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/formats/chat/lexer.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/formats/chat/parser.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/formats/chat/utils.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/formats/textgrid/file.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/__init__.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/resolve.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/speaker/__init__.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/speaker/config.yaml +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/speaker/infer.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/speaker/utils.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/training/__init__.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/training/run.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/training/utils.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/utils.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/utterance/__init__.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/utterance/cantonese_infer.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/utterance/dataset.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/utterance/execute.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/utterance/infer.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/utterance/prep.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/utterance/train.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/wave2vec/__init__.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/wave2vec/infer_fa.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/whisper/__init__.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/__init__.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/asr/num2chinese.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/asr/oai_whisper.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/base.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/dispatch.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/morphosyntax/ud.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/pipeline.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/translate/__init__.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/translate/gtrans.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/translate/seamless.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/translate/utils.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/tests/__init__.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/tests/conftest.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/tests/test_document.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/utils/__init__.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/utils/abbrev.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/utils/config.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/utils/dp.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/utils/names.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/utils/utils.py +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/setup.cfg +0 -0
- {batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/setup.py +0 -0
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/asr/tencent.py
RENAMED
|
@@ -111,6 +111,11 @@ class TencentEngine(BatchalignEngine):
|
|
|
111
111
|
"重復": "重複"
|
|
112
112
|
}
|
|
113
113
|
return word_replacements.get(word, word)
|
|
114
|
+
|
|
115
|
+
@staticmethod
|
|
116
|
+
def is_roman(x):
|
|
117
|
+
"""check if x contains only roman characters"""
|
|
118
|
+
return all(c.isalpha() and ord(c) < 128 for c in x if not c.isspace())
|
|
114
119
|
|
|
115
120
|
def generate(self, f, **kwargs):
|
|
116
121
|
lang = self.__lang
|
|
@@ -119,73 +124,95 @@ class TencentEngine(BatchalignEngine):
|
|
|
119
124
|
# processed_path = self.__preprocess_audio(f)
|
|
120
125
|
# audio = AudioSegment.from_file(processed_path)
|
|
121
126
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
resp = client.CreateRecTask(req)
|
|
127
|
+
L.info(f"Uploading '{pathlib.Path(f).stem}'...")
|
|
128
|
+
# we will send the file for processing
|
|
129
|
+
if not str(f).startswith("http"):
|
|
130
|
+
with open(f, "rb") as image_file:
|
|
131
|
+
encoded_string = base64.b64encode(image_file.read())
|
|
132
|
+
|
|
133
|
+
req = models.CreateRecTaskRequest()
|
|
134
|
+
if lang in {'zho', 'yue', 'wuu', 'nan','hak'}:
|
|
135
|
+
req.EngineModelType = "16k_zh_large"
|
|
136
|
+
else:
|
|
137
|
+
req.EngineModelType = f"16k_{lang}"
|
|
138
|
+
req.ResTextFormat = 1
|
|
139
|
+
req.SpeakerDiarization = 1
|
|
140
|
+
req.ChannelNum = 1
|
|
141
|
+
if not str(f).startswith("http"):
|
|
142
|
+
req.Data = encoded_string.decode('ascii')
|
|
143
|
+
req.SourceType = 1
|
|
144
|
+
else:
|
|
145
|
+
req.Url = f
|
|
146
|
+
req.SourceType = 0
|
|
147
|
+
resp = client.CreateRecTask(req)
|
|
144
148
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
149
|
+
L.info(f"Tencent is transcribing '{pathlib.Path(f).stem}'...")
|
|
150
|
+
req = models.DescribeTaskStatusRequest()
|
|
151
|
+
req.TaskId = resp.Data.TaskId
|
|
148
152
|
|
|
153
|
+
res = client.DescribeTaskStatus(req)
|
|
154
|
+
while res.Data.Status not in [2, 3]:
|
|
155
|
+
time.sleep(15)
|
|
149
156
|
res = client.DescribeTaskStatus(req)
|
|
150
|
-
while res.Data.Status not in [2, 3]:
|
|
151
|
-
time.sleep(15)
|
|
152
|
-
res = client.DescribeTaskStatus(req)
|
|
153
|
-
|
|
154
|
-
if res.Data.Status in ["3", 3]:
|
|
155
|
-
raise RuntimeError(f"Tencent reports job failed! error='{res.Data.ErrorMsg}'")
|
|
156
|
-
|
|
157
|
-
turns = []
|
|
158
|
-
for i in res.Data.ResultDetail:
|
|
159
|
-
turn = []
|
|
160
|
-
start = i.StartMs
|
|
161
|
-
for j in i.Words:
|
|
162
|
-
word = j.Word
|
|
163
|
-
if self.__lang == "yue":
|
|
164
|
-
word = cc.convert(word)
|
|
165
|
-
|
|
166
|
-
word = self.replace_cantonese_words(word)
|
|
167
157
|
|
|
158
|
+
if res.Data.Status in ["3", 3]:
|
|
159
|
+
raise RuntimeError(f"Tencent reports job failed! error='{res.Data.ErrorMsg}'")
|
|
160
|
+
|
|
161
|
+
turns = []
|
|
162
|
+
for i in res.Data.ResultDetail:
|
|
163
|
+
turn = []
|
|
164
|
+
start = i.StartMs
|
|
165
|
+
roman_cache = ""
|
|
166
|
+
roman_cache_start = i.StartMs
|
|
167
|
+
roman_cache_end = i.StartMs
|
|
168
|
+
for j in i.Words:
|
|
169
|
+
word = j.Word
|
|
170
|
+
if self.__lang == "yue":
|
|
171
|
+
word = cc.convert(word)
|
|
172
|
+
|
|
173
|
+
word = self.replace_cantonese_words(word)
|
|
174
|
+
|
|
175
|
+
if self.is_roman(word):
|
|
176
|
+
if roman_cache == "":
|
|
177
|
+
roman_cache_start = (j.OffsetStartMs + start)
|
|
178
|
+
roman_cache = roman_cache + word
|
|
179
|
+
roman_cache_end = (j.OffsetEndMs + start)
|
|
180
|
+
else:
|
|
181
|
+
if roman_cache != "":
|
|
182
|
+
turn.append({
|
|
183
|
+
"type": "text",
|
|
184
|
+
"ts": roman_cache_start / 1000,
|
|
185
|
+
"end_ts": roman_cache_end / 1000,
|
|
186
|
+
"value": roman_cache
|
|
187
|
+
})
|
|
188
|
+
roman_cache = ""
|
|
168
189
|
turn.append({
|
|
169
190
|
"type": "text",
|
|
170
191
|
"ts": (j.OffsetStartMs + start) / 1000,
|
|
171
192
|
"end_ts": (j.OffsetEndMs + start) / 1000,
|
|
172
193
|
"value": word
|
|
173
194
|
})
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
195
|
+
|
|
196
|
+
if roman_cache != "":
|
|
197
|
+
turn.append({
|
|
198
|
+
"type": "text",
|
|
199
|
+
"ts": roman_cache_start / 1000,
|
|
200
|
+
"end_ts": roman_cache_end / 1000,
|
|
201
|
+
"value": roman_cache
|
|
177
202
|
})
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
203
|
+
|
|
204
|
+
turns.append({
|
|
205
|
+
"elements": turn,
|
|
206
|
+
"speaker": i.SpeakerId
|
|
207
|
+
})
|
|
208
|
+
L.debug(f"Tencent done.")
|
|
209
|
+
|
|
210
|
+
# Extract the text from the small volume parts for translation
|
|
211
|
+
|
|
212
|
+
doc = process_generation({"monologues": turns},
|
|
213
|
+
self.__lang_code,
|
|
214
|
+
utterance_engine=self.__engine)
|
|
215
|
+
media = Media(type=MediaType.AUDIO, name=Path(f).stem, url=f)
|
|
216
|
+
doc.media = media
|
|
217
|
+
return doc
|
|
218
|
+
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/cleanup/retrace.py
RENAMED
|
@@ -22,7 +22,7 @@ class NgramRetraceEngine(BatchalignEngine):
|
|
|
22
22
|
if i.type in [TokenType.REGULAR, TokenType.PUNCT, TokenType.FP]:
|
|
23
23
|
content.append(i)
|
|
24
24
|
# scan for n-gram retraces
|
|
25
|
-
for n in range(1, len(content)):
|
|
25
|
+
for n in range(1 if "yue" not in doc.langs and "zho" not in doc.langs else 2, len(content)):
|
|
26
26
|
begin = 0
|
|
27
27
|
while begin < len(content)-(n):
|
|
28
28
|
# get the n gram info; we convert it to
|
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/BatchalignHK.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/BatchalignHK.egg-info/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/BatchalignHK.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/formats/chat/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/formats/chat/generator.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/formats/textgrid/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/formats/textgrid/file.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/formats/textgrid/generator.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/formats/textgrid/parser.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/speaker/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/speaker/config.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/training/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/training/utils.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/utterance/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/utterance/dataset.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/utterance/execute.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/utterance/infer.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/utterance/prep.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/utterance/train.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/wave2vec/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/wave2vec/infer_fa.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/whisper/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/whisper/infer_asr.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/models/whisper/infer_fa.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/analysis/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/analysis/eval.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/asr/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/asr/num2chinese.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/asr/oai_whisper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/asr/whisper.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/asr/whisperx.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/cleanup/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/cleanup/cleanup.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/fa/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/fa/wave2vec_fa.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/fa/whisper_fa.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/morphosyntax/coref.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/morphosyntax/en/irr.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/morphosyntax/fr/apm.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/morphosyntax/ud.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/speaker/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/translate/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/translate/gtrans.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/translate/seamless.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/translate/utils.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/utr/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/utr/rev_utr.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/utr/whisper_utr.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/pipelines/utterance/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/tests/pipelines/fixures.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post9 → batchalignhk-0.7.19.post11}/batchalign/tests/pipelines/test_pipeline.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|