BatchalignHK 0.7.17.post15__tar.gz → 0.7.17.post17__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/BatchalignHK.egg-info/PKG-INFO +4 -2
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/BatchalignHK.egg-info/requires.txt +1 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/PKG-INFO +4 -2
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/cli/cli.py +3 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/cli/dispatch.py +14 -3
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/asr/tencent.py +14 -6
- batchalignhk-0.7.17.post17/batchalign/version +3 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/setup.py +1 -0
- batchalignhk-0.7.17.post15/batchalign/version +0 -3
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/BatchalignHK.egg-info/SOURCES.txt +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/BatchalignHK.egg-info/dependency_links.txt +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/BatchalignHK.egg-info/entry_points.txt +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/BatchalignHK.egg-info/top_level.txt +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/LICENSE +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/MANIFEST.in +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/README.md +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/__init__.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/__main__.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/cli/__init__.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/constants.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/document.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/errors.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/__init__.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/base.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/chat/__init__.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/chat/file.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/chat/generator.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/chat/lexer.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/chat/parser.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/chat/utils.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/textgrid/file.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/__init__.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/resolve.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/speaker/__init__.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/speaker/config.yaml +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/speaker/infer.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/speaker/utils.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/training/__init__.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/training/run.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/training/utils.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/utils.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/utterance/__init__.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/utterance/cantonese_infer.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/utterance/dataset.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/utterance/execute.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/utterance/infer.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/utterance/prep.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/utterance/train.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/wave2vec/__init__.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/wave2vec/infer_fa.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/whisper/__init__.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/__init__.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/asr/num2chinese.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/base.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/dispatch.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/morphosyntax/ud.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/pipeline.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/translate/__init__.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/translate/seamless.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/__init__.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/conftest.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/test_document.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/utils/__init__.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/utils/config.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/utils/dp.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/utils/utils.py +0 -0
- {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: BatchalignHK
|
|
3
|
-
Version: 0.7.17.post15
|
|
3
|
+
Version: 0.7.17.post17
|
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
|
@@ -15,6 +15,7 @@ Requires-Dist: torch>=2.6.0
|
|
|
15
15
|
Requires-Dist: torchaudio
|
|
16
16
|
Requires-Dist: hmmlearn==0.3.0
|
|
17
17
|
Requires-Dist: eyed3
|
|
18
|
+
Requires-Dist: opencc-python-reimplemented
|
|
18
19
|
Requires-Dist: pydub
|
|
19
20
|
Requires-Dist: imblearn
|
|
20
21
|
Requires-Dist: plotly>=5.3.0
|
|
@@ -47,6 +48,7 @@ Dynamic: author-email
|
|
|
47
48
|
Dynamic: classifier
|
|
48
49
|
Dynamic: description
|
|
49
50
|
Dynamic: description-content-type
|
|
51
|
+
Dynamic: license-file
|
|
50
52
|
Dynamic: provides-extra
|
|
51
53
|
Dynamic: requires-dist
|
|
52
54
|
Dynamic: summary
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: BatchalignHK
|
|
3
|
-
Version: 0.7.17.post15
|
|
3
|
+
Version: 0.7.17.post17
|
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
|
@@ -15,6 +15,7 @@ Requires-Dist: torch>=2.6.0
|
|
|
15
15
|
Requires-Dist: torchaudio
|
|
16
16
|
Requires-Dist: hmmlearn==0.3.0
|
|
17
17
|
Requires-Dist: eyed3
|
|
18
|
+
Requires-Dist: opencc-python-reimplemented
|
|
18
19
|
Requires-Dist: pydub
|
|
19
20
|
Requires-Dist: imblearn
|
|
20
21
|
Requires-Dist: plotly>=5.3.0
|
|
@@ -47,6 +48,7 @@ Dynamic: author-email
|
|
|
47
48
|
Dynamic: classifier
|
|
48
49
|
Dynamic: description
|
|
49
50
|
Dynamic: description-content-type
|
|
51
|
+
Dynamic: license-file
|
|
50
52
|
Dynamic: provides-extra
|
|
51
53
|
Dynamic: requires-dist
|
|
52
54
|
Dynamic: summary
|
|
@@ -154,6 +154,9 @@ def align(ctx, in_dir, out_dir, whisper, wav2vec, **kwargs):
|
|
|
154
154
|
default=False, help="Perform speaker diarization (this flag is ignored with Rev.AI)")
|
|
155
155
|
@click.option("--wor/--nowor",
|
|
156
156
|
default=False, help="Should we write word level alignment line? Default to no.")
|
|
157
|
+
@click.option("--data",
|
|
158
|
+
help="the URL of the data",
|
|
159
|
+
type=str)
|
|
157
160
|
@click.option("--lang",
|
|
158
161
|
help="sample language in three-letter ISO 3166-1 alpha-3 code",
|
|
159
162
|
show_default=True,
|
|
@@ -5,6 +5,7 @@ and actual BatchalignPipeline.
|
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
7
|
from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn, BarColumn
|
|
8
|
+
from urllib.parse import urlparse
|
|
8
9
|
|
|
9
10
|
import warnings
|
|
10
11
|
|
|
@@ -63,6 +64,15 @@ def _dispatch(command, lang, num_speakers,
|
|
|
63
64
|
files = []
|
|
64
65
|
outputs = []
|
|
65
66
|
|
|
67
|
+
if kwargs.get("data"):
|
|
68
|
+
url = kwargs.get("data")
|
|
69
|
+
url = urlparse(url)
|
|
70
|
+
if url.scheme == "":
|
|
71
|
+
url = url._replace(scheme="http")
|
|
72
|
+
base = os.path.basename(url.path)
|
|
73
|
+
files.append(url)
|
|
74
|
+
outputs.append(os.path.join(out_dir, base))
|
|
75
|
+
|
|
66
76
|
for basedir, _, fs in os.walk(in_dir):
|
|
67
77
|
for f in fs:
|
|
68
78
|
path = Path(os.path.join(basedir, f))
|
|
@@ -128,7 +138,8 @@ def _dispatch(command, lang, num_speakers,
|
|
|
128
138
|
errors = []
|
|
129
139
|
# create the spinner bars
|
|
130
140
|
for f in files:
|
|
131
|
-
tasks[f] = prog.add_task(Path(f).name
|
|
141
|
+
tasks[f] = prog.add_task(Path(f).name if isinstance(f, str) else Path(f.geturl()).name,
|
|
142
|
+
start=False, processor="")
|
|
132
143
|
|
|
133
144
|
# create pipeline and read files
|
|
134
145
|
baL.debug("Attempting to create BatchalignPipeline for CLI...")
|
|
@@ -152,7 +163,7 @@ def _dispatch(command, lang, num_speakers,
|
|
|
152
163
|
prog.start_task(tasks[file])
|
|
153
164
|
with warnings.catch_warnings(record=True) as w:
|
|
154
165
|
# parse the input format, as needed
|
|
155
|
-
doc = loader(os.path.abspath(file))
|
|
166
|
+
doc = loader(os.path.abspath(file) if isinstance(file, str) else file.geturl())
|
|
156
167
|
# if we ended up with a tuple of length two,
|
|
157
168
|
# that means that the loader requested kwargs
|
|
158
169
|
kw = {}
|
|
@@ -179,7 +190,7 @@ def _dispatch(command, lang, num_speakers,
|
|
|
179
190
|
if len(errors) > 0:
|
|
180
191
|
C.print()
|
|
181
192
|
for file, trcbk, e in errors:
|
|
182
|
-
C.print(f"[bold red]ERROR[/bold red] on file [italic]{os.path.relpath(str(Path(file).absolute()), in_dir)}[/italic]: {escape(str(e))}\n")
|
|
193
|
+
C.print(f"[bold red]ERROR[/bold red] on file [italic]{os.path.relpath(str(Path(file).absolute()), in_dir) if isinstance(file, str) else file.geturl()}[/italic]: {escape(str(e))}\n")
|
|
183
194
|
if ctx.obj["verbose"] == 1:
|
|
184
195
|
C.print(escape(str(trcbk)))
|
|
185
196
|
elif ctx.obj["verbose"] > 1:
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/asr/tencent.py
RENAMED
|
@@ -12,6 +12,9 @@ from batchalign.errors import *
|
|
|
12
12
|
|
|
13
13
|
from batchalign.models import BertUtteranceModel, BertCantoneseUtteranceModel, resolve
|
|
14
14
|
|
|
15
|
+
from opencc import OpenCC
|
|
16
|
+
cc = OpenCC('s2hk')
|
|
17
|
+
|
|
15
18
|
import time
|
|
16
19
|
import pathlib
|
|
17
20
|
import pycountry
|
|
@@ -74,16 +77,21 @@ class TencentEngine(BatchalignEngine):
|
|
|
74
77
|
|
|
75
78
|
L.info(f"Uploading '{pathlib.Path(f).stem}'...")
|
|
76
79
|
# we will send the file for processing
|
|
77
|
-
|
|
78
|
-
|
|
80
|
+
if not str(f).startswith("http"):
|
|
81
|
+
with open(f, "rb") as image_file:
|
|
82
|
+
encoded_string = base64.b64encode(image_file.read())
|
|
79
83
|
|
|
80
84
|
req = models.CreateRecTaskRequest()
|
|
81
85
|
req.EngineModelType = f"16k_{lang}"
|
|
82
86
|
req.ResTextFormat = 1
|
|
83
|
-
req.SourceType = 1
|
|
84
87
|
req.SpeakerDiarization = 1
|
|
85
88
|
req.ChannelNum = 1
|
|
86
|
-
|
|
89
|
+
if not str(f).startswith("http"):
|
|
90
|
+
req.Data = encoded_string.decode('ascii')
|
|
91
|
+
req.SourceType = 1
|
|
92
|
+
else:
|
|
93
|
+
req.Url = f
|
|
94
|
+
req.SourceType = 0
|
|
87
95
|
resp = client.CreateRecTask(req)
|
|
88
96
|
|
|
89
97
|
L.info(f"Tencent is transcribing '{pathlib.Path(f).stem}'...")
|
|
@@ -96,7 +104,7 @@ class TencentEngine(BatchalignEngine):
|
|
|
96
104
|
res = client.DescribeTaskStatus(req)
|
|
97
105
|
|
|
98
106
|
# if failed, raise
|
|
99
|
-
if res.Data.Status == "3":
|
|
107
|
+
if res.Data.Status == "3" or res.Data.Status == 3:
|
|
100
108
|
raise RuntimeError(f"Tencent reports job failed! error='{res.Data.ErrorMsg}'")
|
|
101
109
|
|
|
102
110
|
turns = []
|
|
@@ -108,7 +116,7 @@ class TencentEngine(BatchalignEngine):
|
|
|
108
116
|
"type": "text",
|
|
109
117
|
"ts": (j.OffsetStartMs+start)/1000,
|
|
110
118
|
"end_ts": (j.OffsetEndMs+start)/1000,
|
|
111
|
-
"value": j.Word
|
|
119
|
+
"value": cc.convert(j.Word)
|
|
112
120
|
})
|
|
113
121
|
turns.append({
|
|
114
122
|
"elements": turn,
|
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/BatchalignHK.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/BatchalignHK.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/BatchalignHK.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/chat/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/chat/generator.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/textgrid/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/textgrid/file.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/textgrid/generator.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/textgrid/parser.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/speaker/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/speaker/config.yaml
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/speaker/infer.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/speaker/utils.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/training/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/training/utils.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/utterance/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/utterance/dataset.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/utterance/execute.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/utterance/infer.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/utterance/prep.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/utterance/train.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/wave2vec/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/wave2vec/infer_fa.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/whisper/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/whisper/infer_asr.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/whisper/infer_fa.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/analysis/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/analysis/eval.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/asr/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/asr/num2chinese.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/asr/whisper.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/asr/whisperx.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/cleanup/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/cleanup/cleanup.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/cleanup/retrace.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/fa/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/fa/wave2vec_fa.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/fa/whisper_fa.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/morphosyntax/coref.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/morphosyntax/ud.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/speaker/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/translate/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/translate/seamless.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/utr/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/utr/rev_utr.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/utr/whisper_utr.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/utterance/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/pipelines/fixures.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|