BatchalignHK 0.7.19.post7__tar.gz → 0.7.19.post9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/BatchalignHK.egg-info/PKG-INFO +2 -3
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/BatchalignHK.egg-info/SOURCES.txt +1 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/PKG-INFO +2 -3
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/document.py +6 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/asr/tencent.py +17 -72
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/morphosyntax/ud.py +1 -0
- batchalignhk-0.7.19.post9/batchalign/utils/abbrev.py +182 -0
- batchalignhk-0.7.19.post9/batchalign/version +3 -0
- batchalignhk-0.7.19.post7/batchalign/version +0 -3
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/BatchalignHK.egg-info/dependency_links.txt +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/BatchalignHK.egg-info/entry_points.txt +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/BatchalignHK.egg-info/requires.txt +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/BatchalignHK.egg-info/top_level.txt +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/LICENSE +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/MANIFEST.in +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/README.md +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/__init__.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/__main__.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/cli/__init__.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/cli/cli.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/cli/dispatch.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/constants.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/errors.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/formats/__init__.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/formats/base.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/formats/chat/__init__.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/formats/chat/file.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/formats/chat/generator.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/formats/chat/lexer.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/formats/chat/parser.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/formats/chat/utils.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/formats/textgrid/file.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/__init__.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/resolve.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/speaker/__init__.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/speaker/config.yaml +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/speaker/infer.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/speaker/utils.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/training/__init__.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/training/run.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/training/utils.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/utils.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/utterance/__init__.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/utterance/cantonese_infer.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/utterance/dataset.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/utterance/execute.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/utterance/infer.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/utterance/prep.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/utterance/train.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/wave2vec/__init__.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/wave2vec/infer_fa.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/whisper/__init__.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/__init__.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/asr/num2chinese.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/asr/oai_whisper.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/base.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/dispatch.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/pipeline.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/translate/__init__.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/translate/gtrans.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/translate/seamless.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/translate/utils.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/tests/__init__.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/tests/conftest.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/tests/test_document.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/utils/__init__.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/utils/config.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/utils/dp.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/utils/names.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/utils/utils.py +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/setup.cfg +0 -0
- {batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/setup.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: BatchalignHK
|
|
3
|
-
Version: 0.7.19.post7
|
|
3
|
+
Version: 0.7.19.post9
|
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
|
@@ -50,7 +50,6 @@ Dynamic: author-email
|
|
|
50
50
|
Dynamic: classifier
|
|
51
51
|
Dynamic: description
|
|
52
52
|
Dynamic: description-content-type
|
|
53
|
-
Dynamic: license-file
|
|
54
53
|
Dynamic: provides-extra
|
|
55
54
|
Dynamic: requires-dist
|
|
56
55
|
Dynamic: summary
|
|
@@ -115,6 +115,7 @@ batchalign/tests/pipelines/cleanup/test_disfluency.py
|
|
|
115
115
|
batchalign/tests/pipelines/cleanup/test_parse_support.py
|
|
116
116
|
batchalign/tests/pipelines/fa/test_fa_pipeline.py
|
|
117
117
|
batchalign/utils/__init__.py
|
|
118
|
+
batchalign/utils/abbrev.py
|
|
118
119
|
batchalign/utils/config.py
|
|
119
120
|
batchalign/utils/dp.py
|
|
120
121
|
batchalign/utils/names.py
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: BatchalignHK
|
|
3
|
-
Version: 0.7.19.post7
|
|
3
|
+
Version: 0.7.19.post9
|
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
|
@@ -50,7 +50,6 @@ Dynamic: author-email
|
|
|
50
50
|
Dynamic: classifier
|
|
51
51
|
Dynamic: description
|
|
52
52
|
Dynamic: description-content-type
|
|
53
|
-
Dynamic: license-file
|
|
54
53
|
Dynamic: provides-extra
|
|
55
54
|
Dynamic: requires-dist
|
|
56
55
|
Dynamic: summary
|
|
@@ -11,6 +11,7 @@ from pathlib import Path
|
|
|
11
11
|
|
|
12
12
|
from batchalign.errors import *
|
|
13
13
|
from batchalign.constants import *
|
|
14
|
+
from batchalign.utils.abbrev import abbrev
|
|
14
15
|
|
|
15
16
|
import re
|
|
16
17
|
|
|
@@ -223,6 +224,11 @@ class Utterance(BaseModel):
|
|
|
223
224
|
t = re.sub(r",", " , ", t.strip()).strip()
|
|
224
225
|
t = re.sub(r" +", " ", t.strip()).strip()
|
|
225
226
|
t = t.replace("+ ,", "+,").strip()
|
|
227
|
+
|
|
228
|
+
abbrevs = [" " .join(list(i)) for i in abbrev]
|
|
229
|
+
for i in abbrevs:
|
|
230
|
+
t = t.replace(i, i.replace(" ", ""))
|
|
231
|
+
|
|
226
232
|
return t
|
|
227
233
|
|
|
228
234
|
def __repr__(self):
|
|
@@ -21,8 +21,8 @@ import tempfile
|
|
|
21
21
|
import pycountry
|
|
22
22
|
import numpy as np
|
|
23
23
|
import soundfile as sf
|
|
24
|
-
from pydub import AudioSegment
|
|
25
|
-
from pydub.effects import normalize
|
|
24
|
+
# from pydub import AudioSegment
|
|
25
|
+
# from pydub.effects import normalize
|
|
26
26
|
import base64
|
|
27
27
|
from tencentcloud.common.credential import Credential
|
|
28
28
|
from tencentcloud.asr.v20190614.asr_client import AsrClient, models
|
|
@@ -30,9 +30,9 @@ from tencentcloud.asr.v20190614.asr_client import AsrClient, models
|
|
|
30
30
|
import asyncio
|
|
31
31
|
import tempfile
|
|
32
32
|
import os
|
|
33
|
-
from pydub import AudioSegment
|
|
34
|
-
from pydub.effects import normalize
|
|
35
|
-
from pydub.exceptions import CouldntDecodeError
|
|
33
|
+
# from pydub import AudioSegment
|
|
34
|
+
# from pydub.effects import normalize
|
|
35
|
+
# from pydub.exceptions import CouldntDecodeError
|
|
36
36
|
|
|
37
37
|
|
|
38
38
|
import logging
|
|
@@ -77,66 +77,6 @@ class TencentEngine(BatchalignEngine):
|
|
|
77
77
|
L.debug("Done.")
|
|
78
78
|
else:
|
|
79
79
|
self.__engine = None
|
|
80
|
-
|
|
81
|
-
def __preprocess_audio(self, input_path):
|
|
82
|
-
"""Enhanced audio preprocessing for low-volume speech"""
|
|
83
|
-
try:
|
|
84
|
-
L.info(f"Optimizing audio for ASR: {input_path}")
|
|
85
|
-
|
|
86
|
-
# read the audio file
|
|
87
|
-
audio = AudioSegment.from_file(input_path)
|
|
88
|
-
|
|
89
|
-
audio = audio.set_channels(1)
|
|
90
|
-
audio = audio.set_frame_rate(16000)
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
audio = audio.compress_dynamic_range(
|
|
94
|
-
threshold=-40,
|
|
95
|
-
ratio=3,
|
|
96
|
-
attack=5,
|
|
97
|
-
release=100
|
|
98
|
-
)
|
|
99
|
-
audio = audio.low_pass_filter(4000) # filter out high frequencies
|
|
100
|
-
audio = audio.normalize(headroom=2) # keep the headroom
|
|
101
|
-
audio = audio.compress_dynamic_range(
|
|
102
|
-
threshold=-55,
|
|
103
|
-
ratio=6,
|
|
104
|
-
attack=15,
|
|
105
|
-
release=200
|
|
106
|
-
)
|
|
107
|
-
|
|
108
|
-
# enhance low volume
|
|
109
|
-
audio = audio.high_pass_filter(80)
|
|
110
|
-
boosted = audio.high_pass_filter(1000).apply_gain(+4)
|
|
111
|
-
audio = audio.overlay(boosted)
|
|
112
|
-
|
|
113
|
-
if L.level <= logging.DEBUG:
|
|
114
|
-
self.__print_audio_stats(audio)
|
|
115
|
-
|
|
116
|
-
# output to a temporary file
|
|
117
|
-
temp_fd, temp_path = tempfile.mkstemp(suffix=".mp3")
|
|
118
|
-
os.close(temp_fd)
|
|
119
|
-
audio.export(
|
|
120
|
-
temp_path,
|
|
121
|
-
format="mp3",
|
|
122
|
-
codec="libmp3lame",
|
|
123
|
-
bitrate="96k",
|
|
124
|
-
tags={"title": "BA_Optimized"},
|
|
125
|
-
parameters=[
|
|
126
|
-
"-compression_level", "2",
|
|
127
|
-
"-reservoir", "0",
|
|
128
|
-
"-joint_stereo", "0"
|
|
129
|
-
]
|
|
130
|
-
)
|
|
131
|
-
|
|
132
|
-
return temp_path
|
|
133
|
-
|
|
134
|
-
except CouldntDecodeError:
|
|
135
|
-
L.error(f"Audio decoding failed: {input_path}")
|
|
136
|
-
return input_path
|
|
137
|
-
except Exception as e:
|
|
138
|
-
L.error(f"Audio processing error: {str(e)}")
|
|
139
|
-
return input_path
|
|
140
80
|
|
|
141
81
|
def replace_cantonese_words(self, word):
|
|
142
82
|
"""Function to replace Cantonese words with custom replacements."""
|
|
@@ -176,13 +116,15 @@ class TencentEngine(BatchalignEngine):
|
|
|
176
116
|
lang = self.__lang
|
|
177
117
|
client = self.__client
|
|
178
118
|
|
|
179
|
-
processed_path = self.__preprocess_audio(f)
|
|
180
|
-
audio = AudioSegment.from_file(processed_path)
|
|
119
|
+
# processed_path = self.__preprocess_audio(f)
|
|
120
|
+
# audio = AudioSegment.from_file(processed_path)
|
|
181
121
|
|
|
182
122
|
try:
|
|
183
123
|
L.info(f"Uploading '{pathlib.Path(f).stem}'...")
|
|
184
|
-
|
|
185
|
-
|
|
124
|
+
# we will send the file for processing
|
|
125
|
+
if not str(f).startswith("http"):
|
|
126
|
+
with open(f, "rb") as image_file:
|
|
127
|
+
encoded_string = base64.b64encode(image_file.read())
|
|
186
128
|
|
|
187
129
|
req = models.CreateRecTaskRequest()
|
|
188
130
|
if lang in {'zho', 'yue', 'wuu', 'nan','hak'}:
|
|
@@ -192,9 +134,12 @@ class TencentEngine(BatchalignEngine):
|
|
|
192
134
|
req.ResTextFormat = 1
|
|
193
135
|
req.SpeakerDiarization = 1
|
|
194
136
|
req.ChannelNum = 1
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
137
|
+
if not str(f).startswith("http"):
|
|
138
|
+
req.Data = encoded_string.decode('ascii')
|
|
139
|
+
req.SourceType = 1
|
|
140
|
+
else:
|
|
141
|
+
req.Url = f
|
|
142
|
+
req.SourceType = 0
|
|
198
143
|
resp = client.CreateRecTask(req)
|
|
199
144
|
|
|
200
145
|
L.info(f"Tencent is transcribing '{pathlib.Path(f).stem}'...")
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
abbrev = [
|
|
2
|
+
"FBI",
|
|
3
|
+
"CIA",
|
|
4
|
+
"NSA",
|
|
5
|
+
"NATO",
|
|
6
|
+
"UN",
|
|
7
|
+
"WHO",
|
|
8
|
+
"NASA",
|
|
9
|
+
"CDC",
|
|
10
|
+
"IRS",
|
|
11
|
+
"EPA",
|
|
12
|
+
"HTTP",
|
|
13
|
+
"URL",
|
|
14
|
+
"HTML",
|
|
15
|
+
"CSS",
|
|
16
|
+
"API",
|
|
17
|
+
"IP",
|
|
18
|
+
"DNS",
|
|
19
|
+
"SQL",
|
|
20
|
+
"USB",
|
|
21
|
+
"VPN",
|
|
22
|
+
"ATT",
|
|
23
|
+
"AT&T",
|
|
24
|
+
"CEO",
|
|
25
|
+
"CFO",
|
|
26
|
+
"COO",
|
|
27
|
+
"IPO",
|
|
28
|
+
"ROI",
|
|
29
|
+
"GDP",
|
|
30
|
+
"LLC",
|
|
31
|
+
"HR",
|
|
32
|
+
"M&",
|
|
33
|
+
"KPI",
|
|
34
|
+
"GPA",
|
|
35
|
+
"SAT",
|
|
36
|
+
"ACT",
|
|
37
|
+
"MBA",
|
|
38
|
+
"PhD",
|
|
39
|
+
"BA",
|
|
40
|
+
"MA",
|
|
41
|
+
"STEM",
|
|
42
|
+
"ESL",
|
|
43
|
+
"GED",
|
|
44
|
+
"AWOL",
|
|
45
|
+
"MIA",
|
|
46
|
+
"POW",
|
|
47
|
+
"IED",
|
|
48
|
+
"UAV",
|
|
49
|
+
"RPG",
|
|
50
|
+
"NATO",
|
|
51
|
+
"SEAL",
|
|
52
|
+
"JAG",
|
|
53
|
+
"ROTC",
|
|
54
|
+
"CERN",
|
|
55
|
+
"GMO",
|
|
56
|
+
"H2O",
|
|
57
|
+
"CO2",
|
|
58
|
+
"UV",
|
|
59
|
+
"IR",
|
|
60
|
+
"AI",
|
|
61
|
+
"VR",
|
|
62
|
+
"AR",
|
|
63
|
+
"NPR",
|
|
64
|
+
"BBC",
|
|
65
|
+
"MTV",
|
|
66
|
+
"CNN",
|
|
67
|
+
"HBO",
|
|
68
|
+
"ESPN",
|
|
69
|
+
"TMZ",
|
|
70
|
+
"AMC",
|
|
71
|
+
"IMAX",
|
|
72
|
+
"WWE",
|
|
73
|
+
"ASAP",
|
|
74
|
+
"DIY",
|
|
75
|
+
"ETA",
|
|
76
|
+
"RSVP",
|
|
77
|
+
"FYI",
|
|
78
|
+
"LOL",
|
|
79
|
+
"BRB",
|
|
80
|
+
"IDK",
|
|
81
|
+
"BTW",
|
|
82
|
+
"TMI",
|
|
83
|
+
"PBJ",
|
|
84
|
+
"AIDS",
|
|
85
|
+
"HIV",
|
|
86
|
+
"ADHD",
|
|
87
|
+
"COPD",
|
|
88
|
+
"PTSD",
|
|
89
|
+
"CHF",
|
|
90
|
+
"CAD",
|
|
91
|
+
"TB",
|
|
92
|
+
"UTI",
|
|
93
|
+
"GERD",
|
|
94
|
+
"MRI",
|
|
95
|
+
"CT",
|
|
96
|
+
"ECG",
|
|
97
|
+
"EEG",
|
|
98
|
+
"CBC",
|
|
99
|
+
"BMP",
|
|
100
|
+
"ABG",
|
|
101
|
+
"PFT",
|
|
102
|
+
"FOBT",
|
|
103
|
+
"ENT",
|
|
104
|
+
"OB",
|
|
105
|
+
"PCP",
|
|
106
|
+
"ICU",
|
|
107
|
+
"NICU",
|
|
108
|
+
"ER",
|
|
109
|
+
"OR",
|
|
110
|
+
"PT",
|
|
111
|
+
"OT",
|
|
112
|
+
"EM",
|
|
113
|
+
"OTC",
|
|
114
|
+
"NSAID",
|
|
115
|
+
"IV",
|
|
116
|
+
"IM",
|
|
117
|
+
"SC",
|
|
118
|
+
"PRN",
|
|
119
|
+
"BID",
|
|
120
|
+
"TID",
|
|
121
|
+
"QID",
|
|
122
|
+
"NPO",
|
|
123
|
+
"CNS",
|
|
124
|
+
"PNS",
|
|
125
|
+
"GI",
|
|
126
|
+
"GU",
|
|
127
|
+
"CV",
|
|
128
|
+
"MSK",
|
|
129
|
+
"ENT",
|
|
130
|
+
"BMI",
|
|
131
|
+
"BMR",
|
|
132
|
+
"BP",
|
|
133
|
+
"WBC",
|
|
134
|
+
"RBC",
|
|
135
|
+
"HGB",
|
|
136
|
+
"HCT",
|
|
137
|
+
"PLT",
|
|
138
|
+
"ESR",
|
|
139
|
+
"CRP",
|
|
140
|
+
"LFT",
|
|
141
|
+
"TFT",
|
|
142
|
+
"INR",
|
|
143
|
+
"MMR",
|
|
144
|
+
"DPT",
|
|
145
|
+
"HPV",
|
|
146
|
+
"Tdap",
|
|
147
|
+
"BCG",
|
|
148
|
+
"IPV",
|
|
149
|
+
"HBV",
|
|
150
|
+
"HAV",
|
|
151
|
+
"HCV",
|
|
152
|
+
"RSV",
|
|
153
|
+
"SOAP",
|
|
154
|
+
"DNR",
|
|
155
|
+
"AMA",
|
|
156
|
+
"LOS",
|
|
157
|
+
"EHR",
|
|
158
|
+
"EMR",
|
|
159
|
+
"ICD",
|
|
160
|
+
"CPT",
|
|
161
|
+
"HIPAA",
|
|
162
|
+
"HR",
|
|
163
|
+
"RR",
|
|
164
|
+
"SpO2",
|
|
165
|
+
"MAP",
|
|
166
|
+
"GFR",
|
|
167
|
+
"A1C",
|
|
168
|
+
"LDL",
|
|
169
|
+
"HDL",
|
|
170
|
+
"TG",
|
|
171
|
+
"BUN",
|
|
172
|
+
"SIDS",
|
|
173
|
+
"DVT",
|
|
174
|
+
"PE",
|
|
175
|
+
"ARDS",
|
|
176
|
+
"SLE",
|
|
177
|
+
"RA",
|
|
178
|
+
"TIA",
|
|
179
|
+
"CVA",
|
|
180
|
+
"ALS",
|
|
181
|
+
"MS",
|
|
182
|
+
]
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/BatchalignHK.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/BatchalignHK.egg-info/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/formats/chat/generator.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/formats/textgrid/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/formats/textgrid/generator.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/formats/textgrid/parser.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/speaker/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/speaker/config.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/training/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/utterance/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/utterance/dataset.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/utterance/execute.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/utterance/infer.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/utterance/train.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/wave2vec/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/wave2vec/infer_fa.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/whisper/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/whisper/infer_asr.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/models/whisper/infer_fa.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/analysis/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/analysis/eval.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/asr/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/asr/num2chinese.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/asr/oai_whisper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/asr/whisperx.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/cleanup/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/cleanup/cleanup.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/cleanup/disfluencies.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/cleanup/retrace.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/fa/wave2vec_fa.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/fa/whisper_fa.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/morphosyntax/coref.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/morphosyntax/en/irr.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/morphosyntax/fr/apm.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/morphosyntax/fr/apmn.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/morphosyntax/fr/case.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/speaker/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/speaker/nemo_speaker.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/translate/__init__.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/translate/gtrans.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/translate/seamless.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/translate/utils.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/utr/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/utr/whisper_utr.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/pipelines/utterance/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/tests/pipelines/fixures.py
RENAMED
|
File without changes
|
{batchalignhk-0.7.19.post7 → batchalignhk-0.7.19.post9}/batchalign/tests/pipelines/test_pipeline.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|