batchalign 0.7.18.post12__tar.gz → 0.7.19__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of batchalign might be problematic. Click here for more details.
- {batchalign-0.7.18.post12/batchalign.egg-info → batchalign-0.7.19}/PKG-INFO +2 -1
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/cli/cli.py +5 -1
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/__init__.py +1 -1
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/asr/__init__.py +1 -0
- batchalign-0.7.19/batchalign/pipelines/asr/oai_whisper.py +97 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/dispatch.py +4 -3
- batchalign-0.7.19/batchalign/version +3 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19/batchalign.egg-info}/PKG-INFO +2 -1
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign.egg-info/SOURCES.txt +1 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign.egg-info/requires.txt +1 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/setup.py +2 -1
- batchalign-0.7.18.post12/batchalign/version +0 -3
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/LICENSE +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/MANIFEST.in +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/README.md +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/__init__.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/__main__.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/cli/dispatch.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/constants.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/document.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/errors.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/formats/chat/generator.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/formats/chat/lexer.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/formats/chat/parser.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/formats/chat/utils.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/models/resolve.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/models/utterance/cantonese_infer.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/models/wave2vec/__init__.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/models/wave2vec/infer_fa.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/asr/num2chinese.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/morphosyntax/ud.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/translate/__init__.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/translate/gtrans.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/translate/seamless.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/translate/utils.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/utils/dp.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.18.post12 → batchalign-0.7.19}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: batchalign
|
|
3
|
-
Version: 0.7.18.post12
|
|
3
|
+
Version: 0.7.19
|
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
|
@@ -34,6 +34,7 @@ Requires-Dist: tiktoken
|
|
|
34
34
|
Requires-Dist: blobfile
|
|
35
35
|
Requires-Dist: sentencepiece
|
|
36
36
|
Requires-Dist: googletrans
|
|
37
|
+
Requires-Dist: openai-whisper>=20240930
|
|
37
38
|
Provides-Extra: dev
|
|
38
39
|
Requires-Dist: pytest; extra == "dev"
|
|
39
40
|
Provides-Extra: train
|
|
@@ -144,8 +144,10 @@ def align(ctx, in_dir, out_dir, whisper, wav2vec, **kwargs):
|
|
|
144
144
|
|
|
145
145
|
@batchalign.command()
|
|
146
146
|
@common_options
|
|
147
|
+
@click.option("--whisper_oai/--rev",
|
|
148
|
+
default=False, help="Use the OpenAI's Whisper implementation instead of Rev.AI (default).")
|
|
147
149
|
@click.option("--whisper/--rev",
|
|
148
|
-
default=False, help="Use
|
|
150
|
+
default=False, help="Use Huggingface's Whisper implementation instead of Rev.AI (default).")
|
|
149
151
|
@click.option("--whisperx/--rev",
|
|
150
152
|
default=False, help="Use WhisperX instead of Rev.AI (default). Superceeds --whisper.")
|
|
151
153
|
@click.option("--diarize/--nodiarize",
|
|
@@ -169,6 +171,8 @@ def transcribe(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
|
|
|
169
171
|
asr = "whisper"
|
|
170
172
|
if kwargs["whisperx"]:
|
|
171
173
|
asr = "whisperx"
|
|
174
|
+
if kwargs["whisper_oai"]:
|
|
175
|
+
asr = "whisper_oai"
|
|
172
176
|
|
|
173
177
|
|
|
174
178
|
def writer(doc, output):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from .pipeline import BatchalignPipeline
|
|
2
2
|
from .base import BatchalignEngine
|
|
3
|
-
from .asr import WhisperEngine, RevEngine, WhisperXEngine
|
|
3
|
+
from .asr import WhisperEngine, RevEngine, WhisperXEngine, OAIWhisperEngine
|
|
4
4
|
|
|
5
5
|
from .morphosyntax import StanzaEngine, CorefEngine
|
|
6
6
|
from .cleanup import NgramRetraceEngine, DisfluencyReplacementEngine
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
from batchalign.document import *
|
|
2
|
+
from batchalign.pipelines.base import *
|
|
3
|
+
from batchalign.pipelines.asr.utils import *
|
|
4
|
+
from batchalign.models import WhisperASRModel, BertUtteranceModel, BertCantoneseUtteranceModel
|
|
5
|
+
|
|
6
|
+
import pycountry
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
L = logging.getLogger("batchalign")
|
|
10
|
+
|
|
11
|
+
from batchalign.utils.utils import correct_timing
|
|
12
|
+
from batchalign.models import resolve
|
|
13
|
+
|
|
14
|
+
import whisper
|
|
15
|
+
|
|
16
|
+
class OAIWhisperEngine(BatchalignEngine):
|
|
17
|
+
|
|
18
|
+
@property
|
|
19
|
+
def tasks(self):
|
|
20
|
+
# if there is no utterance segmentation scheme, we only
|
|
21
|
+
# run ASR
|
|
22
|
+
if self.__engine:
|
|
23
|
+
return [ Task.ASR, Task.UTTERANCE_SEGMENTATION ]
|
|
24
|
+
else:
|
|
25
|
+
return [ Task.ASR ]
|
|
26
|
+
|
|
27
|
+
def __init__(self, model=None, lang="eng"):
|
|
28
|
+
|
|
29
|
+
# try to resolve our internal model
|
|
30
|
+
res = resolve("whisper", lang)
|
|
31
|
+
if res:
|
|
32
|
+
model, base = res
|
|
33
|
+
else:
|
|
34
|
+
model = "openai/whisper-large-v3"
|
|
35
|
+
base = "openai/whisper-large-v3"
|
|
36
|
+
|
|
37
|
+
if lang == "mys":
|
|
38
|
+
language = "Malay"
|
|
39
|
+
else:
|
|
40
|
+
language = pycountry.languages.get(alpha_3=lang).name
|
|
41
|
+
if language == "Yue Chinese":
|
|
42
|
+
language = "Cantonese"
|
|
43
|
+
if "greek" in language.lower():
|
|
44
|
+
language = "Greek"
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
self.__whisper = whisper.load_model("turbo")
|
|
48
|
+
self.__lang = lang
|
|
49
|
+
self.__language = language
|
|
50
|
+
|
|
51
|
+
if resolve("utterance", self.__lang) != None:
|
|
52
|
+
L.debug("Initializing utterance model...")
|
|
53
|
+
if lang != "yue":
|
|
54
|
+
self.__engine = BertUtteranceModel(resolve("utterance", lang))
|
|
55
|
+
else:
|
|
56
|
+
# we have special inference procedure for cantonese
|
|
57
|
+
self.__engine = BertCantoneseUtteranceModel(resolve("utterance", lang))
|
|
58
|
+
L.debug("Done.")
|
|
59
|
+
else:
|
|
60
|
+
self.__engine = None
|
|
61
|
+
|
|
62
|
+
def generate(self, source_path, **kwargs):
|
|
63
|
+
res = self.__whisper.transcribe(source_path,
|
|
64
|
+
word_timestamps=True,
|
|
65
|
+
language=self.__language)
|
|
66
|
+
turns = []
|
|
67
|
+
for i in res["segments"]:
|
|
68
|
+
turn = []
|
|
69
|
+
for j in i["words"]:
|
|
70
|
+
turn.append({
|
|
71
|
+
"type": "text",
|
|
72
|
+
"ts": j["start"],
|
|
73
|
+
"end_ts": j["end"],
|
|
74
|
+
"value": j["word"]
|
|
75
|
+
})
|
|
76
|
+
turns.append({
|
|
77
|
+
"elements": turn,
|
|
78
|
+
"speaker": 0
|
|
79
|
+
})
|
|
80
|
+
doc = process_generation({"monologues": turns},
|
|
81
|
+
self.__lang,
|
|
82
|
+
utterance_engine=self.__engine)
|
|
83
|
+
# define media tier
|
|
84
|
+
media = Media(type=MediaType.AUDIO, name=Path(source_path).stem, url=source_path)
|
|
85
|
+
doc.media = media
|
|
86
|
+
|
|
87
|
+
return correct_timing(doc)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# model="openai/whisper-large-v2", language="english"
|
|
91
|
+
|
|
92
|
+
# e = WhisperEngine()
|
|
93
|
+
# tmp = e.generate("./batchalign/tests/pipelines/asr/support/test.mp3", 1)
|
|
94
|
+
# tmp.model_dump()
|
|
95
|
+
# file = "./batchalign/tests/pipelines/asr/support/test.mp3"
|
|
96
|
+
|
|
97
|
+
|
|
@@ -7,7 +7,7 @@ from batchalign import (WhisperEngine, WhisperFAEngine, StanzaEngine, RevEngine,
|
|
|
7
7
|
NgramRetraceEngine, DisfluencyReplacementEngine, WhisperUTREngine,
|
|
8
8
|
RevUTREngine, EvaluationEngine, WhisperXEngine, NemoSpeakerEngine,
|
|
9
9
|
StanzaUtteranceEngine, CorefEngine, Wave2VecFAEngine, SeamlessTranslationModel,
|
|
10
|
-
GoogleTranslateEngine)
|
|
10
|
+
GoogleTranslateEngine, OAIWhisperEngine)
|
|
11
11
|
from batchalign import BatchalignPipeline
|
|
12
12
|
from batchalign.models import resolve
|
|
13
13
|
|
|
@@ -19,7 +19,7 @@ L = logging.getLogger("batchalign")
|
|
|
19
19
|
|
|
20
20
|
# default for all languages
|
|
21
21
|
DEFAULT_PACKAGES = {
|
|
22
|
-
"asr": "
|
|
22
|
+
"asr": "whisper_oai",
|
|
23
23
|
"utr": "whisper_utr",
|
|
24
24
|
"fa": "whisper_fa",
|
|
25
25
|
"speaker": "nemo_speaker",
|
|
@@ -135,7 +135,8 @@ def dispatch_pipeline(pkg_str, lang, num_speakers=None, **arg_overrides):
|
|
|
135
135
|
engines.append(SeamlessTranslationModel())
|
|
136
136
|
elif engine == "gtrans":
|
|
137
137
|
engines.append(GoogleTranslateEngine())
|
|
138
|
-
|
|
138
|
+
elif engine == "whisper_oai":
|
|
139
|
+
engines.append(OAIWhisperEngine())
|
|
139
140
|
|
|
140
141
|
L.debug(f"Done initalizing packages.")
|
|
141
142
|
return BatchalignPipeline(*engines)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: batchalign
|
|
3
|
-
Version: 0.7.18.post12
|
|
3
|
+
Version: 0.7.19
|
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
|
@@ -34,6 +34,7 @@ Requires-Dist: tiktoken
|
|
|
34
34
|
Requires-Dist: blobfile
|
|
35
35
|
Requires-Dist: sentencepiece
|
|
36
36
|
Requires-Dist: googletrans
|
|
37
|
+
Requires-Dist: openai-whisper>=20240930
|
|
37
38
|
Provides-Extra: dev
|
|
38
39
|
Requires-Dist: pytest; extra == "dev"
|
|
39
40
|
Provides-Extra: train
|
|
@@ -59,6 +59,7 @@ batchalign/pipelines/analysis/__init__.py
|
|
|
59
59
|
batchalign/pipelines/analysis/eval.py
|
|
60
60
|
batchalign/pipelines/asr/__init__.py
|
|
61
61
|
batchalign/pipelines/asr/num2chinese.py
|
|
62
|
+
batchalign/pipelines/asr/oai_whisper.py
|
|
62
63
|
batchalign/pipelines/asr/rev.py
|
|
63
64
|
batchalign/pipelines/asr/utils.py
|
|
64
65
|
batchalign/pipelines/asr/whisper.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/models/utterance/cantonese_infer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/cleanup/parse_support.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/cleanup/support/replacements.eng
RENAMED
|
File without changes
|
{batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/cleanup/support/test.test
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/morphosyntax/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/morphosyntax/ja/verbforms.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/pipelines/utterance/ud_utterance.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/formats/chat/test_chat_file.py
RENAMED
|
File without changes
|
{batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/formats/chat/test_chat_generator.py
RENAMED
|
File without changes
|
{batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/formats/chat/test_chat_lexer.py
RENAMED
|
File without changes
|
{batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/formats/chat/test_chat_parser.py
RENAMED
|
File without changes
|
{batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/formats/chat/test_chat_utils.py
RENAMED
|
File without changes
|
{batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/formats/textgrid/test_textgrid.py
RENAMED
|
File without changes
|
{batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/pipelines/analysis/test_eval.py
RENAMED
|
File without changes
|
{batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/pipelines/asr/test_asr_pipeline.py
RENAMED
|
File without changes
|
{batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/pipelines/asr/test_asr_utils.py
RENAMED
|
File without changes
|
{batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/pipelines/cleanup/test_disfluency.py
RENAMED
|
File without changes
|
|
File without changes
|
{batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/pipelines/fa/test_fa_pipeline.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{batchalign-0.7.18.post12 → batchalign-0.7.19}/batchalign/tests/pipelines/test_pipeline_models.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|