batchalign 0.7.18.post11__tar.gz → 0.7.19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of batchalign might be problematic. Click here for more details.

Files changed (122)
  1. {batchalign-0.7.18.post11/batchalign.egg-info → batchalign-0.7.19}/PKG-INFO +2 -1
  2. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/cli/cli.py +5 -1
  3. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/__init__.py +1 -1
  4. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/asr/__init__.py +1 -0
  5. batchalign-0.7.19/batchalign/pipelines/asr/oai_whisper.py +97 -0
  6. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/dispatch.py +4 -3
  7. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/fa/wave2vec_fa.py +7 -4
  8. batchalign-0.7.19/batchalign/version +3 -0
  9. {batchalign-0.7.18.post11 → batchalign-0.7.19/batchalign.egg-info}/PKG-INFO +2 -1
  10. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign.egg-info/SOURCES.txt +1 -0
  11. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign.egg-info/requires.txt +1 -0
  12. {batchalign-0.7.18.post11 → batchalign-0.7.19}/setup.py +2 -1
  13. batchalign-0.7.18.post11/batchalign/version +0 -3
  14. {batchalign-0.7.18.post11 → batchalign-0.7.19}/LICENSE +0 -0
  15. {batchalign-0.7.18.post11 → batchalign-0.7.19}/MANIFEST.in +0 -0
  16. {batchalign-0.7.18.post11 → batchalign-0.7.19}/README.md +0 -0
  17. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/__init__.py +0 -0
  18. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/__main__.py +0 -0
  19. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/cli/__init__.py +0 -0
  20. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/cli/dispatch.py +0 -0
  21. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/constants.py +0 -0
  22. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/document.py +0 -0
  23. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/errors.py +0 -0
  24. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/formats/__init__.py +0 -0
  25. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/formats/base.py +0 -0
  26. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/formats/chat/__init__.py +0 -0
  27. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/formats/chat/file.py +0 -0
  28. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/formats/chat/generator.py +0 -0
  29. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/formats/chat/lexer.py +0 -0
  30. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/formats/chat/parser.py +0 -0
  31. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/formats/chat/utils.py +0 -0
  32. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/formats/textgrid/__init__.py +0 -0
  33. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/formats/textgrid/file.py +0 -0
  34. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/formats/textgrid/generator.py +0 -0
  35. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/formats/textgrid/parser.py +0 -0
  36. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/models/__init__.py +0 -0
  37. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/models/resolve.py +0 -0
  38. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/models/speaker/__init__.py +0 -0
  39. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/models/speaker/config.yaml +0 -0
  40. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/models/speaker/infer.py +0 -0
  41. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/models/speaker/utils.py +0 -0
  42. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/models/training/__init__.py +0 -0
  43. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/models/training/run.py +0 -0
  44. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/models/training/utils.py +0 -0
  45. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/models/utils.py +0 -0
  46. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/models/utterance/__init__.py +0 -0
  47. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/models/utterance/cantonese_infer.py +0 -0
  48. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/models/utterance/dataset.py +0 -0
  49. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/models/utterance/execute.py +0 -0
  50. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/models/utterance/infer.py +0 -0
  51. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/models/utterance/prep.py +0 -0
  52. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/models/utterance/train.py +0 -0
  53. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/models/wave2vec/__init__.py +0 -0
  54. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/models/wave2vec/infer_fa.py +0 -0
  55. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/models/whisper/__init__.py +0 -0
  56. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/models/whisper/infer_asr.py +0 -0
  57. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/models/whisper/infer_fa.py +0 -0
  58. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/analysis/__init__.py +0 -0
  59. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/analysis/eval.py +0 -0
  60. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/asr/num2chinese.py +0 -0
  61. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/asr/rev.py +0 -0
  62. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/asr/utils.py +0 -0
  63. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/asr/whisper.py +0 -0
  64. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/asr/whisperx.py +0 -0
  65. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/base.py +0 -0
  66. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/cleanup/__init__.py +0 -0
  67. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  68. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  69. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  70. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/cleanup/retrace.py +0 -0
  71. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  72. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  73. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/cleanup/support/test.test +0 -0
  74. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/fa/__init__.py +0 -0
  75. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  76. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  77. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  78. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  79. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  80. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  81. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  82. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  83. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  84. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/pipeline.py +0 -0
  85. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/speaker/__init__.py +0 -0
  86. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  87. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/translate/__init__.py +0 -0
  88. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/translate/gtrans.py +0 -0
  89. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/translate/seamless.py +0 -0
  90. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/translate/utils.py +0 -0
  91. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/utr/__init__.py +0 -0
  92. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/utr/rev_utr.py +0 -0
  93. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/utr/utils.py +0 -0
  94. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  95. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/utterance/__init__.py +0 -0
  96. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  97. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/tests/__init__.py +0 -0
  98. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/tests/conftest.py +0 -0
  99. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  100. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  101. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  102. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  103. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  104. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  105. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  106. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  107. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  108. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  109. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  110. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  111. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/tests/pipelines/fixures.py +0 -0
  112. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  113. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  114. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/tests/test_document.py +0 -0
  115. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/utils/__init__.py +0 -0
  116. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/utils/config.py +0 -0
  117. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/utils/dp.py +0 -0
  118. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign/utils/utils.py +0 -0
  119. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign.egg-info/dependency_links.txt +0 -0
  120. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign.egg-info/entry_points.txt +0 -0
  121. {batchalign-0.7.18.post11 → batchalign-0.7.19}/batchalign.egg-info/top_level.txt +0 -0
  122. {batchalign-0.7.18.post11 → batchalign-0.7.19}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: batchalign
3
- Version: 0.7.18.post11
3
+ Version: 0.7.19
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -34,6 +34,7 @@ Requires-Dist: tiktoken
34
34
  Requires-Dist: blobfile
35
35
  Requires-Dist: sentencepiece
36
36
  Requires-Dist: googletrans
37
+ Requires-Dist: openai-whisper>=20240930
37
38
  Provides-Extra: dev
38
39
  Requires-Dist: pytest; extra == "dev"
39
40
  Provides-Extra: train
@@ -144,8 +144,10 @@ def align(ctx, in_dir, out_dir, whisper, wav2vec, **kwargs):
144
144
 
145
145
  @batchalign.command()
146
146
  @common_options
147
+ @click.option("--whisper_oai/--rev",
148
+ default=False, help="Use the OpenAI's Whisper implementation instead of Rev.AI (default).")
147
149
  @click.option("--whisper/--rev",
148
- default=False, help="Use OpenAI Whisper (ASR) instead of Rev.AI (default).")
150
+ default=False, help="Use Huggingface's Whisper implementation instead of Rev.AI (default).")
149
151
  @click.option("--whisperx/--rev",
150
152
  default=False, help="Use WhisperX instead of Rev.AI (default). Superceeds --whisper.")
151
153
  @click.option("--diarize/--nodiarize",
@@ -169,6 +171,8 @@ def transcribe(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
169
171
  asr = "whisper"
170
172
  if kwargs["whisperx"]:
171
173
  asr = "whisperx"
174
+ if kwargs["whisper_oai"]:
175
+ asr = "whisper_oai"
172
176
 
173
177
 
174
178
  def writer(doc, output):
@@ -1,6 +1,6 @@
1
1
  from .pipeline import BatchalignPipeline
2
2
  from .base import BatchalignEngine
3
- from .asr import WhisperEngine, RevEngine, WhisperXEngine
3
+ from .asr import WhisperEngine, RevEngine, WhisperXEngine, OAIWhisperEngine
4
4
 
5
5
  from .morphosyntax import StanzaEngine, CorefEngine
6
6
  from .cleanup import NgramRetraceEngine, DisfluencyReplacementEngine
@@ -1,3 +1,4 @@
1
1
  from .whisper import WhisperEngine
2
2
  from .rev import RevEngine
3
3
  from .whisperx import WhisperXEngine
4
+ from .oai_whisper import OAIWhisperEngine
@@ -0,0 +1,97 @@
1
+ from batchalign.document import *
2
+ from batchalign.pipelines.base import *
3
+ from batchalign.pipelines.asr.utils import *
4
+ from batchalign.models import WhisperASRModel, BertUtteranceModel, BertCantoneseUtteranceModel
5
+
6
+ import pycountry
7
+
8
+ import logging
9
+ L = logging.getLogger("batchalign")
10
+
11
+ from batchalign.utils.utils import correct_timing
12
+ from batchalign.models import resolve
13
+
14
+ import whisper
15
+
16
+ class OAIWhisperEngine(BatchalignEngine):
17
+
18
+ @property
19
+ def tasks(self):
20
+ # if there is no utterance segmentation scheme, we only
21
+ # run ASR
22
+ if self.__engine:
23
+ return [ Task.ASR, Task.UTTERANCE_SEGMENTATION ]
24
+ else:
25
+ return [ Task.ASR ]
26
+
27
+ def __init__(self, model=None, lang="eng"):
28
+
29
+ # try to resolve our internal model
30
+ res = resolve("whisper", lang)
31
+ if res:
32
+ model, base = res
33
+ else:
34
+ model = "openai/whisper-large-v3"
35
+ base = "openai/whisper-large-v3"
36
+
37
+ if lang == "mys":
38
+ language = "Malay"
39
+ else:
40
+ language = pycountry.languages.get(alpha_3=lang).name
41
+ if language == "Yue Chinese":
42
+ language = "Cantonese"
43
+ if "greek" in language.lower():
44
+ language = "Greek"
45
+
46
+
47
+ self.__whisper = whisper.load_model("turbo")
48
+ self.__lang = lang
49
+ self.__language = language
50
+
51
+ if resolve("utterance", self.__lang) != None:
52
+ L.debug("Initializing utterance model...")
53
+ if lang != "yue":
54
+ self.__engine = BertUtteranceModel(resolve("utterance", lang))
55
+ else:
56
+ # we have special inference procedure for cantonese
57
+ self.__engine = BertCantoneseUtteranceModel(resolve("utterance", lang))
58
+ L.debug("Done.")
59
+ else:
60
+ self.__engine = None
61
+
62
+ def generate(self, source_path, **kwargs):
63
+ res = self.__whisper.transcribe(source_path,
64
+ word_timestamps=True,
65
+ language=self.__language)
66
+ turns = []
67
+ for i in res["segments"]:
68
+ turn = []
69
+ for j in i["words"]:
70
+ turn.append({
71
+ "type": "text",
72
+ "ts": j["start"],
73
+ "end_ts": j["end"],
74
+ "value": j["word"]
75
+ })
76
+ turns.append({
77
+ "elements": turn,
78
+ "speaker": 0
79
+ })
80
+ doc = process_generation({"monologues": turns},
81
+ self.__lang,
82
+ utterance_engine=self.__engine)
83
+ # define media tier
84
+ media = Media(type=MediaType.AUDIO, name=Path(source_path).stem, url=source_path)
85
+ doc.media = media
86
+
87
+ return correct_timing(doc)
88
+
89
+
90
+ # model="openai/whisper-large-v2", language="english"
91
+
92
+ # e = WhisperEngine()
93
+ # tmp = e.generate("./batchalign/tests/pipelines/asr/support/test.mp3", 1)
94
+ # tmp.model_dump()
95
+ # file = "./batchalign/tests/pipelines/asr/support/test.mp3"
96
+
97
+
@@ -7,7 +7,7 @@ from batchalign import (WhisperEngine, WhisperFAEngine, StanzaEngine, RevEngine,
7
7
  NgramRetraceEngine, DisfluencyReplacementEngine, WhisperUTREngine,
8
8
  RevUTREngine, EvaluationEngine, WhisperXEngine, NemoSpeakerEngine,
9
9
  StanzaUtteranceEngine, CorefEngine, Wave2VecFAEngine, SeamlessTranslationModel,
10
- GoogleTranslateEngine)
10
+ GoogleTranslateEngine, OAIWhisperEngine)
11
11
  from batchalign import BatchalignPipeline
12
12
  from batchalign.models import resolve
13
13
 
@@ -19,7 +19,7 @@ L = logging.getLogger("batchalign")
19
19
 
20
20
  # default for all languages
21
21
  DEFAULT_PACKAGES = {
22
- "asr": "whisper",
22
+ "asr": "whisper_oai",
23
23
  "utr": "whisper_utr",
24
24
  "fa": "whisper_fa",
25
25
  "speaker": "nemo_speaker",
@@ -135,7 +135,8 @@ def dispatch_pipeline(pkg_str, lang, num_speakers=None, **arg_overrides):
135
135
  engines.append(SeamlessTranslationModel())
136
136
  elif engine == "gtrans":
137
137
  engines.append(GoogleTranslateEngine())
138
-
138
+ elif engine == "whisper_oai":
139
+ engines.append(OAIWhisperEngine())
139
140
 
140
141
  L.debug(f"Done initalizing packages.")
141
142
  return BatchalignPipeline(*engines)
@@ -92,10 +92,13 @@ class Wave2VecFAEngine(BatchalignEngine):
92
92
  # create target backplates for the timings
93
93
  payload_targets = []
94
94
  timings = []
95
- for indx, (word, time) in enumerate(res):
96
- timings.append(time)
97
- for char in word:
98
- payload_targets.append(PayloadTarget(char, payload=indx))
95
+ try:
96
+ for indx, (word, time) in enumerate(res):
97
+ timings.append(time)
98
+ for char in word:
99
+ payload_targets.append(PayloadTarget(char, payload=indx))
100
+ except:
101
+ continue
99
102
  # alignment!
100
103
  alignments = align(payload_targets, ref_targets, tqdm=False)
101
104
 
@@ -0,0 +1,3 @@
1
+ 0.7.19
2
+ May 9th, 2025
3
+ OpenAI Whisper Implementation
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: batchalign
3
- Version: 0.7.18.post11
3
+ Version: 0.7.19
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -34,6 +34,7 @@ Requires-Dist: tiktoken
34
34
  Requires-Dist: blobfile
35
35
  Requires-Dist: sentencepiece
36
36
  Requires-Dist: googletrans
37
+ Requires-Dist: openai-whisper>=20240930
37
38
  Provides-Extra: dev
38
39
  Requires-Dist: pytest; extra == "dev"
39
40
  Provides-Extra: train
@@ -59,6 +59,7 @@ batchalign/pipelines/analysis/__init__.py
59
59
  batchalign/pipelines/analysis/eval.py
60
60
  batchalign/pipelines/asr/__init__.py
61
61
  batchalign/pipelines/asr/num2chinese.py
62
+ batchalign/pipelines/asr/oai_whisper.py
62
63
  batchalign/pipelines/asr/rev.py
63
64
  batchalign/pipelines/asr/utils.py
64
65
  batchalign/pipelines/asr/whisper.py
@@ -24,6 +24,7 @@ tiktoken
24
24
  blobfile
25
25
  sentencepiece
26
26
  googletrans
27
+ openai-whisper>=20240930
27
28
 
28
29
  [dev]
29
30
  pytest
@@ -55,7 +55,8 @@ setup(
55
55
  "tiktoken",
56
56
  "blobfile",
57
57
  "sentencepiece",
58
- "googletrans"
58
+ "googletrans",
59
+ "openai-whisper>=20240930"
59
60
  ],
60
61
  extras_require={
61
62
  'dev': [
@@ -1,3 +0,0 @@
1
- 0.7.18-post.11
2
- April 30th, 2025
3
- align
File without changes