batchalign 0.7.18.post12__tar.gz → 0.7.19.post1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of batchalign might be problematic. Click here for more details.

Files changed (122)
  1. {batchalign-0.7.18.post12/batchalign.egg-info → batchalign-0.7.19.post1}/PKG-INFO +6 -3
  2. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/cli/cli.py +6 -1
  3. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/__init__.py +1 -1
  4. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/analysis/eval.py +31 -0
  5. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/asr/__init__.py +1 -0
  6. batchalign-0.7.19.post1/batchalign/pipelines/asr/oai_whisper.py +97 -0
  7. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/dispatch.py +4 -3
  8. batchalign-0.7.19.post1/batchalign/version +3 -0
  9. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1/batchalign.egg-info}/PKG-INFO +6 -3
  10. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign.egg-info/SOURCES.txt +1 -0
  11. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign.egg-info/requires.txt +5 -0
  12. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/setup.py +6 -1
  13. batchalign-0.7.18.post12/batchalign/version +0 -3
  14. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/LICENSE +0 -0
  15. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/MANIFEST.in +0 -0
  16. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/README.md +0 -0
  17. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/__init__.py +0 -0
  18. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/__main__.py +0 -0
  19. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/cli/__init__.py +0 -0
  20. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/cli/dispatch.py +0 -0
  21. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/constants.py +0 -0
  22. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/document.py +0 -0
  23. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/errors.py +0 -0
  24. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/formats/__init__.py +0 -0
  25. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/formats/base.py +0 -0
  26. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/formats/chat/__init__.py +0 -0
  27. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/formats/chat/file.py +0 -0
  28. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/formats/chat/generator.py +0 -0
  29. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/formats/chat/lexer.py +0 -0
  30. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/formats/chat/parser.py +0 -0
  31. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/formats/chat/utils.py +0 -0
  32. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/formats/textgrid/__init__.py +0 -0
  33. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/formats/textgrid/file.py +0 -0
  34. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/formats/textgrid/generator.py +0 -0
  35. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/formats/textgrid/parser.py +0 -0
  36. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/models/__init__.py +0 -0
  37. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/models/resolve.py +0 -0
  38. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/models/speaker/__init__.py +0 -0
  39. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/models/speaker/config.yaml +0 -0
  40. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/models/speaker/infer.py +0 -0
  41. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/models/speaker/utils.py +0 -0
  42. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/models/training/__init__.py +0 -0
  43. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/models/training/run.py +0 -0
  44. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/models/training/utils.py +0 -0
  45. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/models/utils.py +0 -0
  46. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/models/utterance/__init__.py +0 -0
  47. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/models/utterance/cantonese_infer.py +0 -0
  48. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/models/utterance/dataset.py +0 -0
  49. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/models/utterance/execute.py +0 -0
  50. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/models/utterance/infer.py +0 -0
  51. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/models/utterance/prep.py +0 -0
  52. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/models/utterance/train.py +0 -0
  53. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/models/wave2vec/__init__.py +0 -0
  54. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/models/wave2vec/infer_fa.py +0 -0
  55. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/models/whisper/__init__.py +0 -0
  56. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/models/whisper/infer_asr.py +0 -0
  57. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/models/whisper/infer_fa.py +0 -0
  58. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/analysis/__init__.py +0 -0
  59. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/asr/num2chinese.py +0 -0
  60. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/asr/rev.py +0 -0
  61. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/asr/utils.py +0 -0
  62. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/asr/whisper.py +0 -0
  63. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/asr/whisperx.py +0 -0
  64. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/base.py +0 -0
  65. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/cleanup/__init__.py +0 -0
  66. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  67. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  68. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  69. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/cleanup/retrace.py +0 -0
  70. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  71. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  72. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/cleanup/support/test.test +0 -0
  73. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/fa/__init__.py +0 -0
  74. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  75. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  76. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  77. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  78. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  79. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  80. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  81. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  82. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  83. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  84. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/pipeline.py +0 -0
  85. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/speaker/__init__.py +0 -0
  86. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  87. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/translate/__init__.py +0 -0
  88. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/translate/gtrans.py +0 -0
  89. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/translate/seamless.py +0 -0
  90. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/translate/utils.py +0 -0
  91. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/utr/__init__.py +0 -0
  92. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/utr/rev_utr.py +0 -0
  93. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/utr/utils.py +0 -0
  94. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  95. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/utterance/__init__.py +0 -0
  96. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  97. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/tests/__init__.py +0 -0
  98. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/tests/conftest.py +0 -0
  99. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  100. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  101. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  102. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  103. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  104. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  105. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  106. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  107. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  108. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  109. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  110. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  111. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/tests/pipelines/fixures.py +0 -0
  112. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  113. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  114. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/tests/test_document.py +0 -0
  115. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/utils/__init__.py +0 -0
  116. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/utils/config.py +0 -0
  117. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/utils/dp.py +0 -0
  118. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign/utils/utils.py +0 -0
  119. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign.egg-info/dependency_links.txt +0 -0
  120. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign.egg-info/entry_points.txt +0 -0
  121. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/batchalign.egg-info/top_level.txt +0 -0
  122. {batchalign-0.7.18.post12 → batchalign-0.7.19.post1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.4
1
+ Metadata-Version: 2.2
2
2
  Name: batchalign
3
- Version: 0.7.18.post12
3
+ Version: 0.7.19.post1
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -34,16 +34,19 @@ Requires-Dist: tiktoken
34
34
  Requires-Dist: blobfile
35
35
  Requires-Dist: sentencepiece
36
36
  Requires-Dist: googletrans
37
+ Requires-Dist: openai-whisper>=20240930
37
38
  Provides-Extra: dev
38
39
  Requires-Dist: pytest; extra == "dev"
39
40
  Provides-Extra: train
40
41
  Requires-Dist: accelerate~=0.27; extra == "train"
42
+ Provides-Extra: docs
43
+ Requires-Dist: mkdocs-material; extra == "docs"
44
+ Requires-Dist: mkdocs-click; extra == "docs"
41
45
  Dynamic: author
42
46
  Dynamic: author-email
43
47
  Dynamic: classifier
44
48
  Dynamic: description
45
49
  Dynamic: description-content-type
46
- Dynamic: license-file
47
50
  Dynamic: provides-extra
48
51
  Dynamic: requires-dist
49
52
  Dynamic: summary
@@ -79,6 +79,7 @@ def handle_verbosity(verbosity):
79
79
 
80
80
  @click.group()
81
81
  @click.pass_context
82
+ @click.version_option(VERSION_NUMBER)
82
83
  @click.option("-v", "--verbose", type=int, count=True, default=0, help="How loquacious Batchalign should be.")
83
84
  def batchalign(ctx, verbose):
84
85
  """process .cha and/or audio files in IN_DIR and dumps them to OUT_DIR using recipe COMMAND"""
@@ -144,8 +145,10 @@ def align(ctx, in_dir, out_dir, whisper, wav2vec, **kwargs):
144
145
 
145
146
  @batchalign.command()
146
147
  @common_options
148
+ @click.option("--whisper_oai/--rev",
149
+ default=False, help="Use the OpenAI's Whisper implementation instead of Rev.AI (default).")
147
150
  @click.option("--whisper/--rev",
148
- default=False, help="Use OpenAI Whisper (ASR) instead of Rev.AI (default).")
151
+ default=False, help="Use Huggingface's Whisper implementation instead of Rev.AI (default).")
149
152
  @click.option("--whisperx/--rev",
150
153
  default=False, help="Use WhisperX instead of Rev.AI (default). Superceeds --whisper.")
151
154
  @click.option("--diarize/--nodiarize",
@@ -169,6 +172,8 @@ def transcribe(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
169
172
  asr = "whisper"
170
173
  if kwargs["whisperx"]:
171
174
  asr = "whisperx"
175
+ if kwargs["whisper_oai"]:
176
+ asr = "whisper_oai"
172
177
 
173
178
 
174
179
  def writer(doc, output):
@@ -1,6 +1,6 @@
1
1
  from .pipeline import BatchalignPipeline
2
2
  from .base import BatchalignEngine
3
- from .asr import WhisperEngine, RevEngine, WhisperXEngine
3
+ from .asr import WhisperEngine, RevEngine, WhisperXEngine, OAIWhisperEngine
4
4
 
5
5
  from .morphosyntax import StanzaEngine, CorefEngine
6
6
  from .cleanup import NgramRetraceEngine, DisfluencyReplacementEngine
@@ -14,6 +14,34 @@ from batchalign.utils.dp import align, ExtraType, Extra, Match
14
14
  import logging
15
15
  L = logging.getLogger("batchalign")
16
16
 
17
def conform(x):
    """Normalize a token sequence before WER alignment.

    The same rewrites are meant to be applied to both the gold and the
    hypothesis token lists so that purely orthographic differences do not
    count as errors.

    Rewrites, tested against the whitespace-stripped token:

    * a token containing ``'s`` becomes the text before the ``'s`` followed
      by ``"is"`` (possessives are expanded the same way as contractions);
    * a small set of known compounds/variants is mapped to fixed expansions
      (e.g. ``farmhouse`` -> ``farm house``, ``hmm`` -> ``hm``);
    * a token containing ``_`` is split on the underscores;
    * anything else is passed through unchanged, including any surrounding
      whitespace.

    Empty pieces produced by a rewrite (a bare ``'s``, or a leading,
    trailing or doubled ``_``) are dropped rather than emitted as
    empty-string tokens.

    Parameters
    ----------
    x : iterable of str
        Tokens to normalize.

    Returns
    -------
    list of str
        The normalized tokens, in order.
    """

    # fixed rewrites, keyed by the stripped token
    expansions = {
        "americanstyle": ("american", "style"),
        "postwar": ("post", "war"),
        "farmhouse": ("farm", "house"),
        "aa": ("a", "a"),
        "hmm": ("hm",),
    }

    result = []
    for token in x:
        bare = token.strip()
        if "'s" in bare:
            # cut at the clitic itself, not at the first apostrophe, so a
            # token such as "o'clock's" keeps its full stem
            head = token.partition("'s")[0]
            if head:
                result.append(head)
            result.append("is")
        elif bare in expansions:
            result.extend(expansions[bare])
        elif "_" in bare:
            result.extend(piece for piece in bare.split("_") if piece)
        else:
            result.append(token)

    return result
44
+
17
45
  class EvaluationEngine(BatchalignEngine):
18
46
  tasks = [ Task.WER ]
19
47
 
@@ -64,6 +92,9 @@ class EvaluationEngine(BatchalignEngine):
64
92
  gold_final = gold_forms
65
93
  forms_final = forms_finished
66
94
 
95
+ gold_final = conform(gold_final)
96
+ forms_final = conform(forms_final)
97
+
67
98
  # dp!
68
99
  alignment = align(forms_final, gold_final, False)
69
100
 
@@ -1,3 +1,4 @@
1
1
  from .whisper import WhisperEngine
2
2
  from .rev import RevEngine
3
3
  from .whisperx import WhisperXEngine
4
+ from .oai_whisper import OAIWhisperEngine
@@ -0,0 +1,97 @@
1
+ from batchalign.document import *
2
+ from batchalign.pipelines.base import *
3
+ from batchalign.pipelines.asr.utils import *
4
+ from batchalign.models import WhisperASRModel, BertUtteranceModel, BertCantoneseUtteranceModel
5
+
6
+ import pycountry
7
+
8
+ import logging
9
+ L = logging.getLogger("batchalign")
10
+
11
+ from batchalign.utils.utils import correct_timing
12
+ from batchalign.models import resolve
13
+
14
+ import whisper
15
+
16
class OAIWhisperEngine(BatchalignEngine):
    """ASR engine that transcribes audio with the ``whisper`` package.

    The engine always loads the ``"turbo"`` checkpoint through
    ``whisper.load_model``. When an utterance model can be resolved for the
    requested language, the engine also performs utterance segmentation.
    """

    @property
    def tasks(self):
        """Tasks this engine performs.

        ASR only, unless an utterance segmentation model was loaded at
        construction time, in which case utterance segmentation is
        reported as well.
        """
        if self.__engine:
            return [ Task.ASR, Task.UTTERANCE_SEGMENTATION ]
        return [ Task.ASR ]

    def __init__(self, model=None, lang="eng"):
        """Load the Whisper checkpoint and, if available, an utterance model.

        Parameters
        ----------
        model : optional
            Accepted for signature compatibility but currently ignored:
            the ``"turbo"`` checkpoint is always loaded.
        lang : str
            Three-letter language code, looked up via
            ``pycountry.languages.get(alpha_3=...)``.
        """

        # translate the three-letter code into the language name handed to
        # whisper; "mys" has no pycountry lookup here and is special-cased
        if lang == "mys":
            language = "Malay"
        else:
            language = pycountry.languages.get(alpha_3=lang).name
        if language == "Yue Chinese":
            language = "Cantonese"
        if "greek" in language.lower():
            language = "Greek"

        self.__whisper = whisper.load_model("turbo")
        self.__lang = lang
        self.__language = language

        # look the utterance model up once and reuse the result
        utterance_model = resolve("utterance", lang)
        if utterance_model is not None:
            L.debug("Initializing utterance model...")
            if lang != "yue":
                self.__engine = BertUtteranceModel(utterance_model)
            else:
                # we have special inference procedure for cantonese
                self.__engine = BertCantoneseUtteranceModel(utterance_model)
            L.debug("Done.")
        else:
            self.__engine = None

    def generate(self, source_path, **kwargs):
        """Transcribe ``source_path`` and return the resulting document.

        Whisper is run with word-level timestamps. Each returned segment
        becomes one turn attributed to speaker ``0``, and each word becomes
        a ``"text"`` element carrying its start and end timestamps. The
        turns are passed to ``process_generation`` together with the
        utterance model (if any), the source file is attached as the
        document's audio media, and the document is returned after
        ``correct_timing``.

        Parameters
        ----------
        source_path : str
            Path of the audio file to transcribe.
        **kwargs
            Accepted and ignored.
        """
        res = self.__whisper.transcribe(source_path,
                                        word_timestamps=True,
                                        language=self.__language)

        turns = [
            {
                "elements": [
                    {
                        "type": "text",
                        "ts": word["start"],
                        "end_ts": word["end"],
                        "value": word["word"]
                    }
                    for word in segment["words"]
                ],
                "speaker": 0
            }
            for segment in res["segments"]
        ]

        doc = process_generation({"monologues": turns},
                                 self.__lang,
                                 utterance_engine=self.__engine)

        # define media tier
        doc.media = Media(type=MediaType.AUDIO, name=Path(source_path).stem, url=source_path)

        return correct_timing(doc)
88
+
89
+
90
+ # model="openai/whisper-large-v2", language="english"
91
+
92
+ # e = WhisperEngine()
93
+ # tmp = e.generate("./batchalign/tests/pipelines/asr/support/test.mp3", 1)
94
+ # tmp.model_dump()
95
+ # file = "./batchalign/tests/pipelines/asr/support/test.mp3"
96
+
97
+
@@ -7,7 +7,7 @@ from batchalign import (WhisperEngine, WhisperFAEngine, StanzaEngine, RevEngine,
7
7
  NgramRetraceEngine, DisfluencyReplacementEngine, WhisperUTREngine,
8
8
  RevUTREngine, EvaluationEngine, WhisperXEngine, NemoSpeakerEngine,
9
9
  StanzaUtteranceEngine, CorefEngine, Wave2VecFAEngine, SeamlessTranslationModel,
10
- GoogleTranslateEngine)
10
+ GoogleTranslateEngine, OAIWhisperEngine)
11
11
  from batchalign import BatchalignPipeline
12
12
  from batchalign.models import resolve
13
13
 
@@ -19,7 +19,7 @@ L = logging.getLogger("batchalign")
19
19
 
20
20
  # default for all languages
21
21
  DEFAULT_PACKAGES = {
22
- "asr": "whisper",
22
+ "asr": "whisper_oai",
23
23
  "utr": "whisper_utr",
24
24
  "fa": "whisper_fa",
25
25
  "speaker": "nemo_speaker",
@@ -135,7 +135,8 @@ def dispatch_pipeline(pkg_str, lang, num_speakers=None, **arg_overrides):
135
135
  engines.append(SeamlessTranslationModel())
136
136
  elif engine == "gtrans":
137
137
  engines.append(GoogleTranslateEngine())
138
-
138
+ elif engine == "whisper_oai":
139
+ engines.append(OAIWhisperEngine())
139
140
 
140
141
  L.debug(f"Done initalizing packages.")
141
142
  return BatchalignPipeline(*engines)
@@ -0,0 +1,3 @@
1
+ 0.7.19-post.1
2
+ May 11th, 2025
3
+ Fixes for benchmarking
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.4
1
+ Metadata-Version: 2.2
2
2
  Name: batchalign
3
- Version: 0.7.18.post12
3
+ Version: 0.7.19.post1
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -34,16 +34,19 @@ Requires-Dist: tiktoken
34
34
  Requires-Dist: blobfile
35
35
  Requires-Dist: sentencepiece
36
36
  Requires-Dist: googletrans
37
+ Requires-Dist: openai-whisper>=20240930
37
38
  Provides-Extra: dev
38
39
  Requires-Dist: pytest; extra == "dev"
39
40
  Provides-Extra: train
40
41
  Requires-Dist: accelerate~=0.27; extra == "train"
42
+ Provides-Extra: docs
43
+ Requires-Dist: mkdocs-material; extra == "docs"
44
+ Requires-Dist: mkdocs-click; extra == "docs"
41
45
  Dynamic: author
42
46
  Dynamic: author-email
43
47
  Dynamic: classifier
44
48
  Dynamic: description
45
49
  Dynamic: description-content-type
46
- Dynamic: license-file
47
50
  Dynamic: provides-extra
48
51
  Dynamic: requires-dist
49
52
  Dynamic: summary
@@ -59,6 +59,7 @@ batchalign/pipelines/analysis/__init__.py
59
59
  batchalign/pipelines/analysis/eval.py
60
60
  batchalign/pipelines/asr/__init__.py
61
61
  batchalign/pipelines/asr/num2chinese.py
62
+ batchalign/pipelines/asr/oai_whisper.py
62
63
  batchalign/pipelines/asr/rev.py
63
64
  batchalign/pipelines/asr/utils.py
64
65
  batchalign/pipelines/asr/whisper.py
@@ -24,9 +24,14 @@ tiktoken
24
24
  blobfile
25
25
  sentencepiece
26
26
  googletrans
27
+ openai-whisper>=20240930
27
28
 
28
29
  [dev]
29
30
  pytest
30
31
 
32
+ [docs]
33
+ mkdocs-material
34
+ mkdocs-click
35
+
31
36
  [train]
32
37
  accelerate~=0.27
@@ -55,7 +55,8 @@ setup(
55
55
  "tiktoken",
56
56
  "blobfile",
57
57
  "sentencepiece",
58
- "googletrans"
58
+ "googletrans",
59
+ "openai-whisper>=20240930"
59
60
  ],
60
61
  extras_require={
61
62
  'dev': [
@@ -64,6 +65,10 @@ setup(
64
65
  'train': [
65
66
  'accelerate~=0.27',
66
67
  ],
68
+ 'docs': [
69
+ 'mkdocs-material',
70
+ 'mkdocs-click'
71
+ ],
67
72
  # 'speaker': [
68
73
  # "nemo-toolkit~=1.21.0",
69
74
  # "omegaconf~=2.3.0",
@@ -1,3 +0,0 @@
1
- 0.7.18-post.12
2
- May 1st, 2025
3
- align