batchalign 0.7.5a3__tar.gz → 0.7.5a5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalign-0.7.5a3/batchalign.egg-info → batchalign-0.7.5a5}/PKG-INFO +1 -1
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/cli/cli.py +9 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/analysis/eval.py +14 -4
- batchalign-0.7.5a5/batchalign/version +3 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5/batchalign.egg-info}/PKG-INFO +1 -1
- batchalign-0.7.5a3/batchalign/version +0 -3
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/LICENSE +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/MANIFEST.in +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/README.md +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/__init__.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/__main__.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/cli/dispatch.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/constants.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/document.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/errors.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/formats/chat/generator.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/formats/chat/lexer.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/formats/chat/parser.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/formats/chat/utils.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/resolve.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/__init__.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/dispatch.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/morphosyntax/ud.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/utils/dp.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign.egg-info/SOURCES.txt +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign.egg-info/requires.txt +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/setup.cfg +0 -0
- {batchalign-0.7.5a3 → batchalign-0.7.5a5}/setup.py +0 -0
@@ -247,6 +247,12 @@ def utseg(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
|
|
247
247
|
@common_options
|
248
248
|
@click.option("--whisper/--rev",
|
249
249
|
default=False, help="Use OpenAI Whisper (ASR) instead of Rev.AI (default).")
|
250
|
+
@click.option("--lang",
|
251
|
+
help="sample language in three-letter ISO 3166-1 alpha-3 code",
|
252
|
+
show_default=True,
|
253
|
+
default="eng",
|
254
|
+
type=str)
|
255
|
+
@click.option("-n", "--num_speakers", type=int, help="number of speakers in the language sample", default=2)
|
250
256
|
@click.pass_context
|
251
257
|
def benchmark(ctx, in_dir, out_dir, lang, num_speakers, whisper, **kwargs):
|
252
258
|
"""Benchmark ASR utilities for their word accuracy"""
|
@@ -266,6 +272,9 @@ def benchmark(ctx, in_dir, out_dir, lang, num_speakers, whisper, **kwargs):
|
|
266
272
|
# write the wer
|
267
273
|
with open(Path(output).with_suffix(".wer.txt"), 'w') as df:
|
268
274
|
df.write(str(doc["wer"]))
|
275
|
+
with open(Path(output).with_suffix(".diff"), 'w') as df:
|
276
|
+
df.write(str(doc["diff"]))
|
277
|
+
|
269
278
|
|
270
279
|
_dispatch("benchmark", lang, num_speakers, ["mp3", "mp4", "wav"], ctx,
|
271
280
|
in_dir, out_dir,
|
@@ -19,8 +19,8 @@ class EvaluationEngine(BatchalignEngine):
|
|
19
19
|
@staticmethod
|
20
20
|
def __compute_wer(doc, gold):
|
21
21
|
# get the text of the document and get the text of the gold
|
22
|
-
forms = [ j.text.lower() for i in doc.content for j in i.content ]
|
23
|
-
gold_forms = [ j.text.lower() for i in gold.content for j in i.content ]
|
22
|
+
forms = [ j.text.lower() for i in doc.content for j in i.content if isinstance(i, Utterance)]
|
23
|
+
gold_forms = [ j.text.lower() for i in gold.content for j in i.content if isinstance(i, Utterance)]
|
24
24
|
|
25
25
|
# dp!
|
26
26
|
alignment = align(forms, gold_forms, False)
|
@@ -61,8 +61,15 @@ class EvaluationEngine(BatchalignEngine):
|
|
61
61
|
else:
|
62
62
|
prev_error = None
|
63
63
|
|
64
|
+
diff = []
|
65
|
+
for i in alignment:
|
66
|
+
if isinstance(i, Extra):
|
67
|
+
diff.append(f"{'+' if i.extra_type == ExtraType.REFERENCE else '-'} {i.key}")
|
68
|
+
else:
|
69
|
+
diff.append(f" {i.key}")
|
70
|
+
|
64
71
|
# wer = (S+D+I)/N
|
65
|
-
return (sub+dl+ins)/len(gold_forms)
|
72
|
+
return (sub+dl+ins)/len(gold_forms), "\n".join(diff)
|
66
73
|
|
67
74
|
def analyze(self, doc, **kwargs):
|
68
75
|
gold = kwargs.get("gold")
|
@@ -71,8 +78,11 @@ class EvaluationEngine(BatchalignEngine):
|
|
71
78
|
if not gold or not isinstance(gold, Document):
|
72
79
|
raise ValueError(f"Unexpected format for gold transcript. Expected batchalign.Document, got '{type(gold)}'")
|
73
80
|
|
81
|
+
wer, diff = self.__compute_wer(doc, gold)
|
82
|
+
|
74
83
|
return {
|
75
|
-
"wer":
|
84
|
+
"wer": wer,
|
85
|
+
"diff": diff
|
76
86
|
}
|
77
87
|
|
78
88
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/support/filled_pauses.eng
RENAMED
File without changes
|
{batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/support/replacements.eng
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/formats/chat/test_chat_generator.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/formats/textgrid/test_textgrid.py
RENAMED
File without changes
|
File without changes
|
{batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/pipelines/asr/test_asr_pipeline.py
RENAMED
File without changes
|
File without changes
|
{batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/pipelines/cleanup/test_disfluency.py
RENAMED
File without changes
|
{batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/pipelines/cleanup/test_parse_support.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/pipelines/test_pipeline_models.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|