batchalign 0.7.5a4__tar.gz → 0.7.5a5__tar.gz
This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {batchalign-0.7.5a4/batchalign.egg-info → batchalign-0.7.5a5}/PKG-INFO +1 -1
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/cli/cli.py +3 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/analysis/eval.py +14 -4
- batchalign-0.7.5a5/batchalign/version +3 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5/batchalign.egg-info}/PKG-INFO +1 -1
- batchalign-0.7.5a4/batchalign/version +0 -3
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/LICENSE +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/MANIFEST.in +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/README.md +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/__init__.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/__main__.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/cli/dispatch.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/constants.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/document.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/errors.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/formats/chat/generator.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/formats/chat/lexer.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/formats/chat/parser.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/formats/chat/utils.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/models/resolve.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/__init__.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/dispatch.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/morphosyntax/ud.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/utils/dp.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign.egg-info/SOURCES.txt +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign.egg-info/requires.txt +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/setup.cfg +0 -0
- {batchalign-0.7.5a4 → batchalign-0.7.5a5}/setup.py +0 -0
@@ -272,6 +272,9 @@ def benchmark(ctx, in_dir, out_dir, lang, num_speakers, whisper, **kwargs):
|
|
272
272
|
# write the wer
|
273
273
|
with open(Path(output).with_suffix(".wer.txt"), 'w') as df:
|
274
274
|
df.write(str(doc["wer"]))
|
275
|
+
with open(Path(output).with_suffix(".diff"), 'w') as df:
|
276
|
+
df.write(str(doc["diff"]))
|
277
|
+
|
275
278
|
|
276
279
|
_dispatch("benchmark", lang, num_speakers, ["mp3", "mp4", "wav"], ctx,
|
277
280
|
in_dir, out_dir,
|
@@ -19,8 +19,8 @@ class EvaluationEngine(BatchalignEngine):
|
|
19
19
|
@staticmethod
|
20
20
|
def __compute_wer(doc, gold):
|
21
21
|
# get the text of the document and get the text of the gold
|
22
|
-
forms = [ j.text.lower() for i in doc.content for j in i.content ]
|
23
|
-
gold_forms = [ j.text.lower() for i in gold.content for j in i.content ]
|
22
|
+
forms = [ j.text.lower() for i in doc.content for j in i.content if isinstance(i, Utterance)]
|
23
|
+
gold_forms = [ j.text.lower() for i in gold.content for j in i.content if isinstance(i, Utterance)]
|
24
24
|
|
25
25
|
# dp!
|
26
26
|
alignment = align(forms, gold_forms, False)
|
@@ -61,8 +61,15 @@ class EvaluationEngine(BatchalignEngine):
|
|
61
61
|
else:
|
62
62
|
prev_error = None
|
63
63
|
|
64
|
+
diff = []
|
65
|
+
for i in alignment:
|
66
|
+
if isinstance(i, Extra):
|
67
|
+
diff.append(f"{'+' if i.extra_type == ExtraType.REFERENCE else '-'} {i.key}")
|
68
|
+
else:
|
69
|
+
diff.append(f" {i.key}")
|
70
|
+
|
64
71
|
# wer = (S+D+I)/N
|
65
|
-
return (sub+dl+ins)/len(gold_forms)
|
72
|
+
return (sub+dl+ins)/len(gold_forms), "\n".join(diff)
|
66
73
|
|
67
74
|
def analyze(self, doc, **kwargs):
|
68
75
|
gold = kwargs.get("gold")
|
@@ -71,8 +78,11 @@ class EvaluationEngine(BatchalignEngine):
|
|
71
78
|
if not gold or not isinstance(gold, Document):
|
72
79
|
raise ValueError(f"Unexpected format for gold transcript. Expected batchalign.Document, got '{type(gold)}'")
|
73
80
|
|
81
|
+
wer, diff = self.__compute_wer(doc, gold)
|
82
|
+
|
74
83
|
return {
|
75
|
-
"wer":
|
84
|
+
"wer": wer,
|
85
|
+
"diff": diff
|
76
86
|
}
|
77
87
|
|
78
88
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/support/filled_pauses.eng
RENAMED
File without changes
|
{batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/support/replacements.eng
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/tests/formats/chat/test_chat_generator.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/tests/formats/textgrid/test_textgrid.py
RENAMED
File without changes
|
File without changes
|
{batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/tests/pipelines/asr/test_asr_pipeline.py
RENAMED
File without changes
|
File without changes
|
{batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/tests/pipelines/cleanup/test_disfluency.py
RENAMED
File without changes
|
{batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/tests/pipelines/cleanup/test_parse_support.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.5a4 → batchalign-0.7.5a5}/batchalign/tests/pipelines/test_pipeline_models.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|