batchalign 0.7.5a3__tar.gz → 0.7.5a5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. {batchalign-0.7.5a3/batchalign.egg-info → batchalign-0.7.5a5}/PKG-INFO +1 -1
  2. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/cli/cli.py +9 -0
  3. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/analysis/eval.py +14 -4
  4. batchalign-0.7.5a5/batchalign/version +3 -0
  5. {batchalign-0.7.5a3 → batchalign-0.7.5a5/batchalign.egg-info}/PKG-INFO +1 -1
  6. batchalign-0.7.5a3/batchalign/version +0 -3
  7. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/LICENSE +0 -0
  8. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/MANIFEST.in +0 -0
  9. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/README.md +0 -0
  10. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/__init__.py +0 -0
  11. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/__main__.py +0 -0
  12. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/cli/__init__.py +0 -0
  13. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/cli/dispatch.py +0 -0
  14. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/constants.py +0 -0
  15. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/document.py +0 -0
  16. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/errors.py +0 -0
  17. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/formats/__init__.py +0 -0
  18. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/formats/base.py +0 -0
  19. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/formats/chat/__init__.py +0 -0
  20. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/formats/chat/file.py +0 -0
  21. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/formats/chat/generator.py +0 -0
  22. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/formats/chat/lexer.py +0 -0
  23. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/formats/chat/parser.py +0 -0
  24. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/formats/chat/utils.py +0 -0
  25. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/formats/textgrid/__init__.py +0 -0
  26. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/formats/textgrid/file.py +0 -0
  27. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/formats/textgrid/generator.py +0 -0
  28. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/formats/textgrid/parser.py +0 -0
  29. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/__init__.py +0 -0
  30. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/resolve.py +0 -0
  31. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/speaker/__init__.py +0 -0
  32. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/speaker/config.yaml +0 -0
  33. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/speaker/infer.py +0 -0
  34. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/speaker/utils.py +0 -0
  35. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/training/__init__.py +0 -0
  36. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/training/run.py +0 -0
  37. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/training/utils.py +0 -0
  38. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/utils.py +0 -0
  39. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/utterance/__init__.py +0 -0
  40. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/utterance/dataset.py +0 -0
  41. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/utterance/execute.py +0 -0
  42. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/utterance/infer.py +0 -0
  43. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/utterance/prep.py +0 -0
  44. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/utterance/train.py +0 -0
  45. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/whisper/__init__.py +0 -0
  46. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/whisper/infer_asr.py +0 -0
  47. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/models/whisper/infer_fa.py +0 -0
  48. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/__init__.py +0 -0
  49. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/analysis/__init__.py +0 -0
  50. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/asr/__init__.py +0 -0
  51. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/asr/rev.py +0 -0
  52. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/asr/utils.py +0 -0
  53. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/asr/whisper.py +0 -0
  54. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/asr/whisperx.py +0 -0
  55. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/base.py +0 -0
  56. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/__init__.py +0 -0
  57. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  58. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  59. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  60. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/retrace.py +0 -0
  61. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  62. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  63. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/cleanup/support/test.test +0 -0
  64. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/dispatch.py +0 -0
  65. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/fa/__init__.py +0 -0
  66. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  67. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  68. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  69. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  70. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  71. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/pipeline.py +0 -0
  72. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/speaker/__init__.py +0 -0
  73. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  74. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/utr/__init__.py +0 -0
  75. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/utr/rev_utr.py +0 -0
  76. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/utr/utils.py +0 -0
  77. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  78. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/utterance/__init__.py +0 -0
  79. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  80. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/__init__.py +0 -0
  81. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/conftest.py +0 -0
  82. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  83. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  84. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  85. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  86. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  87. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  88. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  89. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  90. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  91. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  92. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  93. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  94. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/pipelines/fixures.py +0 -0
  95. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  96. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  97. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/tests/test_document.py +0 -0
  98. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/utils/__init__.py +0 -0
  99. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/utils/config.py +0 -0
  100. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/utils/dp.py +0 -0
  101. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign/utils/utils.py +0 -0
  102. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign.egg-info/SOURCES.txt +0 -0
  103. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign.egg-info/dependency_links.txt +0 -0
  104. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign.egg-info/entry_points.txt +0 -0
  105. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign.egg-info/requires.txt +0 -0
  106. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/batchalign.egg-info/top_level.txt +0 -0
  107. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/setup.cfg +0 -0
  108. {batchalign-0.7.5a3 → batchalign-0.7.5a5}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.5a3
3
+ Version: 0.7.5a5
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -247,6 +247,12 @@ def utseg(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
247
247
  @common_options
248
248
  @click.option("--whisper/--rev",
249
249
  default=False, help="Use OpenAI Whisper (ASR) instead of Rev.AI (default).")
250
+ @click.option("--lang",
251
+ help="sample language in three-letter ISO 3166-1 alpha-3 code",
252
+ show_default=True,
253
+ default="eng",
254
+ type=str)
255
+ @click.option("-n", "--num_speakers", type=int, help="number of speakers in the language sample", default=2)
250
256
  @click.pass_context
251
257
  def benchmark(ctx, in_dir, out_dir, lang, num_speakers, whisper, **kwargs):
252
258
  """Benchmark ASR utilities for their word accuracy"""
@@ -266,6 +272,9 @@ def benchmark(ctx, in_dir, out_dir, lang, num_speakers, whisper, **kwargs):
266
272
  # write the wer
267
273
  with open(Path(output).with_suffix(".wer.txt"), 'w') as df:
268
274
  df.write(str(doc["wer"]))
275
+ with open(Path(output).with_suffix(".diff"), 'w') as df:
276
+ df.write(str(doc["diff"]))
277
+
269
278
 
270
279
  _dispatch("benchmark", lang, num_speakers, ["mp3", "mp4", "wav"], ctx,
271
280
  in_dir, out_dir,
@@ -19,8 +19,8 @@ class EvaluationEngine(BatchalignEngine):
19
19
  @staticmethod
20
20
  def __compute_wer(doc, gold):
21
21
  # get the text of the document and get the text of the gold
22
- forms = [ j.text.lower() for i in doc.content for j in i.content ]
23
- gold_forms = [ j.text.lower() for i in gold.content for j in i.content ]
22
+ forms = [ j.text.lower() for i in doc.content for j in i.content if isinstance(i, Utterance)]
23
+ gold_forms = [ j.text.lower() for i in gold.content for j in i.content if isinstance(i, Utterance)]
24
24
 
25
25
  # dp!
26
26
  alignment = align(forms, gold_forms, False)
@@ -61,8 +61,15 @@ class EvaluationEngine(BatchalignEngine):
61
61
  else:
62
62
  prev_error = None
63
63
 
64
+ diff = []
65
+ for i in alignment:
66
+ if isinstance(i, Extra):
67
+ diff.append(f"{'+' if i.extra_type == ExtraType.REFERENCE else '-'} {i.key}")
68
+ else:
69
+ diff.append(f" {i.key}")
70
+
64
71
  # wer = (S+D+I)/N
65
- return (sub+dl+ins)/len(gold_forms)
72
+ return (sub+dl+ins)/len(gold_forms), "\n".join(diff)
66
73
 
67
74
  def analyze(self, doc, **kwargs):
68
75
  gold = kwargs.get("gold")
@@ -71,8 +78,11 @@ class EvaluationEngine(BatchalignEngine):
71
78
  if not gold or not isinstance(gold, Document):
72
79
  raise ValueError(f"Unexpected format for gold transcript. Expected batchalign.Document, got '{type(gold)}'")
73
80
 
81
+ wer, diff = self.__compute_wer(doc, gold)
82
+
74
83
  return {
75
- "wer": self.__compute_wer(doc, gold)
84
+ "wer": wer,
85
+ "diff": diff
76
86
  }
77
87
 
78
88
 
@@ -0,0 +1,3 @@
1
+ 0.7.5-alpha.5
2
+ September 3rd, 2024
3
+ fix benchmark command, part 2
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.5a3
3
+ Version: 0.7.5a5
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,3 +0,0 @@
1
- 0.7.5-alpha.3
2
- September 3rd, 2024
3
- fix rev for mandarin
File without changes
File without changes
File without changes
File without changes
File without changes