BatchalignHK 0.7.19__tar.gz → 0.7.19.post1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/BatchalignHK.egg-info/PKG-INFO +5 -3
  2. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/BatchalignHK.egg-info/requires.txt +4 -0
  3. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/PKG-INFO +5 -3
  4. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/cli/cli.py +1 -0
  5. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/analysis/eval.py +31 -0
  6. batchalignhk-0.7.19.post1/batchalign/version +3 -0
  7. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/setup.py +4 -0
  8. batchalignhk-0.7.19/batchalign/version +0 -3
  9. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/BatchalignHK.egg-info/SOURCES.txt +0 -0
  10. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/BatchalignHK.egg-info/dependency_links.txt +0 -0
  11. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/BatchalignHK.egg-info/entry_points.txt +0 -0
  12. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/BatchalignHK.egg-info/top_level.txt +0 -0
  13. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/LICENSE +0 -0
  14. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/MANIFEST.in +0 -0
  15. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/README.md +0 -0
  16. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/__init__.py +0 -0
  17. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/__main__.py +0 -0
  18. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/cli/__init__.py +0 -0
  19. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/cli/dispatch.py +0 -0
  20. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/constants.py +0 -0
  21. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/document.py +0 -0
  22. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/errors.py +0 -0
  23. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/formats/__init__.py +0 -0
  24. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/formats/base.py +0 -0
  25. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/formats/chat/__init__.py +0 -0
  26. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/formats/chat/file.py +0 -0
  27. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/formats/chat/generator.py +0 -0
  28. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/formats/chat/lexer.py +0 -0
  29. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/formats/chat/parser.py +0 -0
  30. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/formats/chat/utils.py +0 -0
  31. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/formats/textgrid/__init__.py +0 -0
  32. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/formats/textgrid/file.py +0 -0
  33. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/formats/textgrid/generator.py +0 -0
  34. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/formats/textgrid/parser.py +0 -0
  35. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/models/__init__.py +0 -0
  36. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/models/resolve.py +0 -0
  37. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/models/speaker/__init__.py +0 -0
  38. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/models/speaker/config.yaml +0 -0
  39. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/models/speaker/infer.py +0 -0
  40. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/models/speaker/utils.py +0 -0
  41. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/models/training/__init__.py +0 -0
  42. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/models/training/run.py +0 -0
  43. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/models/training/utils.py +0 -0
  44. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/models/utils.py +0 -0
  45. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/models/utterance/__init__.py +0 -0
  46. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/models/utterance/cantonese_infer.py +0 -0
  47. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/models/utterance/dataset.py +0 -0
  48. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/models/utterance/execute.py +0 -0
  49. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/models/utterance/infer.py +0 -0
  50. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/models/utterance/prep.py +0 -0
  51. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/models/utterance/train.py +0 -0
  52. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/models/wave2vec/__init__.py +0 -0
  53. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/models/wave2vec/infer_fa.py +0 -0
  54. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/models/whisper/__init__.py +0 -0
  55. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/models/whisper/infer_asr.py +0 -0
  56. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/models/whisper/infer_fa.py +0 -0
  57. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/__init__.py +0 -0
  58. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/analysis/__init__.py +0 -0
  59. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/asr/__init__.py +0 -0
  60. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/asr/num2chinese.py +0 -0
  61. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/asr/oai_whisper.py +0 -0
  62. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/asr/rev.py +0 -0
  63. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/asr/tencent.py +0 -0
  64. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/asr/utils.py +0 -0
  65. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/asr/whisper.py +0 -0
  66. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/asr/whisperx.py +0 -0
  67. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/base.py +0 -0
  68. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/cleanup/__init__.py +0 -0
  69. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  70. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  71. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  72. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/cleanup/retrace.py +0 -0
  73. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  74. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  75. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/cleanup/support/test.test +0 -0
  76. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/dispatch.py +0 -0
  77. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/fa/__init__.py +0 -0
  78. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  79. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  80. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  81. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  82. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  83. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  84. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  85. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  86. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  87. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  88. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/pipeline.py +0 -0
  89. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/speaker/__init__.py +0 -0
  90. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  91. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/translate/__init__.py +0 -0
  92. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/translate/gtrans.py +0 -0
  93. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/translate/seamless.py +0 -0
  94. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/translate/utils.py +0 -0
  95. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/utr/__init__.py +0 -0
  96. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/utr/rev_utr.py +0 -0
  97. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/utr/utils.py +0 -0
  98. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  99. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/utterance/__init__.py +0 -0
  100. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  101. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/tests/__init__.py +0 -0
  102. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/tests/conftest.py +0 -0
  103. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  104. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  105. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  106. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  107. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  108. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  109. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  110. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  111. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  112. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  113. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  114. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  115. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/tests/pipelines/fixures.py +0 -0
  116. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  117. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  118. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/tests/test_document.py +0 -0
  119. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/utils/__init__.py +0 -0
  120. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/utils/config.py +0 -0
  121. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/utils/dp.py +0 -0
  122. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/batchalign/utils/utils.py +0 -0
  123. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.4
1
+ Metadata-Version: 2.2
2
2
  Name: BatchalignHK
3
- Version: 0.7.19
3
+ Version: 0.7.19.post1
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -41,12 +41,14 @@ Provides-Extra: dev
41
41
  Requires-Dist: pytest; extra == "dev"
42
42
  Provides-Extra: train
43
43
  Requires-Dist: accelerate~=0.27; extra == "train"
44
+ Provides-Extra: docs
45
+ Requires-Dist: mkdocs-material; extra == "docs"
46
+ Requires-Dist: mkdocs-click; extra == "docs"
44
47
  Dynamic: author
45
48
  Dynamic: author-email
46
49
  Dynamic: classifier
47
50
  Dynamic: description
48
51
  Dynamic: description-content-type
49
- Dynamic: license-file
50
52
  Dynamic: provides-extra
51
53
  Dynamic: requires-dist
52
54
  Dynamic: summary
@@ -31,5 +31,9 @@ openai-whisper>=20240930
31
31
  [dev]
32
32
  pytest
33
33
 
34
+ [docs]
35
+ mkdocs-material
36
+ mkdocs-click
37
+
34
38
  [train]
35
39
  accelerate~=0.27
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.4
1
+ Metadata-Version: 2.2
2
2
  Name: BatchalignHK
3
- Version: 0.7.19
3
+ Version: 0.7.19.post1
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -41,12 +41,14 @@ Provides-Extra: dev
41
41
  Requires-Dist: pytest; extra == "dev"
42
42
  Provides-Extra: train
43
43
  Requires-Dist: accelerate~=0.27; extra == "train"
44
+ Provides-Extra: docs
45
+ Requires-Dist: mkdocs-material; extra == "docs"
46
+ Requires-Dist: mkdocs-click; extra == "docs"
44
47
  Dynamic: author
45
48
  Dynamic: author-email
46
49
  Dynamic: classifier
47
50
  Dynamic: description
48
51
  Dynamic: description-content-type
49
- Dynamic: license-file
50
52
  Dynamic: provides-extra
51
53
  Dynamic: requires-dist
52
54
  Dynamic: summary
@@ -79,6 +79,7 @@ def handle_verbosity(verbosity):
79
79
 
80
80
  @click.group()
81
81
  @click.pass_context
82
+ @click.version_option(VERSION_NUMBER)
82
83
  @click.option("-v", "--verbose", type=int, count=True, default=0, help="How loquacious Batchalign should be.")
83
84
  def batchalign(ctx, verbose):
84
85
  """process .cha and/or audio files in IN_DIR and dumps them to OUT_DIR using recipe COMMAND"""
@@ -14,6 +14,34 @@ from batchalign.utils.dp import align, ExtraType, Extra, Match
14
14
  import logging
15
15
  L = logging.getLogger("batchalign")
16
16
 
17
+ def conform(x):
18
+ result = []
19
+ for i in x:
20
+ if "'s" in i.strip():
21
+ result.append(i.split("'")[0])
22
+ result.append("is")
23
+ elif "americanstyle" == i.strip():
24
+ result.append("american")
25
+ result.append("style")
26
+ elif "postwar" == i.strip():
27
+ result.append("post")
28
+ result.append("war")
29
+ elif "farmhouse" == i.strip():
30
+ result.append("farm")
31
+ result.append("house")
32
+ elif "aa" == i.strip():
33
+ result.append("a")
34
+ result.append("a")
35
+ elif "hmm" == i.strip():
36
+ result.append("hm")
37
+ elif "_" in i.strip():
38
+ for j in i.strip().split("_"):
39
+ result.append(j)
40
+ else:
41
+ result.append(i)
42
+
43
+ return result
44
+
17
45
  class EvaluationEngine(BatchalignEngine):
18
46
  tasks = [ Task.WER ]
19
47
 
@@ -64,6 +92,9 @@ class EvaluationEngine(BatchalignEngine):
64
92
  gold_final = gold_forms
65
93
  forms_final = forms_finished
66
94
 
95
+ gold_final = conform(gold_final)
96
+ forms_final = conform(forms_final)
97
+
67
98
  # dp!
68
99
  alignment = align(forms_final, gold_final, False)
69
100
 
@@ -0,0 +1,3 @@
1
+ 0.7.19-post.1
2
+ May 11th, 2025
3
+ Fixes for benchmarking
@@ -68,6 +68,10 @@ setup(
68
68
  'train': [
69
69
  'accelerate~=0.27',
70
70
  ],
71
+ 'docs': [
72
+ 'mkdocs-material',
73
+ 'mkdocs-click'
74
+ ],
71
75
  # 'speaker': [
72
76
  # "nemo-toolkit~=1.21.0",
73
77
  # "omegaconf~=2.3.0",
@@ -1,3 +0,0 @@
1
- 0.7.19
2
- May 9th, 2025
3
- OpenAI Whisper Implementation