BatchalignHK 0.7.19__tar.gz → 0.7.19.post2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/BatchalignHK.egg-info/PKG-INFO +6 -2
  2. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/BatchalignHK.egg-info/SOURCES.txt +1 -0
  3. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/BatchalignHK.egg-info/requires.txt +6 -1
  4. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/PKG-INFO +6 -2
  5. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/cli/cli.py +1 -0
  6. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/analysis/eval.py +46 -0
  7. batchalignhk-0.7.19.post2/batchalign/utils/names.py +6684 -0
  8. batchalignhk-0.7.19.post2/batchalign/version +3 -0
  9. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/setup.py +5 -1
  10. batchalignhk-0.7.19/batchalign/version +0 -3
  11. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/BatchalignHK.egg-info/dependency_links.txt +0 -0
  12. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/BatchalignHK.egg-info/entry_points.txt +0 -0
  13. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/BatchalignHK.egg-info/top_level.txt +0 -0
  14. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/LICENSE +0 -0
  15. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/MANIFEST.in +0 -0
  16. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/README.md +0 -0
  17. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/__init__.py +0 -0
  18. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/__main__.py +0 -0
  19. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/cli/__init__.py +0 -0
  20. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/cli/dispatch.py +0 -0
  21. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/constants.py +0 -0
  22. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/document.py +0 -0
  23. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/errors.py +0 -0
  24. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/formats/__init__.py +0 -0
  25. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/formats/base.py +0 -0
  26. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/formats/chat/__init__.py +0 -0
  27. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/formats/chat/file.py +0 -0
  28. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/formats/chat/generator.py +0 -0
  29. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/formats/chat/lexer.py +0 -0
  30. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/formats/chat/parser.py +0 -0
  31. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/formats/chat/utils.py +0 -0
  32. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/formats/textgrid/__init__.py +0 -0
  33. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/formats/textgrid/file.py +0 -0
  34. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/formats/textgrid/generator.py +0 -0
  35. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/formats/textgrid/parser.py +0 -0
  36. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/models/__init__.py +0 -0
  37. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/models/resolve.py +0 -0
  38. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/models/speaker/__init__.py +0 -0
  39. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/models/speaker/config.yaml +0 -0
  40. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/models/speaker/infer.py +0 -0
  41. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/models/speaker/utils.py +0 -0
  42. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/models/training/__init__.py +0 -0
  43. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/models/training/run.py +0 -0
  44. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/models/training/utils.py +0 -0
  45. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/models/utils.py +0 -0
  46. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/models/utterance/__init__.py +0 -0
  47. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/models/utterance/cantonese_infer.py +0 -0
  48. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/models/utterance/dataset.py +0 -0
  49. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/models/utterance/execute.py +0 -0
  50. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/models/utterance/infer.py +0 -0
  51. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/models/utterance/prep.py +0 -0
  52. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/models/utterance/train.py +0 -0
  53. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/models/wave2vec/__init__.py +0 -0
  54. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/models/wave2vec/infer_fa.py +0 -0
  55. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/models/whisper/__init__.py +0 -0
  56. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/models/whisper/infer_asr.py +0 -0
  57. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/models/whisper/infer_fa.py +0 -0
  58. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/__init__.py +0 -0
  59. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/analysis/__init__.py +0 -0
  60. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/asr/__init__.py +0 -0
  61. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/asr/num2chinese.py +0 -0
  62. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/asr/oai_whisper.py +0 -0
  63. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/asr/rev.py +0 -0
  64. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/asr/tencent.py +0 -0
  65. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/asr/utils.py +0 -0
  66. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/asr/whisper.py +0 -0
  67. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/asr/whisperx.py +0 -0
  68. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/base.py +0 -0
  69. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/cleanup/__init__.py +0 -0
  70. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  71. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  72. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  73. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/cleanup/retrace.py +0 -0
  74. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  75. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  76. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/cleanup/support/test.test +0 -0
  77. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/dispatch.py +0 -0
  78. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/fa/__init__.py +0 -0
  79. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  80. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  81. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  82. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  83. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  84. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  85. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  86. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  87. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  88. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  89. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/pipeline.py +0 -0
  90. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/speaker/__init__.py +0 -0
  91. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  92. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/translate/__init__.py +0 -0
  93. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/translate/gtrans.py +0 -0
  94. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/translate/seamless.py +0 -0
  95. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/translate/utils.py +0 -0
  96. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/utr/__init__.py +0 -0
  97. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/utr/rev_utr.py +0 -0
  98. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/utr/utils.py +0 -0
  99. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  100. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/utterance/__init__.py +0 -0
  101. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  102. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/tests/__init__.py +0 -0
  103. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/tests/conftest.py +0 -0
  104. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  105. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  106. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  107. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  108. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  109. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  110. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  111. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  112. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  113. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  114. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  115. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  116. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/tests/pipelines/fixures.py +0 -0
  117. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  118. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  119. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/tests/test_document.py +0 -0
  120. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/utils/__init__.py +0 -0
  121. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/utils/config.py +0 -0
  122. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/utils/dp.py +0 -0
  123. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/batchalign/utils/utils.py +0 -0
  124. {batchalignhk-0.7.19 → batchalignhk-0.7.19.post2}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: BatchalignHK
3
- Version: 0.7.19
3
+ Version: 0.7.19.post2
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -35,12 +35,16 @@ Requires-Dist: tiktoken
35
35
  Requires-Dist: blobfile
36
36
  Requires-Dist: sentencepiece
37
37
  Requires-Dist: tencentcloud-sdk-python-common
38
- Requires-Dist: tencentcloud-sdk-python-asrgoogletrans
38
+ Requires-Dist: tencentcloud-sdk-python-asr
39
+ Requires-Dist: googletrans
39
40
  Requires-Dist: openai-whisper>=20240930
40
41
  Provides-Extra: dev
41
42
  Requires-Dist: pytest; extra == "dev"
42
43
  Provides-Extra: train
43
44
  Requires-Dist: accelerate~=0.27; extra == "train"
45
+ Provides-Extra: docs
46
+ Requires-Dist: mkdocs-material; extra == "docs"
47
+ Requires-Dist: mkdocs-click; extra == "docs"
44
48
  Dynamic: author
45
49
  Dynamic: author-email
46
50
  Dynamic: classifier
@@ -117,4 +117,5 @@ batchalign/tests/pipelines/fa/test_fa_pipeline.py
117
117
  batchalign/utils/__init__.py
118
118
  batchalign/utils/config.py
119
119
  batchalign/utils/dp.py
120
+ batchalign/utils/names.py
120
121
  batchalign/utils/utils.py
@@ -25,11 +25,16 @@ tiktoken
25
25
  blobfile
26
26
  sentencepiece
27
27
  tencentcloud-sdk-python-common
28
- tencentcloud-sdk-python-asrgoogletrans
28
+ tencentcloud-sdk-python-asr
29
+ googletrans
29
30
  openai-whisper>=20240930
30
31
 
31
32
  [dev]
32
33
  pytest
33
34
 
35
+ [docs]
36
+ mkdocs-material
37
+ mkdocs-click
38
+
34
39
  [train]
35
40
  accelerate~=0.27
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: BatchalignHK
3
- Version: 0.7.19
3
+ Version: 0.7.19.post2
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -35,12 +35,16 @@ Requires-Dist: tiktoken
35
35
  Requires-Dist: blobfile
36
36
  Requires-Dist: sentencepiece
37
37
  Requires-Dist: tencentcloud-sdk-python-common
38
- Requires-Dist: tencentcloud-sdk-python-asrgoogletrans
38
+ Requires-Dist: tencentcloud-sdk-python-asr
39
+ Requires-Dist: googletrans
39
40
  Requires-Dist: openai-whisper>=20240930
40
41
  Provides-Extra: dev
41
42
  Requires-Dist: pytest; extra == "dev"
42
43
  Provides-Extra: train
43
44
  Requires-Dist: accelerate~=0.27; extra == "train"
45
+ Provides-Extra: docs
46
+ Requires-Dist: mkdocs-material; extra == "docs"
47
+ Requires-Dist: mkdocs-click; extra == "docs"
44
48
  Dynamic: author
45
49
  Dynamic: author-email
46
50
  Dynamic: classifier
@@ -79,6 +79,7 @@ def handle_verbosity(verbosity):
79
79
 
80
80
  @click.group()
81
81
  @click.pass_context
82
+ @click.version_option(VERSION_NUMBER)
82
83
  @click.option("-v", "--verbose", type=int, count=True, default=0, help="How loquacious Batchalign should be.")
83
84
  def batchalign(ctx, verbose):
84
85
  """process .cha and/or audio files in IN_DIR and dumps them to OUT_DIR using recipe COMMAND"""
@@ -10,10 +10,53 @@ from batchalign.pipelines.asr.utils import *
10
10
  from batchalign.utils.config import config_read
11
11
 
12
12
  from batchalign.utils.dp import align, ExtraType, Extra, Match
13
+ from batchalign.utils.names import names
13
14
 
14
15
  import logging
15
16
  L = logging.getLogger("batchalign")
16
17
 
18
+ def conform(x):
19
+ result = []
20
+ for i in x:
21
+ if "'s" in i.strip():
22
+ result.append(i.split("'")[0])
23
+ result.append("is")
24
+ elif "americanstyle" == i.strip():
25
+ result.append("american")
26
+ result.append("style")
27
+ elif "postwar" == i.strip():
28
+ result.append("post")
29
+ result.append("war")
30
+ elif "postwar" == i.strip():
31
+ result.append("post")
32
+ result.append("war")
33
+ elif i.strip() in names:
34
+ result.append("name")
35
+ elif "dunno" == i.strip():
36
+ result.append("don't")
37
+ result.append("know")
38
+ elif "wanna" == i.strip():
39
+ result.append("want")
40
+ result.append("to")
41
+ elif "gotta" == i.strip():
42
+ result.append("got")
43
+ result.append("to")
44
+ elif "farmhouse" == i.strip():
45
+ result.append("farm")
46
+ result.append("house")
47
+ elif "aa" == i.strip():
48
+ result.append("a")
49
+ result.append("a")
50
+ elif "hmm" == i.strip():
51
+ result.append("hm")
52
+ elif "_" in i.strip():
53
+ for j in i.strip().split("_"):
54
+ result.append(j)
55
+ else:
56
+ result.append(i)
57
+
58
+ return result
59
+
17
60
  class EvaluationEngine(BatchalignEngine):
18
61
  tasks = [ Task.WER ]
19
62
 
@@ -64,6 +107,9 @@ class EvaluationEngine(BatchalignEngine):
64
107
  gold_final = gold_forms
65
108
  forms_final = forms_finished
66
109
 
110
+ gold_final = conform(gold_final)
111
+ forms_final = conform(forms_final)
112
+
67
113
  # dp!
68
114
  alignment = align(forms_final, gold_final, False)
69
115