batchalign 0.7.21.post8__tar.gz → 0.7.21.post10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of batchalign might be problematic. Click here for more details.

Files changed (129)
  1. {batchalign-0.7.21.post8/batchalign.egg-info → batchalign-0.7.21.post10}/PKG-INFO +1 -1
  2. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/analysis/eval.py +48 -46
  3. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/asr/rev.py +1 -0
  4. batchalign-0.7.21.post10/batchalign/version +3 -0
  5. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10/batchalign.egg-info}/PKG-INFO +1 -1
  6. batchalign-0.7.21.post8/batchalign/version +0 -3
  7. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/LICENSE +0 -0
  8. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/MANIFEST.in +0 -0
  9. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/README.md +0 -0
  10. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/__init__.py +0 -0
  11. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/__main__.py +0 -0
  12. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/cli/__init__.py +0 -0
  13. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/cli/cli.py +0 -0
  14. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/cli/dispatch.py +0 -0
  15. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/constants.py +0 -0
  16. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/document.py +0 -0
  17. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/errors.py +0 -0
  18. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/formats/__init__.py +0 -0
  19. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/formats/base.py +0 -0
  20. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/formats/chat/__init__.py +0 -0
  21. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/formats/chat/file.py +0 -0
  22. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/formats/chat/generator.py +0 -0
  23. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/formats/chat/lexer.py +0 -0
  24. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/formats/chat/parser.py +0 -0
  25. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/formats/chat/utils.py +0 -0
  26. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/formats/textgrid/__init__.py +0 -0
  27. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/formats/textgrid/file.py +0 -0
  28. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/formats/textgrid/generator.py +0 -0
  29. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/formats/textgrid/parser.py +0 -0
  30. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/models/__init__.py +0 -0
  31. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/models/resolve.py +0 -0
  32. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/models/speaker/__init__.py +0 -0
  33. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/models/speaker/config.yaml +0 -0
  34. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/models/speaker/infer.py +0 -0
  35. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/models/speaker/utils.py +0 -0
  36. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/models/training/__init__.py +0 -0
  37. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/models/training/run.py +0 -0
  38. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/models/training/utils.py +0 -0
  39. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/models/utils.py +0 -0
  40. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/models/utterance/__init__.py +0 -0
  41. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/models/utterance/cantonese_infer.py +0 -0
  42. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/models/utterance/dataset.py +0 -0
  43. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/models/utterance/execute.py +0 -0
  44. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/models/utterance/infer.py +0 -0
  45. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/models/utterance/prep.py +0 -0
  46. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/models/utterance/train.py +0 -0
  47. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/models/wave2vec/__init__.py +0 -0
  48. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/models/wave2vec/infer_fa.py +0 -0
  49. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/models/whisper/__init__.py +0 -0
  50. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/models/whisper/infer_asr.py +0 -0
  51. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/models/whisper/infer_fa.py +0 -0
  52. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/__init__.py +0 -0
  53. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/analysis/__init__.py +0 -0
  54. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/asr/__init__.py +0 -0
  55. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/asr/num2chinese.py +0 -0
  56. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/asr/oai_whisper.py +0 -0
  57. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/asr/utils.py +0 -0
  58. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/asr/whisper.py +0 -0
  59. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/asr/whisperx.py +0 -0
  60. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/avqi/__init__.py +0 -0
  61. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/avqi/engine.py +0 -0
  62. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/base.py +0 -0
  63. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/cleanup/__init__.py +0 -0
  64. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  65. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  66. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  67. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/cleanup/retrace.py +0 -0
  68. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  69. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  70. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/cleanup/support/test.test +0 -0
  71. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/diarization/__init__.py +0 -0
  72. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/diarization/pyannote.py +0 -0
  73. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/dispatch.py +0 -0
  74. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/fa/__init__.py +0 -0
  75. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  76. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  77. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  78. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  79. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  80. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  81. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  82. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  83. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  84. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  85. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/pipeline.py +0 -0
  86. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/speaker/__init__.py +0 -0
  87. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  88. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/translate/__init__.py +0 -0
  89. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/translate/gtrans.py +0 -0
  90. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/translate/seamless.py +0 -0
  91. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/translate/utils.py +0 -0
  92. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/utr/__init__.py +0 -0
  93. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/utr/rev_utr.py +0 -0
  94. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/utr/utils.py +0 -0
  95. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  96. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/utterance/__init__.py +0 -0
  97. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  98. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/tests/__init__.py +0 -0
  99. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/tests/conftest.py +0 -0
  100. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  101. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  102. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  103. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  104. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  105. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  106. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  107. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  108. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  109. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  110. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  111. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  112. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/tests/pipelines/fixures.py +0 -0
  113. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  114. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  115. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/tests/test_document.py +0 -0
  116. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/utils/__init__.py +0 -0
  117. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/utils/abbrev.py +0 -0
  118. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/utils/compounds.py +0 -0
  119. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/utils/config.py +0 -0
  120. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/utils/dp.py +0 -0
  121. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/utils/names.py +0 -0
  122. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign/utils/utils.py +0 -0
  123. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign.egg-info/SOURCES.txt +0 -0
  124. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign.egg-info/dependency_links.txt +0 -0
  125. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign.egg-info/entry_points.txt +0 -0
  126. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign.egg-info/requires.txt +0 -0
  127. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/batchalign.egg-info/top_level.txt +0 -0
  128. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/setup.cfg +0 -0
  129. {batchalign-0.7.21.post8 → batchalign-0.7.21.post10}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: batchalign
3
- Version: 0.7.21.post8
3
+ Version: 0.7.21.post10
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -18,134 +18,136 @@ import logging
18
18
  L = logging.getLogger("batchalign")
19
19
 
20
20
  joined_compounds = ["".join(k) for k in compounds]
21
- lowered_abbrev = [k.lower() for k in abbrev]
21
+ lowered_abbrev = [k for k in abbrev]
22
22
 
23
23
  fillers = ["um", "uhm", "em", "mhm", "uhhm", "eh", "uh", "hm"]
24
24
  def conform(x):
25
25
  result = []
26
26
  for i in x:
27
- if i.strip() in joined_compounds:
27
+ if i.strip().lower() in joined_compounds:
28
28
  for k in compounds[joined_compounds.index(i.strip())]:
29
29
  result.append(k)
30
30
  elif i.strip() in lowered_abbrev:
31
31
  for j in i.strip():
32
- result.append(j)
33
- elif "'s" in i.strip():
32
+ result.append(j.strip())
33
+ elif "'s" in i.strip().lower():
34
34
  result.append(i.split("'")[0])
35
35
  result.append("is")
36
- elif "'ve" in i.strip():
36
+ elif "'ve" in i.strip().lower():
37
37
  result.append(i.split("'")[0])
38
38
  result.append("have")
39
- elif "'s" in i.strip():
39
+ elif "'s" in i.strip().lower():
40
40
  result.append(i.split("'")[0])
41
41
  result.append("is")
42
- elif "'d" in i.strip():
42
+ elif "'d" in i.strip().lower():
43
43
  result.append(i.split("'")[0])
44
44
  result.append("had")
45
- elif "'m" in i.strip():
45
+ elif "'m" in i.strip().lower():
46
46
  result.append(i.split("'")[0])
47
47
  result.append("am")
48
- elif i.strip() in fillers:
48
+ elif i.strip().lower() in fillers:
49
49
  result.append("um")
50
- elif "-" in i.strip():
51
- result += [k.strip() for k in i.split("-")]
52
- elif "ok" == i.strip():
50
+ elif "-" in i.strip().lower():
51
+ result += [k.strip() for k in i.split("-").lower()]
52
+ elif "ok" == i.strip().lower():
53
53
  result.append("okay")
54
- elif "gimme" == i.strip():
54
+ elif "gimme" == i.strip().lower():
55
55
  result.append("give")
56
56
  result.append("me")
57
- elif "hafta" == i.strip() or "havta" == i.strip():
57
+ elif "hafta" == i.strip().lower() or "havta" == i.strip().lower():
58
58
  result.append("have")
59
59
  result.append("to")
60
- elif i.strip() in names:
60
+ elif i.strip().lower() in names:
61
61
  result.append("name")
62
- elif "dunno" == i.strip():
62
+ elif "dunno" == i.strip().lower():
63
63
  result.append("don't")
64
64
  result.append("know")
65
- elif "wanna" == i.strip():
65
+ elif "wanna" == i.strip().lower():
66
66
  result.append("want")
67
67
  result.append("to")
68
- elif "mba" == i.strip():
68
+ elif "mba" == i.strip().lower():
69
69
  result.append("m")
70
70
  result.append("b")
71
71
  result.append("a")
72
- elif "tli" == i.strip():
72
+ elif "tli" == i.strip().lower():
73
73
  result.append("t")
74
74
  result.append("l")
75
75
  result.append("i")
76
- elif "bbc" == i.strip():
76
+ elif "bbc" == i.strip().lower():
77
77
  result.append("b")
78
78
  result.append("b")
79
79
  result.append("c")
80
- elif "ii" == i.strip():
80
+ elif "ii" == i.strip().lower():
81
81
  result.append("i")
82
82
  result.append("i")
83
- elif "i'd" == i.strip():
83
+ elif "i'd" == i.strip().lower():
84
84
  result.append("i")
85
85
  result.append("had")
86
- elif "alright" == i.strip():
86
+ elif "alright" == i.strip().lower():
87
87
  result.append("all")
88
88
  result.append("right")
89
- elif "sorta" == i.strip():
89
+ elif "sorta" == i.strip().lower():
90
90
  result.append("sort")
91
91
  result.append("of")
92
- elif "alrightie" == i.strip():
92
+ elif "alrightie" == i.strip().lower():
93
93
  result.append("all")
94
94
  result.append("right")
95
- elif "mm" == i.strip():
95
+ elif "mm" == i.strip().lower():
96
96
  result.append("hm")
97
- elif "ai" == i.strip():
97
+ elif "ai" == i.strip().lower():
98
98
  result.append("a")
99
99
  result.append("i")
100
- elif "this'll" == i.strip():
100
+ elif "this'll" == i.strip().lower():
101
101
  result.append("this")
102
102
  result.append("will")
103
- elif "gotta" == i.strip():
103
+ elif "gotta" == i.strip().lower():
104
104
  result.append("got")
105
105
  result.append("to")
106
- elif "hadta" == i.strip():
106
+ elif "hadta" == i.strip().lower():
107
107
  result.append("had")
108
108
  result.append("to")
109
- elif "eh" == i.strip()j:
109
+ elif "eh" == i.strip().lower():
110
110
  result.append("uh")
111
- elif "kinda" == i.strip():
111
+ elif "kinda" == i.strip().lower():
112
112
  result.append("kind")
113
113
  result.append("of")
114
- elif "ed" == i.strip():
114
+ elif "ed" == i.strip().lower():
115
115
  result.append("education")
116
- elif "til" == i.strip():
116
+ elif "til" == i.strip().lower():
117
117
  result.append("until")
118
- elif "gonna" == i.strip():
118
+ elif "gonna" == i.strip().lower():
119
119
  result.append("going")
120
120
  result.append("to")
121
- elif "shoulda" == i.strip():
121
+ elif "shoulda" == i.strip().lower():
122
122
  result.append("should")
123
123
  result.append("have")
124
- elif "sposta" == i.strip():
124
+ elif "sposta" == i.strip().lower():
125
125
  result.append("supposed")
126
126
  result.append("to")
127
- elif "farmhouse" == i.strip():
127
+ elif "farmhouse" == i.strip().lower():
128
128
  result.append("farm")
129
129
  result.append("house")
130
- elif "aa" == i.strip():
130
+ elif "aa" == i.strip().lower():
131
131
  result.append("a")
132
132
  result.append("a")
133
- elif "aa" == i.strip():
133
+ elif "aa" == i.strip().lower():
134
134
  result.append("a")
135
135
  result.append("a")
136
- elif "em" == i.strip():
136
+ elif "em" == i.strip().lower():
137
137
  result.append("them")
138
- elif "hmm" == i.strip():
138
+ elif "hmm" == i.strip().lower():
139
139
  result.append("hm")
140
- elif "_" in i.strip():
140
+ elif "_" in i.strip().lower():
141
141
  for j in i.strip().split("_"):
142
142
  result.append(j)
143
143
  else:
144
- result.append(i)
144
+ result.append(i.lower())
145
145
 
146
146
  return result
147
147
 
148
148
  def match_fn(x,y):
149
+ x = x.lower()
150
+ y = y.lower()
149
151
  return (y == x or
150
152
  y.replace("(", "").replace(")", "") == x.replace("(", "").replace(")", "") or
151
153
  re.sub(r"\((.*)\)",r"", y) == x or re.sub(r"\((.*)\)",r"", x) == y)
@@ -156,8 +158,8 @@ class EvaluationEngine(BatchalignEngine):
156
158
  @staticmethod
157
159
  def __compute_wer(doc, gold):
158
160
  # get the text of the document and get the text of the gold
159
- forms = [ j.text.lower() for i in doc.content for j in i.content if isinstance(i, Utterance)]
160
- gold_forms = [ j.text.lower() for i in gold.content for j in i.content if isinstance(i, Utterance)]
161
+ forms = [ j.text for i in doc.content for j in i.content if isinstance(i, Utterance)]
162
+ gold_forms = [ j.text for i in gold.content for j in i.content if isinstance(i, Utterance)]
161
163
 
162
164
  forms = [i.replace("-", "") for i in forms if i.strip() not in MOR_PUNCT+ENDING_PUNCT]
163
165
  gold_forms = [i.replace("-", "") for i in gold_forms if i.strip() not in MOR_PUNCT+ENDING_PUNCT]
@@ -86,6 +86,7 @@ class RevEngine(BatchalignEngine):
86
86
  while status == JobStatus.IN_PROGRESS:
87
87
  time.sleep(15)
88
88
  status = client.get_job_details(job.id).status
89
+ L.debug(f"Rev.AI got '{status}'...")
89
90
 
90
91
  # if we failed, report failure and give up
91
92
  if status == JobStatus.FAILED:
@@ -0,0 +1,3 @@
1
+ 0.7.21-post.10
2
+ October 1st, 2025
3
+ Count initalisms only when its caps
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: batchalign
3
- Version: 0.7.21.post8
3
+ Version: 0.7.21.post10
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,3 +0,0 @@
1
- 0.7.21-post.8
2
- September 29th, 2025
3
- Arabic Commas