BatchalignHK 0.7.19.post6__tar.gz → 0.7.19.post8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/BatchalignHK.egg-info/PKG-INFO +1 -1
  2. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/BatchalignHK.egg-info/SOURCES.txt +1 -0
  3. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/PKG-INFO +1 -1
  4. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/document.py +6 -0
  5. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/models/utterance/cantonese_infer.py +1 -1
  6. batchalignhk-0.7.19.post8/batchalign/pipelines/asr/tencent.py +246 -0
  7. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/morphosyntax/ud.py +1 -0
  8. batchalignhk-0.7.19.post8/batchalign/utils/abbrev.py +182 -0
  9. batchalignhk-0.7.19.post8/batchalign/version +3 -0
  10. batchalignhk-0.7.19.post6/batchalign/pipelines/asr/tencent.py +0 -132
  11. batchalignhk-0.7.19.post6/batchalign/version +0 -3
  12. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/BatchalignHK.egg-info/dependency_links.txt +0 -0
  13. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/BatchalignHK.egg-info/entry_points.txt +0 -0
  14. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/BatchalignHK.egg-info/requires.txt +0 -0
  15. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/BatchalignHK.egg-info/top_level.txt +0 -0
  16. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/LICENSE +0 -0
  17. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/MANIFEST.in +0 -0
  18. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/README.md +0 -0
  19. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/__init__.py +0 -0
  20. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/__main__.py +0 -0
  21. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/cli/__init__.py +0 -0
  22. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/cli/cli.py +0 -0
  23. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/cli/dispatch.py +0 -0
  24. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/constants.py +0 -0
  25. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/errors.py +0 -0
  26. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/formats/__init__.py +0 -0
  27. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/formats/base.py +0 -0
  28. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/formats/chat/__init__.py +0 -0
  29. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/formats/chat/file.py +0 -0
  30. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/formats/chat/generator.py +0 -0
  31. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/formats/chat/lexer.py +0 -0
  32. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/formats/chat/parser.py +0 -0
  33. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/formats/chat/utils.py +0 -0
  34. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/formats/textgrid/__init__.py +0 -0
  35. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/formats/textgrid/file.py +0 -0
  36. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/formats/textgrid/generator.py +0 -0
  37. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/formats/textgrid/parser.py +0 -0
  38. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/models/__init__.py +0 -0
  39. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/models/resolve.py +0 -0
  40. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/models/speaker/__init__.py +0 -0
  41. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/models/speaker/config.yaml +0 -0
  42. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/models/speaker/infer.py +0 -0
  43. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/models/speaker/utils.py +0 -0
  44. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/models/training/__init__.py +0 -0
  45. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/models/training/run.py +0 -0
  46. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/models/training/utils.py +0 -0
  47. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/models/utils.py +0 -0
  48. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/models/utterance/__init__.py +0 -0
  49. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/models/utterance/dataset.py +0 -0
  50. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/models/utterance/execute.py +0 -0
  51. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/models/utterance/infer.py +0 -0
  52. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/models/utterance/prep.py +0 -0
  53. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/models/utterance/train.py +0 -0
  54. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/models/wave2vec/__init__.py +0 -0
  55. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/models/wave2vec/infer_fa.py +0 -0
  56. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/models/whisper/__init__.py +0 -0
  57. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/models/whisper/infer_asr.py +0 -0
  58. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/models/whisper/infer_fa.py +0 -0
  59. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/__init__.py +0 -0
  60. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/analysis/__init__.py +0 -0
  61. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/analysis/eval.py +0 -0
  62. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/asr/__init__.py +0 -0
  63. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/asr/num2chinese.py +0 -0
  64. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/asr/oai_whisper.py +0 -0
  65. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/asr/rev.py +0 -0
  66. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/asr/utils.py +0 -0
  67. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/asr/whisper.py +0 -0
  68. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/asr/whisperx.py +0 -0
  69. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/base.py +0 -0
  70. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/cleanup/__init__.py +0 -0
  71. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  72. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  73. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  74. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/cleanup/retrace.py +0 -0
  75. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  76. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  77. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/cleanup/support/test.test +0 -0
  78. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/dispatch.py +0 -0
  79. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/fa/__init__.py +0 -0
  80. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  81. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  82. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  83. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  84. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  85. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  86. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  87. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  88. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  89. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/pipeline.py +0 -0
  90. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/speaker/__init__.py +0 -0
  91. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  92. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/translate/__init__.py +0 -0
  93. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/translate/gtrans.py +0 -0
  94. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/translate/seamless.py +0 -0
  95. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/translate/utils.py +0 -0
  96. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/utr/__init__.py +0 -0
  97. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/utr/rev_utr.py +0 -0
  98. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/utr/utils.py +0 -0
  99. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  100. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/utterance/__init__.py +0 -0
  101. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  102. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/tests/__init__.py +0 -0
  103. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/tests/conftest.py +0 -0
  104. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  105. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  106. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  107. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  108. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  109. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  110. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  111. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  112. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  113. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  114. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  115. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  116. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/tests/pipelines/fixures.py +0 -0
  117. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  118. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  119. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/tests/test_document.py +0 -0
  120. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/utils/__init__.py +0 -0
  121. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/utils/config.py +0 -0
  122. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/utils/dp.py +0 -0
  123. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/utils/names.py +0 -0
  124. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/batchalign/utils/utils.py +0 -0
  125. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/setup.cfg +0 -0
  126. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post8}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: BatchalignHK
3
- Version: 0.7.19.post6
3
+ Version: 0.7.19.post8
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -115,6 +115,7 @@ batchalign/tests/pipelines/cleanup/test_disfluency.py
115
115
  batchalign/tests/pipelines/cleanup/test_parse_support.py
116
116
  batchalign/tests/pipelines/fa/test_fa_pipeline.py
117
117
  batchalign/utils/__init__.py
118
+ batchalign/utils/abbrev.py
118
119
  batchalign/utils/config.py
119
120
  batchalign/utils/dp.py
120
121
  batchalign/utils/names.py
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: BatchalignHK
3
- Version: 0.7.19.post6
3
+ Version: 0.7.19.post8
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -11,6 +11,7 @@ from pathlib import Path
11
11
 
12
12
  from batchalign.errors import *
13
13
  from batchalign.constants import *
14
+ from batchalign.utils.abbrev import abbrev
14
15
 
15
16
  import re
16
17
 
@@ -223,6 +224,11 @@ class Utterance(BaseModel):
223
224
  t = re.sub(r",", " , ", t.strip()).strip()
224
225
  t = re.sub(r" +", " ", t.strip()).strip()
225
226
  t = t.replace("+ ,", "+,").strip()
227
+
228
+ abbrevs = [" " .join(list(i)) for i in abbrev]
229
+ for i in abbrevs:
230
+ t = t.replace(i, i.replace(" ", ""))
231
+
226
232
  return t
227
233
 
228
234
  def __repr__(self):
@@ -59,7 +59,7 @@ class BertCantoneseUtteranceModel(object):
59
59
 
60
60
 
61
61
  # Step 2: Define keywords and split the passage based on them
62
- keywords = ['呀', '啦', '喎', '嘞', '㗎喇', '囉', '㗎', '啊', '嗯'] # Replace with your desired keywords
62
+ keywords = ['呀', '喎', '嘞', '㗎喇', '囉', '啊', '嗯'] # Replace with your desired keywords
63
63
 
64
64
  chunks = []
65
65
  start = 0
@@ -0,0 +1,246 @@
1
+ """
2
+ rev.py
3
+ Support for Rev.ai, a commerical ASR service
4
+ """
5
+
6
+ from batchalign.document import *
7
+ from batchalign.pipelines.base import *
8
+ from batchalign.pipelines.asr.utils import *
9
+ from batchalign.utils.config import config_read
10
+
11
+ from batchalign.errors import *
12
+
13
+ from batchalign.models import BertUtteranceModel, BertCantoneseUtteranceModel, resolve
14
+
15
+ from opencc import OpenCC
16
+ cc = OpenCC('s2hk')
17
+
18
+ import time
19
+ import pathlib
20
+ import tempfile
21
+ import pycountry
22
+ import numpy as np
23
+ import soundfile as sf
24
+ from pydub import AudioSegment
25
+ from pydub.effects import normalize
26
+ import base64
27
+ from tencentcloud.common.credential import Credential
28
+ from tencentcloud.asr.v20190614.asr_client import AsrClient, models
29
+
30
+ import asyncio
31
+ import tempfile
32
+ import os
33
+ from pydub import AudioSegment
34
+ from pydub.effects import normalize
35
+ from pydub.exceptions import CouldntDecodeError
36
+
37
+
38
+ import logging
39
+ L = logging.getLogger("batchalign")
40
+
41
+ class TencentEngine(BatchalignEngine):
42
+
43
+ @property
44
+ def tasks(self):
45
+ if self.__engine:
46
+ return [ Task.ASR, Task.SPEAKER_RECOGNITION, Task.UTTERANCE_SEGMENTATION ]
47
+ else:
48
+ return [ Task.ASR, Task.SPEAKER_RECOGNITION ]
49
+
50
+ def __init__(self, key:str=None, lang="eng", num_speakers=2):
51
+
52
+ if key == None or key.strip() == "":
53
+ config = config_read()
54
+ try:
55
+ id = config["asr"]["engine.tencent.id"]
56
+ key = config["asr"]["engine.tencent.key"]
57
+ except KeyError:
58
+ raise ConfigError("No Tencent Cloud key found. Tencent Cloud was not set up! Please write one yourself and place it at ~/.batchalign.ini.")
59
+
60
+ self.__lang_code = lang
61
+ self.__num_speakers = num_speakers
62
+
63
+ if lang == "yue":
64
+ self.__lang = "yue"
65
+ else:
66
+ self.__lang = pycountry.languages.get(alpha_3=lang).alpha_2
67
+
68
+ cred = Credential(id, key)
69
+ self.__client = AsrClient(cred, "ap-hongkong")
70
+
71
+ if resolve("utterance", lang) != None:
72
+ L.debug("Initializing utterance model...")
73
+ if lang != "yue":
74
+ self.__engine = BertUtteranceModel(resolve("utterance", lang))
75
+ else:
76
+ self.__engine = BertCantoneseUtteranceModel(resolve("utterance", lang))
77
+ L.debug("Done.")
78
+ else:
79
+ self.__engine = None
80
+
81
+ def __preprocess_audio(self, input_path):
82
+ """Enhanced audio preprocessing for low-volume speech"""
83
+ try:
84
+ L.info(f"Optimizing audio for ASR: {input_path}")
85
+
86
+ # read the audio file
87
+ audio = AudioSegment.from_file(input_path)
88
+
89
+ audio = audio.set_channels(1)
90
+ audio = audio.set_frame_rate(16000)
91
+
92
+
93
+ audio = audio.compress_dynamic_range(
94
+ threshold=-40,
95
+ ratio=3,
96
+ attack=5,
97
+ release=100
98
+ )
99
+ audio = audio.low_pass_filter(4000) # filter out high frequencies
100
+ audio = audio.normalize(headroom=2) # keep the headroom
101
+ audio = audio.compress_dynamic_range(
102
+ threshold=-55,
103
+ ratio=6,
104
+ attack=15,
105
+ release=200
106
+ )
107
+
108
+ # enhance low volume
109
+ audio = audio.high_pass_filter(80)
110
+ boosted = audio.high_pass_filter(1000).apply_gain(+4)
111
+ audio = audio.overlay(boosted)
112
+
113
+ if L.level <= logging.DEBUG:
114
+ self.__print_audio_stats(audio)
115
+
116
+ # output to a temporary file
117
+ temp_fd, temp_path = tempfile.mkstemp(suffix=".mp3")
118
+ os.close(temp_fd)
119
+ audio.export(
120
+ temp_path,
121
+ format="mp3",
122
+ codec="libmp3lame",
123
+ bitrate="96k",
124
+ tags={"title": "BA_Optimized"},
125
+ parameters=[
126
+ "-compression_level", "2",
127
+ "-reservoir", "0",
128
+ "-joint_stereo", "0"
129
+ ]
130
+ )
131
+
132
+ return temp_path
133
+
134
+ except CouldntDecodeError:
135
+ L.error(f"Audio decoding failed: {input_path}")
136
+ return input_path
137
+ except Exception as e:
138
+ L.error(f"Audio processing error: {str(e)}")
139
+ return input_path
140
+
141
+ def replace_cantonese_words(self, word):
142
+ """Function to replace Cantonese words with custom replacements."""
143
+ word_replacements = {
144
+ "系": "係",
145
+ "唔系": "唔係",
146
+ "噶": "㗎",
147
+ "咧": "呢",
148
+ "嗬": "喎",
149
+ "只": "隻",
150
+ "咯": "囉",
151
+ "嚇": "吓",
152
+ "飲": "飲",
153
+ "喐": "郁",
154
+ "食": "食",
155
+ "啫": "咋",
156
+ "哇": "嘩",
157
+ "着": "著",
158
+ "中意": "鍾意",
159
+ "嘞": "喇",
160
+ "啵": "噃",
161
+ "遊水": "游水",
162
+ "羣組": "群組",
163
+ "古仔": "故仔",
164
+ "甕": "㧬",
165
+ "牀": "床",
166
+ "松": "鬆",
167
+ "較剪": "鉸剪",
168
+ "吵": "嘈",
169
+ "衝涼": "沖涼",
170
+ "分鍾": "分鐘",
171
+ "重復": "重複"
172
+ }
173
+ return word_replacements.get(word, word)
174
+
175
+ def generate(self, f, **kwargs):
176
+ lang = self.__lang
177
+ client = self.__client
178
+
179
+ processed_path = self.__preprocess_audio(f)
180
+ audio = AudioSegment.from_file(processed_path)
181
+
182
+ try:
183
+ L.info(f"Uploading '{pathlib.Path(f).stem}'...")
184
+ with open(processed_path, "rb") as audio_file:
185
+ encoded_string = base64.b64encode(audio_file.read())
186
+
187
+ req = models.CreateRecTaskRequest()
188
+ if lang in {'zho', 'yue', 'wuu', 'nan','hak'}:
189
+ req.EngineModelType = "16k_zh_large"
190
+ else:
191
+ req.EngineModelType = f"16k_{lang}"
192
+ req.ResTextFormat = 1
193
+ req.SpeakerDiarization = 1
194
+ req.ChannelNum = 1
195
+ req.Data = encoded_string.decode('ascii')
196
+ req.SourceType = 1
197
+
198
+ resp = client.CreateRecTask(req)
199
+
200
+ L.info(f"Tencent is transcribing '{pathlib.Path(f).stem}'...")
201
+ req = models.DescribeTaskStatusRequest()
202
+ req.TaskId = resp.Data.TaskId
203
+
204
+ res = client.DescribeTaskStatus(req)
205
+ while res.Data.Status not in [2, 3]:
206
+ time.sleep(15)
207
+ res = client.DescribeTaskStatus(req)
208
+
209
+ if res.Data.Status in ["3", 3]:
210
+ raise RuntimeError(f"Tencent reports job failed! error='{res.Data.ErrorMsg}'")
211
+
212
+ turns = []
213
+ for i in res.Data.ResultDetail:
214
+ turn = []
215
+ start = i.StartMs
216
+ for j in i.Words:
217
+ word = j.Word
218
+ if self.__lang == "yue":
219
+ word = cc.convert(word)
220
+
221
+ word = self.replace_cantonese_words(word)
222
+
223
+ turn.append({
224
+ "type": "text",
225
+ "ts": (j.OffsetStartMs + start) / 1000,
226
+ "end_ts": (j.OffsetEndMs + start) / 1000,
227
+ "value": word
228
+ })
229
+ turns.append({
230
+ "elements": turn,
231
+ "speaker": i.SpeakerId
232
+ })
233
+ L.debug(f"Tencent done.")
234
+
235
+ # Extract the text from the small volume parts for translation
236
+
237
+ doc = process_generation({"monologues": turns},
238
+ self.__lang_code,
239
+ utterance_engine=self.__engine)
240
+ media = Media(type=MediaType.AUDIO, name=Path(f).stem, url=f)
241
+ doc.media = media
242
+ return doc
243
+
244
+ finally:
245
+ if processed_path != f and pathlib.Path(processed_path).exists():
246
+ pathlib.Path(processed_path).unlink()
@@ -711,6 +711,7 @@ def adlist_postprocessor(i, lang, adlist):
711
711
 
712
712
  ######
713
713
  def morphoanalyze(doc: Document, retokenize:bool, skipmultilang:bool, status_hook:callable = None, **kwargs):
714
+
714
715
  L.debug("Starting Stanza...")
715
716
  inputs = []
716
717
 
@@ -0,0 +1,182 @@
1
+ abbrev = [
2
+ "FBI",
3
+ "CIA",
4
+ "NSA",
5
+ "NATO",
6
+ "UN",
7
+ "WHO",
8
+ "NASA",
9
+ "CDC",
10
+ "IRS",
11
+ "EPA",
12
+ "HTTP",
13
+ "URL",
14
+ "HTML",
15
+ "CSS",
16
+ "API",
17
+ "IP",
18
+ "DNS",
19
+ "SQL",
20
+ "USB",
21
+ "VPN",
22
+ "ATT",
23
+ "AT&T",
24
+ "CEO",
25
+ "CFO",
26
+ "COO",
27
+ "IPO",
28
+ "ROI",
29
+ "GDP",
30
+ "LLC",
31
+ "HR",
32
+ "M&",
33
+ "KPI",
34
+ "GPA",
35
+ "SAT",
36
+ "ACT",
37
+ "MBA",
38
+ "PhD",
39
+ "BA",
40
+ "MA",
41
+ "STEM",
42
+ "ESL",
43
+ "GED",
44
+ "AWOL",
45
+ "MIA",
46
+ "POW",
47
+ "IED",
48
+ "UAV",
49
+ "RPG",
50
+ "NATO",
51
+ "SEAL",
52
+ "JAG",
53
+ "ROTC",
54
+ "CERN",
55
+ "GMO",
56
+ "H2O",
57
+ "CO2",
58
+ "UV",
59
+ "IR",
60
+ "AI",
61
+ "VR",
62
+ "AR",
63
+ "NPR",
64
+ "BBC",
65
+ "MTV",
66
+ "CNN",
67
+ "HBO",
68
+ "ESPN",
69
+ "TMZ",
70
+ "AMC",
71
+ "IMAX",
72
+ "WWE",
73
+ "ASAP",
74
+ "DIY",
75
+ "ETA",
76
+ "RSVP",
77
+ "FYI",
78
+ "LOL",
79
+ "BRB",
80
+ "IDK",
81
+ "BTW",
82
+ "TMI",
83
+ "PBJ",
84
+ "AIDS",
85
+ "HIV",
86
+ "ADHD",
87
+ "COPD",
88
+ "PTSD",
89
+ "CHF",
90
+ "CAD",
91
+ "TB",
92
+ "UTI",
93
+ "GERD",
94
+ "MRI",
95
+ "CT",
96
+ "ECG",
97
+ "EEG",
98
+ "CBC",
99
+ "BMP",
100
+ "ABG",
101
+ "PFT",
102
+ "FOBT",
103
+ "ENT",
104
+ "OB",
105
+ "PCP",
106
+ "ICU",
107
+ "NICU",
108
+ "ER",
109
+ "OR",
110
+ "PT",
111
+ "OT",
112
+ "EM",
113
+ "OTC",
114
+ "NSAID",
115
+ "IV",
116
+ "IM",
117
+ "SC",
118
+ "PRN",
119
+ "BID",
120
+ "TID",
121
+ "QID",
122
+ "NPO",
123
+ "CNS",
124
+ "PNS",
125
+ "GI",
126
+ "GU",
127
+ "CV",
128
+ "MSK",
129
+ "ENT",
130
+ "BMI",
131
+ "BMR",
132
+ "BP",
133
+ "WBC",
134
+ "RBC",
135
+ "HGB",
136
+ "HCT",
137
+ "PLT",
138
+ "ESR",
139
+ "CRP",
140
+ "LFT",
141
+ "TFT",
142
+ "INR",
143
+ "MMR",
144
+ "DPT",
145
+ "HPV",
146
+ "Tdap",
147
+ "BCG",
148
+ "IPV",
149
+ "HBV",
150
+ "HAV",
151
+ "HCV",
152
+ "RSV",
153
+ "SOAP",
154
+ "DNR",
155
+ "AMA",
156
+ "LOS",
157
+ "EHR",
158
+ "EMR",
159
+ "ICD",
160
+ "CPT",
161
+ "HIPAA",
162
+ "HR",
163
+ "RR",
164
+ "SpO2",
165
+ "MAP",
166
+ "GFR",
167
+ "A1C",
168
+ "LDL",
169
+ "HDL",
170
+ "TG",
171
+ "BUN",
172
+ "SIDS",
173
+ "DVT",
174
+ "PE",
175
+ "ARDS",
176
+ "SLE",
177
+ "RA",
178
+ "TIA",
179
+ "CVA",
180
+ "ALS",
181
+ "MS",
182
+ ]
@@ -0,0 +1,3 @@
1
+ 0.7.19-post.8
2
+ May 23th, 2025
3
+ abbreviations
@@ -1,132 +0,0 @@
1
- """
2
- rev.py
3
- Support for Rev.ai, a commerical ASR service
4
- """
5
-
6
- from batchalign.document import *
7
- from batchalign.pipelines.base import *
8
- from batchalign.pipelines.asr.utils import *
9
- from batchalign.utils.config import config_read
10
-
11
- from batchalign.errors import *
12
-
13
- from batchalign.models import BertUtteranceModel, BertCantoneseUtteranceModel, resolve
14
-
15
- from opencc import OpenCC
16
- cc = OpenCC('s2hk')
17
-
18
- import time
19
- import pathlib
20
- import pycountry
21
-
22
- import base64
23
- from tencentcloud.common.credential import Credential
24
- from tencentcloud.asr.v20190614.asr_client import AsrClient, models
25
-
26
- import logging
27
- L = logging.getLogger("batchalign")
28
-
29
- class TencentEngine(BatchalignEngine):
30
-
31
- @property
32
- def tasks(self):
33
- # if there is no utterance segmentation scheme, we only
34
- # run ASR
35
- if self.__engine:
36
- return [ Task.ASR, Task.SPEAKER_RECOGNITION, Task.UTTERANCE_SEGMENTATION ]
37
- else:
38
- return [ Task.ASR, Task.SPEAKER_RECOGNITION ]
39
-
40
- def __init__(self, key:str=None, lang="eng", num_speakers=2):
41
-
42
- if key == None or key.strip() == "":
43
- config = config_read()
44
- try:
45
- id = config["asr"]["engine.tencent.id"]
46
- key = config["asr"]["engine.tencent.key"]
47
- except KeyError:
48
- raise ConfigError("No Tencent Cloud key found. Tencent Cloud was not set up! Please write one yourself and place it at `~/.batchalign.ini`.")
49
-
50
- self.__lang_code = lang
51
- self.__num_speakers = num_speakers
52
-
53
- if lang == "yue":
54
- self.__lang = "yue"
55
- else:
56
- self.__lang = pycountry.languages.get(alpha_3=lang).alpha_2
57
-
58
- cred = Credential(id,key)
59
- self.__client = AsrClient(cred, "ap-hongkong")
60
-
61
- if resolve("utterance", lang) != None:
62
- L.debug("Initializing utterance model...")
63
- if lang != "yue":
64
- self.__engine = BertUtteranceModel(resolve("utterance", lang))
65
- else:
66
- # we have special inference procedure for cantonese
67
- self.__engine = BertCantoneseUtteranceModel(resolve("utterance", lang))
68
- L.debug("Done.")
69
- else:
70
- self.__engine = None
71
-
72
-
73
- def generate(self, f, **kwargs):
74
- # bring language code into the stack to access
75
- lang = self.__lang
76
- client = self.__client
77
-
78
- L.info(f"Uploading '{pathlib.Path(f).stem}'...")
79
- # we will send the file for processing
80
- if not str(f).startswith("http"):
81
- with open(f, "rb") as image_file:
82
- encoded_string = base64.b64encode(image_file.read())
83
-
84
- req = models.CreateRecTaskRequest()
85
- req.EngineModelType = f"16k_{lang}"
86
- req.ResTextFormat = 1
87
- req.SpeakerDiarization = 1
88
- req.ChannelNum = 1
89
- if not str(f).startswith("http"):
90
- req.Data = encoded_string.decode('ascii')
91
- req.SourceType = 1
92
- else:
93
- req.Url = f
94
- req.SourceType = 0
95
- resp = client.CreateRecTask(req)
96
-
97
- L.info(f"Tencent is transcribing '{pathlib.Path(f).stem}'...")
98
- req = models.DescribeTaskStatusRequest()
99
- req.TaskId = resp.Data.TaskId
100
-
101
- res = client.DescribeTaskStatus(req)
102
- while res.Data.Status not in [2,3]:
103
- time.sleep(15)
104
- res = client.DescribeTaskStatus(req)
105
-
106
- # if failed, raise
107
- if res.Data.Status == "3" or res.Data.Status == 3:
108
- raise RuntimeError(f"Tencent reports job failed! error='{res.Data.ErrorMsg}'")
109
-
110
- turns = []
111
- for i in res.Data.ResultDetail:
112
- turn = []
113
- start = i.StartMs
114
- for j in i.Words:
115
- turn.append({
116
- "type": "text",
117
- "ts": (j.OffsetStartMs+start)/1000,
118
- "end_ts": (j.OffsetEndMs+start)/1000,
119
- "value": cc.convert(j.Word) if self.__lang == "yue" else j.Word
120
- })
121
- turns.append({
122
- "elements": turn,
123
- "speaker": i.SpeakerId
124
- })
125
- L.debug(f"Tencent done.")
126
-
127
- # postprocess the output and define media tier
128
- doc = process_generation({"monologues": turns},
129
- self.__lang_code, utterance_engine=self.__engine)
130
- media = Media(type=MediaType.AUDIO, name=Path(f).stem, url=f)
131
- doc.media = media
132
- return doc
@@ -1,3 +0,0 @@
1
- 0.7.19-post.6
2
- May 20th, 2025
3
- fixes for ASR