BatchalignHK 0.7.19.post8__tar.gz → 0.7.19.post10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/BatchalignHK.egg-info/PKG-INFO +1 -1
  2. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/PKG-INFO +1 -1
  3. batchalignhk-0.7.19.post10/batchalign/pipelines/asr/tencent.py +187 -0
  4. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/cleanup/retrace.py +1 -1
  5. batchalignhk-0.7.19.post10/batchalign/version +3 -0
  6. batchalignhk-0.7.19.post8/batchalign/pipelines/asr/tencent.py +0 -246
  7. batchalignhk-0.7.19.post8/batchalign/version +0 -3
  8. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/BatchalignHK.egg-info/SOURCES.txt +0 -0
  9. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/BatchalignHK.egg-info/dependency_links.txt +0 -0
  10. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/BatchalignHK.egg-info/entry_points.txt +0 -0
  11. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/BatchalignHK.egg-info/requires.txt +0 -0
  12. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/BatchalignHK.egg-info/top_level.txt +0 -0
  13. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/LICENSE +0 -0
  14. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/MANIFEST.in +0 -0
  15. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/README.md +0 -0
  16. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/__init__.py +0 -0
  17. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/__main__.py +0 -0
  18. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/cli/__init__.py +0 -0
  19. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/cli/cli.py +0 -0
  20. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/cli/dispatch.py +0 -0
  21. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/constants.py +0 -0
  22. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/document.py +0 -0
  23. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/errors.py +0 -0
  24. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/formats/__init__.py +0 -0
  25. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/formats/base.py +0 -0
  26. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/formats/chat/__init__.py +0 -0
  27. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/formats/chat/file.py +0 -0
  28. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/formats/chat/generator.py +0 -0
  29. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/formats/chat/lexer.py +0 -0
  30. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/formats/chat/parser.py +0 -0
  31. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/formats/chat/utils.py +0 -0
  32. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/formats/textgrid/__init__.py +0 -0
  33. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/formats/textgrid/file.py +0 -0
  34. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/formats/textgrid/generator.py +0 -0
  35. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/formats/textgrid/parser.py +0 -0
  36. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/models/__init__.py +0 -0
  37. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/models/resolve.py +0 -0
  38. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/models/speaker/__init__.py +0 -0
  39. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/models/speaker/config.yaml +0 -0
  40. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/models/speaker/infer.py +0 -0
  41. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/models/speaker/utils.py +0 -0
  42. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/models/training/__init__.py +0 -0
  43. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/models/training/run.py +0 -0
  44. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/models/training/utils.py +0 -0
  45. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/models/utils.py +0 -0
  46. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/models/utterance/__init__.py +0 -0
  47. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/models/utterance/cantonese_infer.py +0 -0
  48. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/models/utterance/dataset.py +0 -0
  49. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/models/utterance/execute.py +0 -0
  50. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/models/utterance/infer.py +0 -0
  51. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/models/utterance/prep.py +0 -0
  52. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/models/utterance/train.py +0 -0
  53. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/models/wave2vec/__init__.py +0 -0
  54. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/models/wave2vec/infer_fa.py +0 -0
  55. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/models/whisper/__init__.py +0 -0
  56. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/models/whisper/infer_asr.py +0 -0
  57. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/models/whisper/infer_fa.py +0 -0
  58. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/__init__.py +0 -0
  59. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/analysis/__init__.py +0 -0
  60. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/analysis/eval.py +0 -0
  61. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/asr/__init__.py +0 -0
  62. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/asr/num2chinese.py +0 -0
  63. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/asr/oai_whisper.py +0 -0
  64. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/asr/rev.py +0 -0
  65. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/asr/utils.py +0 -0
  66. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/asr/whisper.py +0 -0
  67. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/asr/whisperx.py +0 -0
  68. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/base.py +0 -0
  69. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/cleanup/__init__.py +0 -0
  70. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  71. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  72. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  73. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  74. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  75. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/cleanup/support/test.test +0 -0
  76. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/dispatch.py +0 -0
  77. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/fa/__init__.py +0 -0
  78. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  79. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  80. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  81. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  82. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  83. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  84. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  85. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  86. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  87. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  88. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/pipeline.py +0 -0
  89. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/speaker/__init__.py +0 -0
  90. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  91. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/translate/__init__.py +0 -0
  92. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/translate/gtrans.py +0 -0
  93. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/translate/seamless.py +0 -0
  94. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/translate/utils.py +0 -0
  95. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/utr/__init__.py +0 -0
  96. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/utr/rev_utr.py +0 -0
  97. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/utr/utils.py +0 -0
  98. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  99. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/utterance/__init__.py +0 -0
  100. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  101. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/tests/__init__.py +0 -0
  102. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/tests/conftest.py +0 -0
  103. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  104. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  105. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  106. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  107. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  108. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  109. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  110. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  111. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  112. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  113. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  114. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  115. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/tests/pipelines/fixures.py +0 -0
  116. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  117. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  118. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/tests/test_document.py +0 -0
  119. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/utils/__init__.py +0 -0
  120. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/utils/abbrev.py +0 -0
  121. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/utils/config.py +0 -0
  122. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/utils/dp.py +0 -0
  123. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/utils/names.py +0 -0
  124. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/batchalign/utils/utils.py +0 -0
  125. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/setup.cfg +0 -0
  126. {batchalignhk-0.7.19.post8 → batchalignhk-0.7.19.post10}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: BatchalignHK
3
- Version: 0.7.19.post8
3
+ Version: 0.7.19.post10
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: BatchalignHK
3
- Version: 0.7.19.post8
3
+ Version: 0.7.19.post10
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -0,0 +1,187 @@
1
+ """
2
+ tencent.py
3
+ Support for Tencent Cloud ASR, a commercial ASR service
4
+ """
5
+
6
+ from batchalign.document import *
7
+ from batchalign.pipelines.base import *
8
+ from batchalign.pipelines.asr.utils import *
9
+ from batchalign.utils.config import config_read
10
+
11
+ from batchalign.errors import *
12
+
13
+ from batchalign.models import BertUtteranceModel, BertCantoneseUtteranceModel, resolve
14
+
15
+ from opencc import OpenCC
16
+ cc = OpenCC('s2hk')
17
+
18
+ import time
19
+ import pathlib
20
+ import tempfile
21
+ import pycountry
22
+ import numpy as np
23
+ import soundfile as sf
24
+ # from pydub import AudioSegment
25
+ # from pydub.effects import normalize
26
+ import base64
27
+ from tencentcloud.common.credential import Credential
28
+ from tencentcloud.asr.v20190614.asr_client import AsrClient, models
29
+
30
+ import asyncio
31
+ import tempfile
32
+ import os
33
+ # from pydub import AudioSegment
34
+ # from pydub.effects import normalize
35
+ # from pydub.exceptions import CouldntDecodeError
36
+
37
+
38
+ import logging
39
+ L = logging.getLogger("batchalign")
40
+
41
class TencentEngine(BatchalignEngine):
    """ASR engine that transcribes audio through Tencent Cloud's ASR client.

    The engine submits a recording (local file or ``http`` URL) as a
    recognition task, polls until the task reaches a terminal status, and
    converts the word-level result into a Batchalign document.
    """

    # Terminal task statuses: the polling loop stops on either value, and
    # the failed one is turned into a RuntimeError.
    _STATUS_SUCCESS = 2
    _STATUS_FAILED = 3

    # Seconds to wait between two status polls.
    _POLL_INTERVAL_SECONDS = 15

    # Word-level spelling normalizations applied to every recognized word.
    # Built once here instead of on every call.
    _WORD_REPLACEMENTS = {
        "系": "係",
        "唔系": "唔係",
        "噶": "㗎",
        "咧": "呢",
        "嗬": "喎",
        "只": "隻",
        "咯": "囉",
        "嚇": "吓",
        "飲": "飲",
        "喐": "郁",
        "食": "食",
        "啫": "咋",
        "哇": "嘩",
        "着": "著",
        "中意": "鍾意",
        "嘞": "喇",
        "啵": "噃",
        "遊水": "游水",
        "羣組": "群組",
        "古仔": "故仔",
        "甕": "㧬",
        "牀": "床",
        "松": "鬆",
        "較剪": "鉸剪",
        "吵": "嘈",
        "衝涼": "沖涼",
        "分鍾": "分鐘",
        "重復": "重複",
    }

    @property
    def tasks(self):
        """Tasks this engine performs.

        Utterance segmentation is only advertised when an utterance model
        was resolved for the configured language.
        """
        if self.__engine:
            return [Task.ASR, Task.SPEAKER_RECOGNITION, Task.UTTERANCE_SEGMENTATION]
        return [Task.ASR, Task.SPEAKER_RECOGNITION]

    def __init__(self, key: str = None, lang="eng", num_speakers=2):
        """Create the Tencent client and, when available, the utterance model.

        Args:
            key: Tencent Cloud secret key. When ``None`` or blank it is read
                from the ``asr`` section of the Batchalign config.
            lang: Three-letter language code of the recording.
            num_speakers: Stored on the instance; not sent with the request
                by this class.

        Raises:
            ConfigError: if the required Tencent entries are missing from
                the Batchalign config.
        """
        # The secret id is only available from the config, so it is read
        # unconditionally. Previously it was left unassigned whenever ``key``
        # was passed explicitly, which handed the builtin ``id`` function to
        # ``Credential``.
        config = config_read()
        try:
            secret_id = config["asr"]["engine.tencent.id"]
            if key is None or key.strip() == "":
                key = config["asr"]["engine.tencent.key"]
        except KeyError:
            raise ConfigError("No Tencent Cloud key found. Tencent Cloud was not set up! Please write one yourself and place it at ~/.batchalign.ini.")

        self.__lang_code = lang
        self.__num_speakers = num_speakers

        # Cantonese keeps its three-letter code; every other language is
        # mapped to its two-letter code.
        if lang == "yue":
            self.__lang = "yue"
        else:
            self.__lang = pycountry.languages.get(alpha_3=lang).alpha_2

        cred = Credential(secret_id, key)
        self.__client = AsrClient(cred, "ap-hongkong")

        # Resolve the utterance model once and reuse the result.
        utterance_model = resolve("utterance", lang)
        if utterance_model is not None:
            L.debug("Initializing utterance model...")
            if lang != "yue":
                self.__engine = BertUtteranceModel(utterance_model)
            else:
                self.__engine = BertCantoneseUtteranceModel(utterance_model)
            L.debug("Done.")
        else:
            self.__engine = None

    def replace_cantonese_words(self, word):
        """Return the custom replacement for ``word``, or ``word`` unchanged."""
        return self._WORD_REPLACEMENTS.get(word, word)

    def generate(self, f, **kwargs):
        """Transcribe ``f`` with Tencent Cloud and return the resulting document.

        Args:
            f: Path to a local audio file, or a URL starting with ``http``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            The document built by ``process_generation``, with its ``media``
            attribute pointing at ``f``.

        Raises:
            RuntimeError: if Tencent reports that the task failed.
        """
        lang = self.__lang
        client = self.__client
        stem = pathlib.Path(f).stem
        is_remote = str(f).startswith("http")

        L.info(f"Uploading '{stem}'...")

        req = models.CreateRecTaskRequest()
        # NOTE(review): ``lang`` is the two-letter code for everything except
        # "yue", so of the three-letter codes below only "yue" can match here.
        # Confirm whether the other Chinese varieties were meant to use the
        # large model as well before changing this.
        if lang in {'zho', 'yue', 'wuu', 'nan', 'hak'}:
            req.EngineModelType = "16k_zh_large"
        else:
            req.EngineModelType = f"16k_{lang}"
        req.ResTextFormat = 1
        req.SpeakerDiarization = 1
        req.ChannelNum = 1
        if is_remote:
            # Remote recordings are passed by URL.
            req.Url = f
            req.SourceType = 0
        else:
            # Local recordings are sent inline as base64 text.
            with open(f, "rb") as audio_file:
                req.Data = base64.b64encode(audio_file.read()).decode('ascii')
            req.SourceType = 1
        resp = client.CreateRecTask(req)

        L.info(f"Tencent is transcribing '{stem}'...")
        req = models.DescribeTaskStatusRequest()
        req.TaskId = resp.Data.TaskId

        # Poll until the task reaches a terminal status.
        res = client.DescribeTaskStatus(req)
        while res.Data.Status not in (self._STATUS_SUCCESS, self._STATUS_FAILED):
            time.sleep(self._POLL_INTERVAL_SECONDS)
            res = client.DescribeTaskStatus(req)

        if res.Data.Status == self._STATUS_FAILED:
            raise RuntimeError(f"Tencent reports job failed! error='{res.Data.ErrorMsg}'")

        turns = []
        # ``or []`` guards against a missing result list so an empty
        # transcription does not fail with a TypeError.
        for segment in res.Data.ResultDetail or []:
            elements = []
            segment_start = segment.StartMs
            for item in segment.Words or []:
                word = item.Word
                if self.__lang == "yue":
                    word = cc.convert(word)

                word = self.replace_cantonese_words(word)

                # Word offsets are relative to the segment start and in
                # milliseconds; the document uses absolute seconds.
                elements.append({
                    "type": "text",
                    "ts": (item.OffsetStartMs + segment_start) / 1000,
                    "end_ts": (item.OffsetEndMs + segment_start) / 1000,
                    "value": word
                })
            turns.append({
                "elements": elements,
                "speaker": segment.SpeakerId
            })
        L.debug("Tencent done.")

        doc = process_generation({"monologues": turns},
                                 self.__lang_code,
                                 utterance_engine=self.__engine)
        doc.media = Media(type=MediaType.AUDIO, name=stem, url=f)
        return doc
187
+
@@ -22,7 +22,7 @@ class NgramRetraceEngine(BatchalignEngine):
22
22
  if i.type in [TokenType.REGULAR, TokenType.PUNCT, TokenType.FP]:
23
23
  content.append(i)
24
24
  # scan for n-gram retraces
25
- for n in range(1, len(content)):
25
+ for n in range(1 if "yue" not in doc.langs and "zho" not in doc.langs else 2, len(content)):
26
26
  begin = 0
27
27
  while begin < len(content)-(n):
28
28
  # get the n gram info; we convert it to
@@ -0,0 +1,3 @@
1
+ 0.7.19-post.10
2
+ May 24th, 2025
3
+ more asr changes
@@ -1,246 +0,0 @@
1
- """
2
- rev.py
3
- Support for Rev.ai, a commerical ASR service
4
- """
5
-
6
- from batchalign.document import *
7
- from batchalign.pipelines.base import *
8
- from batchalign.pipelines.asr.utils import *
9
- from batchalign.utils.config import config_read
10
-
11
- from batchalign.errors import *
12
-
13
- from batchalign.models import BertUtteranceModel, BertCantoneseUtteranceModel, resolve
14
-
15
- from opencc import OpenCC
16
- cc = OpenCC('s2hk')
17
-
18
- import time
19
- import pathlib
20
- import tempfile
21
- import pycountry
22
- import numpy as np
23
- import soundfile as sf
24
- from pydub import AudioSegment
25
- from pydub.effects import normalize
26
- import base64
27
- from tencentcloud.common.credential import Credential
28
- from tencentcloud.asr.v20190614.asr_client import AsrClient, models
29
-
30
- import asyncio
31
- import tempfile
32
- import os
33
- from pydub import AudioSegment
34
- from pydub.effects import normalize
35
- from pydub.exceptions import CouldntDecodeError
36
-
37
-
38
- import logging
39
- L = logging.getLogger("batchalign")
40
-
41
- class TencentEngine(BatchalignEngine):
42
-
43
- @property
44
- def tasks(self):
45
- if self.__engine:
46
- return [ Task.ASR, Task.SPEAKER_RECOGNITION, Task.UTTERANCE_SEGMENTATION ]
47
- else:
48
- return [ Task.ASR, Task.SPEAKER_RECOGNITION ]
49
-
50
- def __init__(self, key:str=None, lang="eng", num_speakers=2):
51
-
52
- if key == None or key.strip() == "":
53
- config = config_read()
54
- try:
55
- id = config["asr"]["engine.tencent.id"]
56
- key = config["asr"]["engine.tencent.key"]
57
- except KeyError:
58
- raise ConfigError("No Tencent Cloud key found. Tencent Cloud was not set up! Please write one yourself and place it at ~/.batchalign.ini.")
59
-
60
- self.__lang_code = lang
61
- self.__num_speakers = num_speakers
62
-
63
- if lang == "yue":
64
- self.__lang = "yue"
65
- else:
66
- self.__lang = pycountry.languages.get(alpha_3=lang).alpha_2
67
-
68
- cred = Credential(id, key)
69
- self.__client = AsrClient(cred, "ap-hongkong")
70
-
71
- if resolve("utterance", lang) != None:
72
- L.debug("Initializing utterance model...")
73
- if lang != "yue":
74
- self.__engine = BertUtteranceModel(resolve("utterance", lang))
75
- else:
76
- self.__engine = BertCantoneseUtteranceModel(resolve("utterance", lang))
77
- L.debug("Done.")
78
- else:
79
- self.__engine = None
80
-
81
- def __preprocess_audio(self, input_path):
82
- """Enhanced audio preprocessing for low-volume speech"""
83
- try:
84
- L.info(f"Optimizing audio for ASR: {input_path}")
85
-
86
- # read the audio file
87
- audio = AudioSegment.from_file(input_path)
88
-
89
- audio = audio.set_channels(1)
90
- audio = audio.set_frame_rate(16000)
91
-
92
-
93
- audio = audio.compress_dynamic_range(
94
- threshold=-40,
95
- ratio=3,
96
- attack=5,
97
- release=100
98
- )
99
- audio = audio.low_pass_filter(4000) # filter out high frequencies
100
- audio = audio.normalize(headroom=2) # keep the headroom
101
- audio = audio.compress_dynamic_range(
102
- threshold=-55,
103
- ratio=6,
104
- attack=15,
105
- release=200
106
- )
107
-
108
- # enhance low volume
109
- audio = audio.high_pass_filter(80)
110
- boosted = audio.high_pass_filter(1000).apply_gain(+4)
111
- audio = audio.overlay(boosted)
112
-
113
- if L.level <= logging.DEBUG:
114
- self.__print_audio_stats(audio)
115
-
116
- # output to a temporary file
117
- temp_fd, temp_path = tempfile.mkstemp(suffix=".mp3")
118
- os.close(temp_fd)
119
- audio.export(
120
- temp_path,
121
- format="mp3",
122
- codec="libmp3lame",
123
- bitrate="96k",
124
- tags={"title": "BA_Optimized"},
125
- parameters=[
126
- "-compression_level", "2",
127
- "-reservoir", "0",
128
- "-joint_stereo", "0"
129
- ]
130
- )
131
-
132
- return temp_path
133
-
134
- except CouldntDecodeError:
135
- L.error(f"Audio decoding failed: {input_path}")
136
- return input_path
137
- except Exception as e:
138
- L.error(f"Audio processing error: {str(e)}")
139
- return input_path
140
-
141
- def replace_cantonese_words(self, word):
142
- """Function to replace Cantonese words with custom replacements."""
143
- word_replacements = {
144
- "系": "係",
145
- "唔系": "唔係",
146
- "噶": "㗎",
147
- "咧": "呢",
148
- "嗬": "喎",
149
- "只": "隻",
150
- "咯": "囉",
151
- "嚇": "吓",
152
- "飲": "飲",
153
- "喐": "郁",
154
- "食": "食",
155
- "啫": "咋",
156
- "哇": "嘩",
157
- "着": "著",
158
- "中意": "鍾意",
159
- "嘞": "喇",
160
- "啵": "噃",
161
- "遊水": "游水",
162
- "羣組": "群組",
163
- "古仔": "故仔",
164
- "甕": "㧬",
165
- "牀": "床",
166
- "松": "鬆",
167
- "較剪": "鉸剪",
168
- "吵": "嘈",
169
- "衝涼": "沖涼",
170
- "分鍾": "分鐘",
171
- "重復": "重複"
172
- }
173
- return word_replacements.get(word, word)
174
-
175
- def generate(self, f, **kwargs):
176
- lang = self.__lang
177
- client = self.__client
178
-
179
- processed_path = self.__preprocess_audio(f)
180
- audio = AudioSegment.from_file(processed_path)
181
-
182
- try:
183
- L.info(f"Uploading '{pathlib.Path(f).stem}'...")
184
- with open(processed_path, "rb") as audio_file:
185
- encoded_string = base64.b64encode(audio_file.read())
186
-
187
- req = models.CreateRecTaskRequest()
188
- if lang in {'zho', 'yue', 'wuu', 'nan','hak'}:
189
- req.EngineModelType = "16k_zh_large"
190
- else:
191
- req.EngineModelType = f"16k_{lang}"
192
- req.ResTextFormat = 1
193
- req.SpeakerDiarization = 1
194
- req.ChannelNum = 1
195
- req.Data = encoded_string.decode('ascii')
196
- req.SourceType = 1
197
-
198
- resp = client.CreateRecTask(req)
199
-
200
- L.info(f"Tencent is transcribing '{pathlib.Path(f).stem}'...")
201
- req = models.DescribeTaskStatusRequest()
202
- req.TaskId = resp.Data.TaskId
203
-
204
- res = client.DescribeTaskStatus(req)
205
- while res.Data.Status not in [2, 3]:
206
- time.sleep(15)
207
- res = client.DescribeTaskStatus(req)
208
-
209
- if res.Data.Status in ["3", 3]:
210
- raise RuntimeError(f"Tencent reports job failed! error='{res.Data.ErrorMsg}'")
211
-
212
- turns = []
213
- for i in res.Data.ResultDetail:
214
- turn = []
215
- start = i.StartMs
216
- for j in i.Words:
217
- word = j.Word
218
- if self.__lang == "yue":
219
- word = cc.convert(word)
220
-
221
- word = self.replace_cantonese_words(word)
222
-
223
- turn.append({
224
- "type": "text",
225
- "ts": (j.OffsetStartMs + start) / 1000,
226
- "end_ts": (j.OffsetEndMs + start) / 1000,
227
- "value": word
228
- })
229
- turns.append({
230
- "elements": turn,
231
- "speaker": i.SpeakerId
232
- })
233
- L.debug(f"Tencent done.")
234
-
235
- # Extract the text from the small volume parts for translation
236
-
237
- doc = process_generation({"monologues": turns},
238
- self.__lang_code,
239
- utterance_engine=self.__engine)
240
- media = Media(type=MediaType.AUDIO, name=Path(f).stem, url=f)
241
- doc.media = media
242
- return doc
243
-
244
- finally:
245
- if processed_path != f and pathlib.Path(processed_path).exists():
246
- pathlib.Path(processed_path).unlink()
@@ -1,3 +0,0 @@
1
- 0.7.19-post.8
2
- May 23th, 2025
3
- abbreviations