BatchalignHK 0.7.19.post6__tar.gz → 0.7.19.post7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124):
  1. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/BatchalignHK.egg-info/PKG-INFO +3 -2
  2. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/PKG-INFO +3 -2
  3. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/models/utterance/cantonese_infer.py +1 -1
  4. batchalignhk-0.7.19.post7/batchalign/pipelines/asr/tencent.py +246 -0
  5. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/version +1 -1
  6. batchalignhk-0.7.19.post6/batchalign/pipelines/asr/tencent.py +0 -132
  7. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/BatchalignHK.egg-info/SOURCES.txt +0 -0
  8. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/BatchalignHK.egg-info/dependency_links.txt +0 -0
  9. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/BatchalignHK.egg-info/entry_points.txt +0 -0
  10. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/BatchalignHK.egg-info/requires.txt +0 -0
  11. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/BatchalignHK.egg-info/top_level.txt +0 -0
  12. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/LICENSE +0 -0
  13. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/MANIFEST.in +0 -0
  14. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/README.md +0 -0
  15. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/__init__.py +0 -0
  16. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/__main__.py +0 -0
  17. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/cli/__init__.py +0 -0
  18. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/cli/cli.py +0 -0
  19. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/cli/dispatch.py +0 -0
  20. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/constants.py +0 -0
  21. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/document.py +0 -0
  22. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/errors.py +0 -0
  23. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/formats/__init__.py +0 -0
  24. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/formats/base.py +0 -0
  25. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/formats/chat/__init__.py +0 -0
  26. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/formats/chat/file.py +0 -0
  27. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/formats/chat/generator.py +0 -0
  28. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/formats/chat/lexer.py +0 -0
  29. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/formats/chat/parser.py +0 -0
  30. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/formats/chat/utils.py +0 -0
  31. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/formats/textgrid/__init__.py +0 -0
  32. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/formats/textgrid/file.py +0 -0
  33. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/formats/textgrid/generator.py +0 -0
  34. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/formats/textgrid/parser.py +0 -0
  35. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/models/__init__.py +0 -0
  36. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/models/resolve.py +0 -0
  37. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/models/speaker/__init__.py +0 -0
  38. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/models/speaker/config.yaml +0 -0
  39. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/models/speaker/infer.py +0 -0
  40. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/models/speaker/utils.py +0 -0
  41. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/models/training/__init__.py +0 -0
  42. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/models/training/run.py +0 -0
  43. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/models/training/utils.py +0 -0
  44. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/models/utils.py +0 -0
  45. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/models/utterance/__init__.py +0 -0
  46. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/models/utterance/dataset.py +0 -0
  47. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/models/utterance/execute.py +0 -0
  48. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/models/utterance/infer.py +0 -0
  49. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/models/utterance/prep.py +0 -0
  50. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/models/utterance/train.py +0 -0
  51. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/models/wave2vec/__init__.py +0 -0
  52. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/models/wave2vec/infer_fa.py +0 -0
  53. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/models/whisper/__init__.py +0 -0
  54. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/models/whisper/infer_asr.py +0 -0
  55. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/models/whisper/infer_fa.py +0 -0
  56. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/__init__.py +0 -0
  57. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/analysis/__init__.py +0 -0
  58. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/analysis/eval.py +0 -0
  59. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/asr/__init__.py +0 -0
  60. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/asr/num2chinese.py +0 -0
  61. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/asr/oai_whisper.py +0 -0
  62. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/asr/rev.py +0 -0
  63. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/asr/utils.py +0 -0
  64. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/asr/whisper.py +0 -0
  65. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/asr/whisperx.py +0 -0
  66. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/base.py +0 -0
  67. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/cleanup/__init__.py +0 -0
  68. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  69. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  70. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  71. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/cleanup/retrace.py +0 -0
  72. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  73. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  74. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/cleanup/support/test.test +0 -0
  75. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/dispatch.py +0 -0
  76. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/fa/__init__.py +0 -0
  77. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  78. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  79. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  80. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  81. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  82. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  83. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  84. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  85. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  86. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  87. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/pipeline.py +0 -0
  88. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/speaker/__init__.py +0 -0
  89. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  90. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/translate/__init__.py +0 -0
  91. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/translate/gtrans.py +0 -0
  92. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/translate/seamless.py +0 -0
  93. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/translate/utils.py +0 -0
  94. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/utr/__init__.py +0 -0
  95. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/utr/rev_utr.py +0 -0
  96. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/utr/utils.py +0 -0
  97. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  98. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/utterance/__init__.py +0 -0
  99. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  100. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/tests/__init__.py +0 -0
  101. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/tests/conftest.py +0 -0
  102. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  103. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  104. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  105. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  106. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  107. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  108. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  109. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  110. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  111. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  112. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  113. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  114. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/tests/pipelines/fixures.py +0 -0
  115. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  116. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  117. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/tests/test_document.py +0 -0
  118. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/utils/__init__.py +0 -0
  119. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/utils/config.py +0 -0
  120. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/utils/dp.py +0 -0
  121. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/utils/names.py +0 -0
  122. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/batchalign/utils/utils.py +0 -0
  123. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/setup.cfg +0 -0
  124. {batchalignhk-0.7.19.post6 → batchalignhk-0.7.19.post7}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: BatchalignHK
3
- Version: 0.7.19.post6
3
+ Version: 0.7.19.post7
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -50,6 +50,7 @@ Dynamic: author-email
50
50
  Dynamic: classifier
51
51
  Dynamic: description
52
52
  Dynamic: description-content-type
53
+ Dynamic: license-file
53
54
  Dynamic: provides-extra
54
55
  Dynamic: requires-dist
55
56
  Dynamic: summary
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: BatchalignHK
3
- Version: 0.7.19.post6
3
+ Version: 0.7.19.post7
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -50,6 +50,7 @@ Dynamic: author-email
50
50
  Dynamic: classifier
51
51
  Dynamic: description
52
52
  Dynamic: description-content-type
53
+ Dynamic: license-file
53
54
  Dynamic: provides-extra
54
55
  Dynamic: requires-dist
55
56
  Dynamic: summary
@@ -59,7 +59,7 @@ class BertCantoneseUtteranceModel(object):
59
59
 
60
60
 
61
61
  # Step 2: Define keywords and split the passage based on them
62
- keywords = ['呀', '啦', '喎', '嘞', '㗎喇', '囉', '㗎', '啊', '嗯'] # Replace with your desired keywords
62
+ keywords = ['呀', '喎', '嘞', '㗎喇', '囉', '啊', '嗯'] # Replace with your desired keywords
63
63
 
64
64
  chunks = []
65
65
  start = 0
@@ -0,0 +1,246 @@
1
+ """
2
+ rev.py
3
+ Support for Rev.ai, a commerical ASR service
4
+ """
5
+
6
+ from batchalign.document import *
7
+ from batchalign.pipelines.base import *
8
+ from batchalign.pipelines.asr.utils import *
9
+ from batchalign.utils.config import config_read
10
+
11
+ from batchalign.errors import *
12
+
13
+ from batchalign.models import BertUtteranceModel, BertCantoneseUtteranceModel, resolve
14
+
15
+ from opencc import OpenCC
16
+ cc = OpenCC('s2hk')
17
+
18
+ import time
19
+ import pathlib
20
+ import tempfile
21
+ import pycountry
22
+ import numpy as np
23
+ import soundfile as sf
24
+ from pydub import AudioSegment
25
+ from pydub.effects import normalize
26
+ import base64
27
+ from tencentcloud.common.credential import Credential
28
+ from tencentcloud.asr.v20190614.asr_client import AsrClient, models
29
+
30
+ import asyncio
31
+ import tempfile
32
+ import os
33
+ from pydub import AudioSegment
34
+ from pydub.effects import normalize
35
+ from pydub.exceptions import CouldntDecodeError
36
+
37
+
38
+ import logging
39
+ L = logging.getLogger("batchalign")
40
+
41
+ class TencentEngine(BatchalignEngine):
42
+
43
+ @property
44
+ def tasks(self):
45
+ if self.__engine:
46
+ return [ Task.ASR, Task.SPEAKER_RECOGNITION, Task.UTTERANCE_SEGMENTATION ]
47
+ else:
48
+ return [ Task.ASR, Task.SPEAKER_RECOGNITION ]
49
+
50
+ def __init__(self, key:str=None, lang="eng", num_speakers=2):
51
+
52
+ if key == None or key.strip() == "":
53
+ config = config_read()
54
+ try:
55
+ id = config["asr"]["engine.tencent.id"]
56
+ key = config["asr"]["engine.tencent.key"]
57
+ except KeyError:
58
+ raise ConfigError("No Tencent Cloud key found. Tencent Cloud was not set up! Please write one yourself and place it at ~/.batchalign.ini.")
59
+
60
+ self.__lang_code = lang
61
+ self.__num_speakers = num_speakers
62
+
63
+ if lang == "yue":
64
+ self.__lang = "yue"
65
+ else:
66
+ self.__lang = pycountry.languages.get(alpha_3=lang).alpha_2
67
+
68
+ cred = Credential(id, key)
69
+ self.__client = AsrClient(cred, "ap-hongkong")
70
+
71
+ if resolve("utterance", lang) != None:
72
+ L.debug("Initializing utterance model...")
73
+ if lang != "yue":
74
+ self.__engine = BertUtteranceModel(resolve("utterance", lang))
75
+ else:
76
+ self.__engine = BertCantoneseUtteranceModel(resolve("utterance", lang))
77
+ L.debug("Done.")
78
+ else:
79
+ self.__engine = None
80
+
81
+ def __preprocess_audio(self, input_path):
82
+ """Enhanced audio preprocessing for low-volume speech"""
83
+ try:
84
+ L.info(f"Optimizing audio for ASR: {input_path}")
85
+
86
+ # read the audio file
87
+ audio = AudioSegment.from_file(input_path)
88
+
89
+ audio = audio.set_channels(1)
90
+ audio = audio.set_frame_rate(16000)
91
+
92
+
93
+ audio = audio.compress_dynamic_range(
94
+ threshold=-40,
95
+ ratio=3,
96
+ attack=5,
97
+ release=100
98
+ )
99
+ audio = audio.low_pass_filter(4000) # filter out high frequencies
100
+ audio = audio.normalize(headroom=2) # keep the headroom
101
+ audio = audio.compress_dynamic_range(
102
+ threshold=-55,
103
+ ratio=6,
104
+ attack=15,
105
+ release=200
106
+ )
107
+
108
+ # enhance low volume
109
+ audio = audio.high_pass_filter(80)
110
+ boosted = audio.high_pass_filter(1000).apply_gain(+4)
111
+ audio = audio.overlay(boosted)
112
+
113
+ if L.level <= logging.DEBUG:
114
+ self.__print_audio_stats(audio)
115
+
116
+ # output to a temporary file
117
+ temp_fd, temp_path = tempfile.mkstemp(suffix=".mp3")
118
+ os.close(temp_fd)
119
+ audio.export(
120
+ temp_path,
121
+ format="mp3",
122
+ codec="libmp3lame",
123
+ bitrate="96k",
124
+ tags={"title": "BA_Optimized"},
125
+ parameters=[
126
+ "-compression_level", "2",
127
+ "-reservoir", "0",
128
+ "-joint_stereo", "0"
129
+ ]
130
+ )
131
+
132
+ return temp_path
133
+
134
+ except CouldntDecodeError:
135
+ L.error(f"Audio decoding failed: {input_path}")
136
+ return input_path
137
+ except Exception as e:
138
+ L.error(f"Audio processing error: {str(e)}")
139
+ return input_path
140
+
141
+ def replace_cantonese_words(self, word):
142
+ """Function to replace Cantonese words with custom replacements."""
143
+ word_replacements = {
144
+ "系": "係",
145
+ "唔系": "唔係",
146
+ "噶": "㗎",
147
+ "咧": "呢",
148
+ "嗬": "喎",
149
+ "只": "隻",
150
+ "咯": "囉",
151
+ "嚇": "吓",
152
+ "飲": "飲",
153
+ "喐": "郁",
154
+ "食": "食",
155
+ "啫": "咋",
156
+ "哇": "嘩",
157
+ "着": "著",
158
+ "中意": "鍾意",
159
+ "嘞": "喇",
160
+ "啵": "噃",
161
+ "遊水": "游水",
162
+ "羣組": "群組",
163
+ "古仔": "故仔",
164
+ "甕": "㧬",
165
+ "牀": "床",
166
+ "松": "鬆",
167
+ "較剪": "鉸剪",
168
+ "吵": "嘈",
169
+ "衝涼": "沖涼",
170
+ "分鍾": "分鐘",
171
+ "重復": "重複"
172
+ }
173
+ return word_replacements.get(word, word)
174
+
175
+ def generate(self, f, **kwargs):
176
+ lang = self.__lang
177
+ client = self.__client
178
+
179
+ processed_path = self.__preprocess_audio(f)
180
+ audio = AudioSegment.from_file(processed_path)
181
+
182
+ try:
183
+ L.info(f"Uploading '{pathlib.Path(f).stem}'...")
184
+ with open(processed_path, "rb") as audio_file:
185
+ encoded_string = base64.b64encode(audio_file.read())
186
+
187
+ req = models.CreateRecTaskRequest()
188
+ if lang in {'zho', 'yue', 'wuu', 'nan','hak'}:
189
+ req.EngineModelType = "16k_zh_large"
190
+ else:
191
+ req.EngineModelType = f"16k_{lang}"
192
+ req.ResTextFormat = 1
193
+ req.SpeakerDiarization = 1
194
+ req.ChannelNum = 1
195
+ req.Data = encoded_string.decode('ascii')
196
+ req.SourceType = 1
197
+
198
+ resp = client.CreateRecTask(req)
199
+
200
+ L.info(f"Tencent is transcribing '{pathlib.Path(f).stem}'...")
201
+ req = models.DescribeTaskStatusRequest()
202
+ req.TaskId = resp.Data.TaskId
203
+
204
+ res = client.DescribeTaskStatus(req)
205
+ while res.Data.Status not in [2, 3]:
206
+ time.sleep(15)
207
+ res = client.DescribeTaskStatus(req)
208
+
209
+ if res.Data.Status in ["3", 3]:
210
+ raise RuntimeError(f"Tencent reports job failed! error='{res.Data.ErrorMsg}'")
211
+
212
+ turns = []
213
+ for i in res.Data.ResultDetail:
214
+ turn = []
215
+ start = i.StartMs
216
+ for j in i.Words:
217
+ word = j.Word
218
+ if self.__lang == "yue":
219
+ word = cc.convert(word)
220
+
221
+ word = self.replace_cantonese_words(word)
222
+
223
+ turn.append({
224
+ "type": "text",
225
+ "ts": (j.OffsetStartMs + start) / 1000,
226
+ "end_ts": (j.OffsetEndMs + start) / 1000,
227
+ "value": word
228
+ })
229
+ turns.append({
230
+ "elements": turn,
231
+ "speaker": i.SpeakerId
232
+ })
233
+ L.debug(f"Tencent done.")
234
+
235
+ # Extract the text from the small volume parts for translation
236
+
237
+ doc = process_generation({"monologues": turns},
238
+ self.__lang_code,
239
+ utterance_engine=self.__engine)
240
+ media = Media(type=MediaType.AUDIO, name=Path(f).stem, url=f)
241
+ doc.media = media
242
+ return doc
243
+
244
+ finally:
245
+ if processed_path != f and pathlib.Path(processed_path).exists():
246
+ pathlib.Path(processed_path).unlink()
@@ -1,3 +1,3 @@
1
- 0.7.19-post.6
1
+ 0.7.19-post.7
2
2
  May 20th, 2025
3
3
  fixes for ASR
@@ -1,132 +0,0 @@
1
- """
2
- rev.py
3
- Support for Rev.ai, a commerical ASR service
4
- """
5
-
6
- from batchalign.document import *
7
- from batchalign.pipelines.base import *
8
- from batchalign.pipelines.asr.utils import *
9
- from batchalign.utils.config import config_read
10
-
11
- from batchalign.errors import *
12
-
13
- from batchalign.models import BertUtteranceModel, BertCantoneseUtteranceModel, resolve
14
-
15
- from opencc import OpenCC
16
- cc = OpenCC('s2hk')
17
-
18
- import time
19
- import pathlib
20
- import pycountry
21
-
22
- import base64
23
- from tencentcloud.common.credential import Credential
24
- from tencentcloud.asr.v20190614.asr_client import AsrClient, models
25
-
26
- import logging
27
- L = logging.getLogger("batchalign")
28
-
29
- class TencentEngine(BatchalignEngine):
30
-
31
- @property
32
- def tasks(self):
33
- # if there is no utterance segmentation scheme, we only
34
- # run ASR
35
- if self.__engine:
36
- return [ Task.ASR, Task.SPEAKER_RECOGNITION, Task.UTTERANCE_SEGMENTATION ]
37
- else:
38
- return [ Task.ASR, Task.SPEAKER_RECOGNITION ]
39
-
40
- def __init__(self, key:str=None, lang="eng", num_speakers=2):
41
-
42
- if key == None or key.strip() == "":
43
- config = config_read()
44
- try:
45
- id = config["asr"]["engine.tencent.id"]
46
- key = config["asr"]["engine.tencent.key"]
47
- except KeyError:
48
- raise ConfigError("No Tencent Cloud key found. Tencent Cloud was not set up! Please write one yourself and place it at `~/.batchalign.ini`.")
49
-
50
- self.__lang_code = lang
51
- self.__num_speakers = num_speakers
52
-
53
- if lang == "yue":
54
- self.__lang = "yue"
55
- else:
56
- self.__lang = pycountry.languages.get(alpha_3=lang).alpha_2
57
-
58
- cred = Credential(id,key)
59
- self.__client = AsrClient(cred, "ap-hongkong")
60
-
61
- if resolve("utterance", lang) != None:
62
- L.debug("Initializing utterance model...")
63
- if lang != "yue":
64
- self.__engine = BertUtteranceModel(resolve("utterance", lang))
65
- else:
66
- # we have special inference procedure for cantonese
67
- self.__engine = BertCantoneseUtteranceModel(resolve("utterance", lang))
68
- L.debug("Done.")
69
- else:
70
- self.__engine = None
71
-
72
-
73
- def generate(self, f, **kwargs):
74
- # bring language code into the stack to access
75
- lang = self.__lang
76
- client = self.__client
77
-
78
- L.info(f"Uploading '{pathlib.Path(f).stem}'...")
79
- # we will send the file for processing
80
- if not str(f).startswith("http"):
81
- with open(f, "rb") as image_file:
82
- encoded_string = base64.b64encode(image_file.read())
83
-
84
- req = models.CreateRecTaskRequest()
85
- req.EngineModelType = f"16k_{lang}"
86
- req.ResTextFormat = 1
87
- req.SpeakerDiarization = 1
88
- req.ChannelNum = 1
89
- if not str(f).startswith("http"):
90
- req.Data = encoded_string.decode('ascii')
91
- req.SourceType = 1
92
- else:
93
- req.Url = f
94
- req.SourceType = 0
95
- resp = client.CreateRecTask(req)
96
-
97
- L.info(f"Tencent is transcribing '{pathlib.Path(f).stem}'...")
98
- req = models.DescribeTaskStatusRequest()
99
- req.TaskId = resp.Data.TaskId
100
-
101
- res = client.DescribeTaskStatus(req)
102
- while res.Data.Status not in [2,3]:
103
- time.sleep(15)
104
- res = client.DescribeTaskStatus(req)
105
-
106
- # if failed, raise
107
- if res.Data.Status == "3" or res.Data.Status == 3:
108
- raise RuntimeError(f"Tencent reports job failed! error='{res.Data.ErrorMsg}'")
109
-
110
- turns = []
111
- for i in res.Data.ResultDetail:
112
- turn = []
113
- start = i.StartMs
114
- for j in i.Words:
115
- turn.append({
116
- "type": "text",
117
- "ts": (j.OffsetStartMs+start)/1000,
118
- "end_ts": (j.OffsetEndMs+start)/1000,
119
- "value": cc.convert(j.Word) if self.__lang == "yue" else j.Word
120
- })
121
- turns.append({
122
- "elements": turn,
123
- "speaker": i.SpeakerId
124
- })
125
- L.debug(f"Tencent done.")
126
-
127
- # postprocess the output and define media tier
128
- doc = process_generation({"monologues": turns},
129
- self.__lang_code, utterance_engine=self.__engine)
130
- media = Media(type=MediaType.AUDIO, name=Path(f).stem, url=f)
131
- doc.media = media
132
- return doc