BatchalignHK 0.7.17.post15__tar.gz → 0.7.17.post17__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/BatchalignHK.egg-info/PKG-INFO +4 -2
  2. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/BatchalignHK.egg-info/requires.txt +1 -0
  3. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/PKG-INFO +4 -2
  4. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/cli/cli.py +3 -0
  5. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/cli/dispatch.py +14 -3
  6. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/asr/tencent.py +14 -6
  7. batchalignhk-0.7.17.post17/batchalign/version +3 -0
  8. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/setup.py +1 -0
  9. batchalignhk-0.7.17.post15/batchalign/version +0 -3
  10. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/BatchalignHK.egg-info/SOURCES.txt +0 -0
  11. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/BatchalignHK.egg-info/dependency_links.txt +0 -0
  12. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/BatchalignHK.egg-info/entry_points.txt +0 -0
  13. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/BatchalignHK.egg-info/top_level.txt +0 -0
  14. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/LICENSE +0 -0
  15. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/MANIFEST.in +0 -0
  16. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/README.md +0 -0
  17. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/__init__.py +0 -0
  18. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/__main__.py +0 -0
  19. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/cli/__init__.py +0 -0
  20. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/constants.py +0 -0
  21. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/document.py +0 -0
  22. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/errors.py +0 -0
  23. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/__init__.py +0 -0
  24. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/base.py +0 -0
  25. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/chat/__init__.py +0 -0
  26. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/chat/file.py +0 -0
  27. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/chat/generator.py +0 -0
  28. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/chat/lexer.py +0 -0
  29. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/chat/parser.py +0 -0
  30. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/chat/utils.py +0 -0
  31. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/textgrid/__init__.py +0 -0
  32. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/textgrid/file.py +0 -0
  33. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/textgrid/generator.py +0 -0
  34. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/formats/textgrid/parser.py +0 -0
  35. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/__init__.py +0 -0
  36. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/resolve.py +0 -0
  37. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/speaker/__init__.py +0 -0
  38. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/speaker/config.yaml +0 -0
  39. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/speaker/infer.py +0 -0
  40. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/speaker/utils.py +0 -0
  41. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/training/__init__.py +0 -0
  42. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/training/run.py +0 -0
  43. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/training/utils.py +0 -0
  44. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/utils.py +0 -0
  45. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/utterance/__init__.py +0 -0
  46. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/utterance/cantonese_infer.py +0 -0
  47. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/utterance/dataset.py +0 -0
  48. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/utterance/execute.py +0 -0
  49. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/utterance/infer.py +0 -0
  50. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/utterance/prep.py +0 -0
  51. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/utterance/train.py +0 -0
  52. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/wave2vec/__init__.py +0 -0
  53. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/wave2vec/infer_fa.py +0 -0
  54. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/whisper/__init__.py +0 -0
  55. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/whisper/infer_asr.py +0 -0
  56. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/models/whisper/infer_fa.py +0 -0
  57. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/__init__.py +0 -0
  58. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/analysis/__init__.py +0 -0
  59. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/analysis/eval.py +0 -0
  60. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/asr/__init__.py +0 -0
  61. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/asr/num2chinese.py +0 -0
  62. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/asr/rev.py +0 -0
  63. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/asr/utils.py +0 -0
  64. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/asr/whisper.py +0 -0
  65. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/asr/whisperx.py +0 -0
  66. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/base.py +0 -0
  67. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/cleanup/__init__.py +0 -0
  68. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  69. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  70. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  71. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/cleanup/retrace.py +0 -0
  72. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  73. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  74. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/cleanup/support/test.test +0 -0
  75. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/dispatch.py +0 -0
  76. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/fa/__init__.py +0 -0
  77. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  78. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  79. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  80. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  81. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  82. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  83. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  84. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  85. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  86. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  87. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/pipeline.py +0 -0
  88. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/speaker/__init__.py +0 -0
  89. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  90. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/translate/__init__.py +0 -0
  91. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/translate/seamless.py +0 -0
  92. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/utr/__init__.py +0 -0
  93. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/utr/rev_utr.py +0 -0
  94. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/utr/utils.py +0 -0
  95. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  96. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/utterance/__init__.py +0 -0
  97. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  98. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/__init__.py +0 -0
  99. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/conftest.py +0 -0
  100. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  101. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  102. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  103. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  104. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  105. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  106. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  107. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  108. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  109. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  110. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  111. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  112. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/pipelines/fixures.py +0 -0
  113. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  114. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  115. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/tests/test_document.py +0 -0
  116. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/utils/__init__.py +0 -0
  117. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/utils/config.py +0 -0
  118. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/utils/dp.py +0 -0
  119. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/batchalign/utils/utils.py +0 -0
  120. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post17}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: BatchalignHK
3
- Version: 0.7.17.post15
3
+ Version: 0.7.17.post17
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -15,6 +15,7 @@ Requires-Dist: torch>=2.6.0
15
15
  Requires-Dist: torchaudio
16
16
  Requires-Dist: hmmlearn==0.3.0
17
17
  Requires-Dist: eyed3
18
+ Requires-Dist: opencc-python-reimplemented
18
19
  Requires-Dist: pydub
19
20
  Requires-Dist: imblearn
20
21
  Requires-Dist: plotly>=5.3.0
@@ -47,6 +48,7 @@ Dynamic: author-email
47
48
  Dynamic: classifier
48
49
  Dynamic: description
49
50
  Dynamic: description-content-type
51
+ Dynamic: license-file
50
52
  Dynamic: provides-extra
51
53
  Dynamic: requires-dist
52
54
  Dynamic: summary
@@ -5,6 +5,7 @@ torch>=2.6.0
5
5
  torchaudio
6
6
  hmmlearn==0.3.0
7
7
  eyed3
8
+ opencc-python-reimplemented
8
9
  pydub
9
10
  imblearn
10
11
  plotly>=5.3.0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: BatchalignHK
3
- Version: 0.7.17.post15
3
+ Version: 0.7.17.post17
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -15,6 +15,7 @@ Requires-Dist: torch>=2.6.0
15
15
  Requires-Dist: torchaudio
16
16
  Requires-Dist: hmmlearn==0.3.0
17
17
  Requires-Dist: eyed3
18
+ Requires-Dist: opencc-python-reimplemented
18
19
  Requires-Dist: pydub
19
20
  Requires-Dist: imblearn
20
21
  Requires-Dist: plotly>=5.3.0
@@ -47,6 +48,7 @@ Dynamic: author-email
47
48
  Dynamic: classifier
48
49
  Dynamic: description
49
50
  Dynamic: description-content-type
51
+ Dynamic: license-file
50
52
  Dynamic: provides-extra
51
53
  Dynamic: requires-dist
52
54
  Dynamic: summary
@@ -154,6 +154,9 @@ def align(ctx, in_dir, out_dir, whisper, wav2vec, **kwargs):
154
154
  default=False, help="Perform speaker diarization (this flag is ignored with Rev.AI)")
155
155
  @click.option("--wor/--nowor",
156
156
  default=False, help="Should we write word level alignment line? Default to no.")
157
+ @click.option("--data",
158
+ help="the URL of the data",
159
+ type=str)
157
160
  @click.option("--lang",
158
161
  help="sample language in three-letter ISO 3166-1 alpha-3 code",
159
162
  show_default=True,
@@ -5,6 +5,7 @@ and actual BatchalignPipeline.
5
5
  """
6
6
 
7
7
  from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn, BarColumn
8
+ from urllib.parse import urlparse
8
9
 
9
10
  import warnings
10
11
 
@@ -63,6 +64,15 @@ def _dispatch(command, lang, num_speakers,
63
64
  files = []
64
65
  outputs = []
65
66
 
67
+ if kwargs.get("data"):
68
+ url = kwargs.get("data")
69
+ url = urlparse(url)
70
+ if url.scheme == "":
71
+ url = url._replace(scheme="http")
72
+ base = os.path.basename(url.path)
73
+ files.append(url)
74
+ outputs.append(os.path.join(out_dir, base))
75
+
66
76
  for basedir, _, fs in os.walk(in_dir):
67
77
  for f in fs:
68
78
  path = Path(os.path.join(basedir, f))
@@ -128,7 +138,8 @@ def _dispatch(command, lang, num_speakers,
128
138
  errors = []
129
139
  # create the spinner bars
130
140
  for f in files:
131
- tasks[f] = prog.add_task(Path(f).name, start=False, processor="")
141
+ tasks[f] = prog.add_task(Path(f).name if isinstance(f, str) else Path(f.geturl()).name,
142
+ start=False, processor="")
132
143
 
133
144
  # create pipeline and read files
134
145
  baL.debug("Attempting to create BatchalignPipeline for CLI...")
@@ -152,7 +163,7 @@ def _dispatch(command, lang, num_speakers,
152
163
  prog.start_task(tasks[file])
153
164
  with warnings.catch_warnings(record=True) as w:
154
165
  # parse the input format, as needed
155
- doc = loader(os.path.abspath(file))
166
+ doc = loader(os.path.abspath(file) if isinstance(file, str) else file.geturl())
156
167
  # if we ended up with a tuple of length two,
157
168
  # that means that the loader requested kwargs
158
169
  kw = {}
@@ -179,7 +190,7 @@ def _dispatch(command, lang, num_speakers,
179
190
  if len(errors) > 0:
180
191
  C.print()
181
192
  for file, trcbk, e in errors:
182
- C.print(f"[bold red]ERROR[/bold red] on file [italic]{os.path.relpath(str(Path(file).absolute()), in_dir)}[/italic]: {escape(str(e))}\n")
193
+ C.print(f"[bold red]ERROR[/bold red] on file [italic]{os.path.relpath(str(Path(file).absolute()), in_dir) if isinstance(file, str) else file.geturl()}[/italic]: {escape(str(e))}\n")
183
194
  if ctx.obj["verbose"] == 1:
184
195
  C.print(escape(str(trcbk)))
185
196
  elif ctx.obj["verbose"] > 1:
@@ -12,6 +12,9 @@ from batchalign.errors import *
12
12
 
13
13
  from batchalign.models import BertUtteranceModel, BertCantoneseUtteranceModel, resolve
14
14
 
15
+ from opencc import OpenCC
16
+ cc = OpenCC('s2hk')
17
+
15
18
  import time
16
19
  import pathlib
17
20
  import pycountry
@@ -74,16 +77,21 @@ class TencentEngine(BatchalignEngine):
74
77
 
75
78
  L.info(f"Uploading '{pathlib.Path(f).stem}'...")
76
79
  # we will send the file for processing
77
- with open(f, "rb") as image_file:
78
- encoded_string = base64.b64encode(image_file.read())
80
+ if not str(f).startswith("http"):
81
+ with open(f, "rb") as image_file:
82
+ encoded_string = base64.b64encode(image_file.read())
79
83
 
80
84
  req = models.CreateRecTaskRequest()
81
85
  req.EngineModelType = f"16k_{lang}"
82
86
  req.ResTextFormat = 1
83
- req.SourceType = 1
84
87
  req.SpeakerDiarization = 1
85
88
  req.ChannelNum = 1
86
- req.Data = encoded_string.decode('ascii')
89
+ if not str(f).startswith("http"):
90
+ req.Data = encoded_string.decode('ascii')
91
+ req.SourceType = 1
92
+ else:
93
+ req.Url = f
94
+ req.SourceType = 0
87
95
  resp = client.CreateRecTask(req)
88
96
 
89
97
  L.info(f"Tencent is transcribing '{pathlib.Path(f).stem}'...")
@@ -96,7 +104,7 @@ class TencentEngine(BatchalignEngine):
96
104
  res = client.DescribeTaskStatus(req)
97
105
 
98
106
  # if failed, raise
99
- if res.Data.Status == "3":
107
+ if res.Data.Status == "3" or res.Data.Status == 3:
100
108
  raise RuntimeError(f"Tencent reports job failed! error='{res.Data.ErrorMsg}'")
101
109
 
102
110
  turns = []
@@ -108,7 +116,7 @@ class TencentEngine(BatchalignEngine):
108
116
  "type": "text",
109
117
  "ts": (j.OffsetStartMs+start)/1000,
110
118
  "end_ts": (j.OffsetEndMs+start)/1000,
111
- "value": j.Word
119
+ "value": cc.convert(j.Word)
112
120
  })
113
121
  turns.append({
114
122
  "elements": turn,
@@ -0,0 +1,3 @@
1
+ 0.7.17-post.17
2
+ March 26th, 2025
3
+ better tencent ASR
@@ -34,6 +34,7 @@ setup(
34
34
  # "pyAudioAnalysis",
35
35
  "hmmlearn==0.3.0",
36
36
  "eyed3",
37
+ "opencc-python-reimplemented",
37
38
  "pydub",
38
39
  "imblearn",
39
40
  "plotly>=5.3.0",
@@ -1,3 +0,0 @@
1
- 0.7.17-post.15
2
- March 26th, 2025
3
- better coref model