BatchalignHK 0.7.17.post15__tar.gz → 0.7.17.post16__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/BatchalignHK.egg-info/PKG-INFO +1 -1
  2. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/PKG-INFO +1 -1
  3. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/cli/cli.py +3 -0
  4. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/cli/dispatch.py +14 -3
  5. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/asr/tencent.py +10 -5
  6. batchalignhk-0.7.17.post16/batchalign/version +3 -0
  7. batchalignhk-0.7.17.post15/batchalign/version +0 -3
  8. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/BatchalignHK.egg-info/SOURCES.txt +0 -0
  9. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/BatchalignHK.egg-info/dependency_links.txt +0 -0
  10. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/BatchalignHK.egg-info/entry_points.txt +0 -0
  11. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/BatchalignHK.egg-info/requires.txt +0 -0
  12. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/BatchalignHK.egg-info/top_level.txt +0 -0
  13. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/LICENSE +0 -0
  14. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/MANIFEST.in +0 -0
  15. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/README.md +0 -0
  16. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/__init__.py +0 -0
  17. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/__main__.py +0 -0
  18. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/cli/__init__.py +0 -0
  19. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/constants.py +0 -0
  20. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/document.py +0 -0
  21. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/errors.py +0 -0
  22. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/formats/__init__.py +0 -0
  23. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/formats/base.py +0 -0
  24. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/formats/chat/__init__.py +0 -0
  25. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/formats/chat/file.py +0 -0
  26. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/formats/chat/generator.py +0 -0
  27. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/formats/chat/lexer.py +0 -0
  28. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/formats/chat/parser.py +0 -0
  29. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/formats/chat/utils.py +0 -0
  30. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/formats/textgrid/__init__.py +0 -0
  31. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/formats/textgrid/file.py +0 -0
  32. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/formats/textgrid/generator.py +0 -0
  33. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/formats/textgrid/parser.py +0 -0
  34. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/models/__init__.py +0 -0
  35. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/models/resolve.py +0 -0
  36. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/models/speaker/__init__.py +0 -0
  37. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/models/speaker/config.yaml +0 -0
  38. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/models/speaker/infer.py +0 -0
  39. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/models/speaker/utils.py +0 -0
  40. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/models/training/__init__.py +0 -0
  41. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/models/training/run.py +0 -0
  42. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/models/training/utils.py +0 -0
  43. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/models/utils.py +0 -0
  44. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/models/utterance/__init__.py +0 -0
  45. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/models/utterance/cantonese_infer.py +0 -0
  46. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/models/utterance/dataset.py +0 -0
  47. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/models/utterance/execute.py +0 -0
  48. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/models/utterance/infer.py +0 -0
  49. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/models/utterance/prep.py +0 -0
  50. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/models/utterance/train.py +0 -0
  51. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/models/wave2vec/__init__.py +0 -0
  52. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/models/wave2vec/infer_fa.py +0 -0
  53. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/models/whisper/__init__.py +0 -0
  54. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/models/whisper/infer_asr.py +0 -0
  55. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/models/whisper/infer_fa.py +0 -0
  56. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/__init__.py +0 -0
  57. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/analysis/__init__.py +0 -0
  58. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/analysis/eval.py +0 -0
  59. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/asr/__init__.py +0 -0
  60. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/asr/num2chinese.py +0 -0
  61. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/asr/rev.py +0 -0
  62. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/asr/utils.py +0 -0
  63. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/asr/whisper.py +0 -0
  64. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/asr/whisperx.py +0 -0
  65. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/base.py +0 -0
  66. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/cleanup/__init__.py +0 -0
  67. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  68. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  69. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  70. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/cleanup/retrace.py +0 -0
  71. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  72. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  73. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/cleanup/support/test.test +0 -0
  74. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/dispatch.py +0 -0
  75. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/fa/__init__.py +0 -0
  76. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  77. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  78. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  79. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  80. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  81. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  82. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  83. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  84. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  85. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  86. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/pipeline.py +0 -0
  87. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/speaker/__init__.py +0 -0
  88. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  89. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/translate/__init__.py +0 -0
  90. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/translate/seamless.py +0 -0
  91. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/utr/__init__.py +0 -0
  92. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/utr/rev_utr.py +0 -0
  93. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/utr/utils.py +0 -0
  94. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  95. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/utterance/__init__.py +0 -0
  96. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  97. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/tests/__init__.py +0 -0
  98. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/tests/conftest.py +0 -0
  99. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  100. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  101. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  102. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  103. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  104. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  105. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  106. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  107. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  108. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  109. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  110. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  111. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/tests/pipelines/fixures.py +0 -0
  112. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  113. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  114. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/tests/test_document.py +0 -0
  115. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/utils/__init__.py +0 -0
  116. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/utils/config.py +0 -0
  117. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/utils/dp.py +0 -0
  118. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/batchalign/utils/utils.py +0 -0
  119. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/setup.cfg +0 -0
  120. {batchalignhk-0.7.17.post15 → batchalignhk-0.7.17.post16}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: BatchalignHK
3
- Version: 0.7.17.post15
3
+ Version: 0.7.17.post16
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: BatchalignHK
3
- Version: 0.7.17.post15
3
+ Version: 0.7.17.post16
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -154,6 +154,9 @@ def align(ctx, in_dir, out_dir, whisper, wav2vec, **kwargs):
154
154
  default=False, help="Perform speaker diarization (this flag is ignored with Rev.AI)")
155
155
  @click.option("--wor/--nowor",
156
156
  default=False, help="Should we write word level alignment line? Default to no.")
157
+ @click.option("--data",
158
+ help="the URL of the data",
159
+ type=str)
157
160
  @click.option("--lang",
158
161
  help="sample language in three-letter ISO 3166-1 alpha-3 code",
159
162
  show_default=True,
@@ -5,6 +5,7 @@ and actual BatchalignPipeline.
5
5
  """
6
6
 
7
7
  from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn, BarColumn
8
+ from urllib.parse import urlparse
8
9
 
9
10
  import warnings
10
11
 
@@ -63,6 +64,15 @@ def _dispatch(command, lang, num_speakers,
63
64
  files = []
64
65
  outputs = []
65
66
 
67
+ if kwargs.get("data"):
68
+ url = kwargs.get("data")
69
+ url = urlparse(url)
70
+ if url.scheme == "":
71
+ url = url._replace(scheme="http")
72
+ base = os.path.basename(url.path)
73
+ files.append(url)
74
+ outputs.append(os.path.join(out_dir, base))
75
+
66
76
  for basedir, _, fs in os.walk(in_dir):
67
77
  for f in fs:
68
78
  path = Path(os.path.join(basedir, f))
@@ -128,7 +138,8 @@ def _dispatch(command, lang, num_speakers,
128
138
  errors = []
129
139
  # create the spinner bars
130
140
  for f in files:
131
- tasks[f] = prog.add_task(Path(f).name, start=False, processor="")
141
+ tasks[f] = prog.add_task(Path(f).name if isinstance(f, str) else Path(f.geturl()).name,
142
+ start=False, processor="")
132
143
 
133
144
  # create pipeline and read files
134
145
  baL.debug("Attempting to create BatchalignPipeline for CLI...")
@@ -152,7 +163,7 @@ def _dispatch(command, lang, num_speakers,
152
163
  prog.start_task(tasks[file])
153
164
  with warnings.catch_warnings(record=True) as w:
154
165
  # parse the input format, as needed
155
- doc = loader(os.path.abspath(file))
166
+ doc = loader(os.path.abspath(file) if isinstance(file, str) else file.geturl())
156
167
  # if we ended up with a tuple of length two,
157
168
  # that means that the loader requested kwargs
158
169
  kw = {}
@@ -179,7 +190,7 @@ def _dispatch(command, lang, num_speakers,
179
190
  if len(errors) > 0:
180
191
  C.print()
181
192
  for file, trcbk, e in errors:
182
- C.print(f"[bold red]ERROR[/bold red] on file [italic]{os.path.relpath(str(Path(file).absolute()), in_dir)}[/italic]: {escape(str(e))}\n")
193
+ C.print(f"[bold red]ERROR[/bold red] on file [italic]{os.path.relpath(str(Path(file).absolute()), in_dir) if isinstance(file, str) else file.geturl()}[/italic]: {escape(str(e))}\n")
183
194
  if ctx.obj["verbose"] == 1:
184
195
  C.print(escape(str(trcbk)))
185
196
  elif ctx.obj["verbose"] > 1:
@@ -74,16 +74,21 @@ class TencentEngine(BatchalignEngine):
74
74
 
75
75
  L.info(f"Uploading '{pathlib.Path(f).stem}'...")
76
76
  # we will send the file for processing
77
- with open(f, "rb") as image_file:
78
- encoded_string = base64.b64encode(image_file.read())
77
+ if not str(f).startswith("http"):
78
+ with open(f, "rb") as image_file:
79
+ encoded_string = base64.b64encode(image_file.read())
79
80
 
80
81
  req = models.CreateRecTaskRequest()
81
82
  req.EngineModelType = f"16k_{lang}"
82
83
  req.ResTextFormat = 1
83
- req.SourceType = 1
84
84
  req.SpeakerDiarization = 1
85
85
  req.ChannelNum = 1
86
- req.Data = encoded_string.decode('ascii')
86
+ if not str(f).startswith("http"):
87
+ req.Data = encoded_string.decode('ascii')
88
+ req.SourceType = 1
89
+ else:
90
+ req.Url = f
91
+ req.SourceType = 0
87
92
  resp = client.CreateRecTask(req)
88
93
 
89
94
  L.info(f"Tencent is transcribing '{pathlib.Path(f).stem}'...")
@@ -96,7 +101,7 @@ class TencentEngine(BatchalignEngine):
96
101
  res = client.DescribeTaskStatus(req)
97
102
 
98
103
  # if failed, raise
99
- if res.Data.Status == "3":
104
+ if res.Data.Status == "3" or res.Data.Status == 3:
100
105
  raise RuntimeError(f"Tencent reports job failed! error='{res.Data.ErrorMsg}'")
101
106
 
102
107
  turns = []
@@ -0,0 +1,3 @@
1
+ 0.7.17-post.16
2
+ March 26th, 2025
3
+ better tencent ASR
@@ -1,3 +0,0 @@
1
- 0.7.17-post.15
2
- March 26th, 2025
3
- better coref model