BatchalignHK 0.7.18.post7__tar.gz → 0.7.18.post8__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (122)
  1. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/BatchalignHK.egg-info/PKG-INFO +1 -1
  2. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/PKG-INFO +1 -1
  3. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/cli/cli.py +5 -1
  4. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/document.py +1 -0
  5. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/formats/chat/parser.py +3 -1
  6. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/morphosyntax/ud.py +4 -2
  7. batchalignhk-0.7.18.post8/batchalign/version +3 -0
  8. batchalignhk-0.7.18.post7/batchalign/version +0 -3
  9. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/BatchalignHK.egg-info/SOURCES.txt +0 -0
  10. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/BatchalignHK.egg-info/dependency_links.txt +0 -0
  11. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/BatchalignHK.egg-info/entry_points.txt +0 -0
  12. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/BatchalignHK.egg-info/requires.txt +0 -0
  13. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/BatchalignHK.egg-info/top_level.txt +0 -0
  14. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/LICENSE +0 -0
  15. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/MANIFEST.in +0 -0
  16. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/README.md +0 -0
  17. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/__init__.py +0 -0
  18. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/__main__.py +0 -0
  19. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/cli/__init__.py +0 -0
  20. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/cli/dispatch.py +0 -0
  21. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/constants.py +0 -0
  22. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/errors.py +0 -0
  23. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/formats/__init__.py +0 -0
  24. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/formats/base.py +0 -0
  25. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/formats/chat/__init__.py +0 -0
  26. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/formats/chat/file.py +0 -0
  27. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/formats/chat/generator.py +0 -0
  28. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/formats/chat/lexer.py +0 -0
  29. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/formats/chat/utils.py +0 -0
  30. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/formats/textgrid/__init__.py +0 -0
  31. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/formats/textgrid/file.py +0 -0
  32. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/formats/textgrid/generator.py +0 -0
  33. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/formats/textgrid/parser.py +0 -0
  34. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/models/__init__.py +0 -0
  35. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/models/resolve.py +0 -0
  36. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/models/speaker/__init__.py +0 -0
  37. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/models/speaker/config.yaml +0 -0
  38. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/models/speaker/infer.py +0 -0
  39. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/models/speaker/utils.py +0 -0
  40. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/models/training/__init__.py +0 -0
  41. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/models/training/run.py +0 -0
  42. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/models/training/utils.py +0 -0
  43. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/models/utils.py +0 -0
  44. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/models/utterance/__init__.py +0 -0
  45. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/models/utterance/cantonese_infer.py +0 -0
  46. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/models/utterance/dataset.py +0 -0
  47. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/models/utterance/execute.py +0 -0
  48. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/models/utterance/infer.py +0 -0
  49. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/models/utterance/prep.py +0 -0
  50. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/models/utterance/train.py +0 -0
  51. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/models/wave2vec/__init__.py +0 -0
  52. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/models/wave2vec/infer_fa.py +0 -0
  53. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/models/whisper/__init__.py +0 -0
  54. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/models/whisper/infer_asr.py +0 -0
  55. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/models/whisper/infer_fa.py +0 -0
  56. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/__init__.py +0 -0
  57. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/analysis/__init__.py +0 -0
  58. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/analysis/eval.py +0 -0
  59. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/asr/__init__.py +0 -0
  60. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/asr/num2chinese.py +0 -0
  61. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/asr/rev.py +0 -0
  62. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/asr/tencent.py +0 -0
  63. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/asr/utils.py +0 -0
  64. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/asr/whisper.py +0 -0
  65. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/asr/whisperx.py +0 -0
  66. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/base.py +0 -0
  67. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/cleanup/__init__.py +0 -0
  68. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  69. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  70. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  71. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/cleanup/retrace.py +0 -0
  72. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  73. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  74. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/cleanup/support/test.test +0 -0
  75. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/dispatch.py +0 -0
  76. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/fa/__init__.py +0 -0
  77. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  78. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  79. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  80. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  81. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  82. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  83. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  84. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  85. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  86. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/pipeline.py +0 -0
  87. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/speaker/__init__.py +0 -0
  88. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  89. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/translate/__init__.py +0 -0
  90. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/translate/gtrans.py +0 -0
  91. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/translate/seamless.py +0 -0
  92. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/translate/utils.py +0 -0
  93. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/utr/__init__.py +0 -0
  94. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/utr/rev_utr.py +0 -0
  95. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/utr/utils.py +0 -0
  96. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  97. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/utterance/__init__.py +0 -0
  98. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  99. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/tests/__init__.py +0 -0
  100. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/tests/conftest.py +0 -0
  101. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  102. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  103. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  104. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  105. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  106. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  107. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  108. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  109. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  110. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  111. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  112. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  113. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/tests/pipelines/fixures.py +0 -0
  114. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  115. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  116. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/tests/test_document.py +0 -0
  117. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/utils/__init__.py +0 -0
  118. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/utils/config.py +0 -0
  119. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/utils/dp.py +0 -0
  120. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/batchalign/utils/utils.py +0 -0
  121. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/setup.cfg +0 -0
  122. {batchalignhk-0.7.18.post7 → batchalignhk-0.7.18.post8}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: BatchalignHK
3
- Version: 0.7.18.post7
3
+ Version: 0.7.18.post8
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: BatchalignHK
3
- Version: 0.7.18.post7
3
+ Version: 0.7.18.post8
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -230,6 +230,8 @@ def translate(ctx, in_dir, out_dir, **kwargs):
230
230
  @common_options
231
231
  @click.option("--retokenize/--keeptokens",
232
232
  default=False, help="Retokenize the main line to fit the UD tokenizations.")
233
+ @click.option("--skipmultilang/--multilang",
234
+ default=False, help="skip code switching")
233
235
  @click.option("--lexicon",
234
236
  type=click.Path(exists=True,
235
237
  file_okay=True, dir_okay=False),
@@ -253,7 +255,9 @@ def morphotag(ctx, in_dir, out_dir, **kwargs):
253
255
  doc.ba_special_["special_mor_notation"] = True
254
256
  return (
255
257
  doc,
256
- {"retokenize": kwargs["retokenize"], "mwt": mwt}
258
+ {"retokenize": kwargs["retokenize"],
259
+ "skipmultilang": kwargs["skipmultilang"],
260
+ "mwt": mwt}
257
261
  )
258
262
 
259
263
  def writer(doc, output):
@@ -153,6 +153,7 @@ class Utterance(BaseModel):
153
153
  tier: Tier = Field(default=Tier())
154
154
  content: Sentence
155
155
  text: Optional[str] = Field(default=None)
156
+ override_lang: Optional[str] = Field(default=None)
156
157
  translation: Optional[str] = Field(default=None)
157
158
  time: Optional[Tuple[int,int]] = Field(default=None)
158
159
  custom_dependencies: List[CustomLine] = Field(default=[])
@@ -300,6 +300,7 @@ def chat_parse_doc(lines, special_mor=False):
300
300
  content=line.strip()))
301
301
 
302
302
  # parse the actual utterance
303
+ multilingual = re.findall(r"^\[- (\w+)\]", text)
303
304
  parsed, delim = chat_parse_utterance(text, mor, gra, wor, additional)
304
305
 
305
306
  # get the timing of the utterance
@@ -313,7 +314,8 @@ def chat_parse_doc(lines, special_mor=False):
313
314
  "text": text,
314
315
  "delim": delim,
315
316
  "custom_dependencies": additional,
316
- "translation": translation
317
+ "translation": translation,
318
+ "override_lang": None if len(multilingual) == 0 else multilingual[0]
317
319
  })
318
320
 
319
321
  timing = re.findall(rf"\x15(\d+)_(\d+)\x15", text)
@@ -710,7 +710,7 @@ def adlist_postprocessor(i, lang, adlist):
710
710
  return cpy
711
711
 
712
712
  ######
713
- def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, **kwargs):
713
+ def morphoanalyze(doc: Document, retokenize:bool, skipmultilang:bool, status_hook:callable = None, **kwargs):
714
714
  L.debug("Starting Stanza...")
715
715
  inputs = []
716
716
 
@@ -788,6 +788,8 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
788
788
  L.info(f"Stanza processing utterance {indx+1}/{len(doc.content)}")
789
789
  if not isinstance(i, Utterance):
790
790
  continue
791
+ if i.override_lang and skipmultilang:
792
+ continue
791
793
 
792
794
  # generate simplified version of the line
793
795
  line = str(i)
@@ -1014,4 +1016,4 @@ class StanzaEngine(BatchalignEngine):
1014
1016
  self.status_hook = status_hook
1015
1017
 
1016
1018
  def process(self, doc, **kwargs):
1017
- return morphoanalyze(doc, retokenize=kwargs.get("retokenize", False), status_hook=self.status_hook, mwt=kwargs.get("mwt", {}))
1019
+ return morphoanalyze(doc, retokenize=kwargs.get("retokenize", False), skipmultilang=kwargs.get("skipmultilang", False), status_hook=self.status_hook, mwt=kwargs.get("mwt", {}))
@@ -0,0 +1,3 @@
1
+ 0.7.18-post.8
2
+ April 27th, 2025
3
+ skip multilang
@@ -1,3 +0,0 @@
1
- 0.7.18-post.7
2
- April 26th, 2025
3
- l2 for @s