batchalign 0.7.6a27__tar.gz → 0.7.6a29__tar.gz

This diff shows the differences between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (109):
  1. {batchalign-0.7.6a27/batchalign.egg-info → batchalign-0.7.6a29}/PKG-INFO +1 -1
  2. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/morphosyntax/ja/verbforms.py +16 -0
  3. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/morphosyntax/ud.py +10 -4
  4. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/version +1 -1
  5. {batchalign-0.7.6a27 → batchalign-0.7.6a29/batchalign.egg-info}/PKG-INFO +1 -1
  6. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/LICENSE +0 -0
  7. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/MANIFEST.in +0 -0
  8. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/README.md +0 -0
  9. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/__init__.py +0 -0
  10. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/__main__.py +0 -0
  11. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/cli/__init__.py +0 -0
  12. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/cli/cli.py +0 -0
  13. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/cli/dispatch.py +0 -0
  14. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/constants.py +0 -0
  15. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/document.py +0 -0
  16. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/errors.py +0 -0
  17. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/formats/__init__.py +0 -0
  18. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/formats/base.py +0 -0
  19. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/formats/chat/__init__.py +0 -0
  20. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/formats/chat/file.py +0 -0
  21. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/formats/chat/generator.py +0 -0
  22. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/formats/chat/lexer.py +0 -0
  23. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/formats/chat/parser.py +0 -0
  24. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/formats/chat/utils.py +0 -0
  25. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/formats/textgrid/__init__.py +0 -0
  26. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/formats/textgrid/file.py +0 -0
  27. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/formats/textgrid/generator.py +0 -0
  28. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/formats/textgrid/parser.py +0 -0
  29. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/models/__init__.py +0 -0
  30. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/models/resolve.py +0 -0
  31. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/models/speaker/__init__.py +0 -0
  32. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/models/speaker/config.yaml +0 -0
  33. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/models/speaker/infer.py +0 -0
  34. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/models/speaker/utils.py +0 -0
  35. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/models/training/__init__.py +0 -0
  36. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/models/training/run.py +0 -0
  37. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/models/training/utils.py +0 -0
  38. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/models/utils.py +0 -0
  39. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/models/utterance/__init__.py +0 -0
  40. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/models/utterance/dataset.py +0 -0
  41. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/models/utterance/execute.py +0 -0
  42. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/models/utterance/infer.py +0 -0
  43. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/models/utterance/prep.py +0 -0
  44. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/models/utterance/train.py +0 -0
  45. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/models/whisper/__init__.py +0 -0
  46. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/models/whisper/infer_asr.py +0 -0
  47. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/models/whisper/infer_fa.py +0 -0
  48. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/__init__.py +0 -0
  49. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/analysis/__init__.py +0 -0
  50. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/analysis/eval.py +0 -0
  51. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/asr/__init__.py +0 -0
  52. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/asr/rev.py +0 -0
  53. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/asr/utils.py +0 -0
  54. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/asr/whisper.py +0 -0
  55. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/asr/whisperx.py +0 -0
  56. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/base.py +0 -0
  57. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/cleanup/__init__.py +0 -0
  58. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  59. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  60. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  61. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/cleanup/retrace.py +0 -0
  62. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  63. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  64. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/cleanup/support/test.test +0 -0
  65. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/dispatch.py +0 -0
  66. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/fa/__init__.py +0 -0
  67. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  68. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  69. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  70. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  71. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  72. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/pipeline.py +0 -0
  73. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/speaker/__init__.py +0 -0
  74. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  75. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/utr/__init__.py +0 -0
  76. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/utr/rev_utr.py +0 -0
  77. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/utr/utils.py +0 -0
  78. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  79. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/utterance/__init__.py +0 -0
  80. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  81. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/tests/__init__.py +0 -0
  82. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/tests/conftest.py +0 -0
  83. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  84. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  85. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  86. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  87. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  88. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  89. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  90. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  91. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  92. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  93. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  94. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  95. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/tests/pipelines/fixures.py +0 -0
  96. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  97. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  98. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/tests/test_document.py +0 -0
  99. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/utils/__init__.py +0 -0
  100. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/utils/config.py +0 -0
  101. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/utils/dp.py +0 -0
  102. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign/utils/utils.py +0 -0
  103. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign.egg-info/SOURCES.txt +0 -0
  104. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign.egg-info/dependency_links.txt +0 -0
  105. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign.egg-info/entry_points.txt +0 -0
  106. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign.egg-info/requires.txt +0 -0
  107. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/batchalign.egg-info/top_level.txt +0 -0
  108. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/setup.cfg +0 -0
  109. {batchalign-0.7.6a27 → batchalign-0.7.6a29}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.6a27
3
+ Version: 0.7.6a29
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -4,6 +4,22 @@ Fix Japanese verb forms.
4
4
  """
5
5
 
6
6
  def verbform(upos, target, text):
7
+ if "ちゃ" in text:
8
+ return "sconj", "ば"
9
+ if "なきゃ" in text:
10
+ return "sconj", "なきゃ"
11
+ if "じゃ" in text:
12
+ return "sconj", "ちゃ"
13
+ if "れる" in text:
14
+ return "aux", "られる"
15
+ if "じゃう" in text:
16
+ return "aux", "ちゃう"
17
+ if "よう" in text:
18
+ return "aux", "おう"
19
+ if "だら" in text:
20
+ return "aux", "たら"
21
+ if "だ" in target:
22
+ return "aux", "た"
7
23
  if "為る" in target and 'さ' == text:
8
24
  return "part", "為る"
9
25
  if "無い" in target:
@@ -258,6 +258,8 @@ def handler__VERB(word, lang=None):
258
258
  res = handler(word, lang)
259
259
  if "sconj" in res:
260
260
  return res
261
+ elif word.text == "ろ":
262
+ return res
261
263
  elif "verb" not in res and "aux" not in res:
262
264
  if word.text == "たり":
263
265
  return res+stringify_feats("Inf", "S")
@@ -893,8 +895,11 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
893
895
 
894
896
  for i in aligned:
895
897
  if isinstance(i, Match):
896
- if i.reference_payload not in chunks_backplate[i.payload]:
897
- chunks_backplate[i.payload].append(i.reference_payload)
898
+ if not creaky:
899
+ if i.reference_payload not in chunks_backplate[i.payload]:
900
+ chunks_backplate[i.payload].append(i.reference_payload)
901
+ else:
902
+ collected += i.key
898
903
  elif isinstance(i, Extra) and i.extra_type == ExtraType.PAYLOAD:
899
904
  if i.key == "⁎":
900
905
  creaky = not creaky
@@ -913,7 +918,6 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
913
918
  pos = "x",
914
919
  feats = ""
915
920
  )]
916
-
917
921
  poses = [i.morphology[0].pos.upper() for i in ut
918
922
  if i.morphology
919
923
  and len(i.morphology) > 0]
@@ -938,9 +942,11 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
938
942
  retokenized_ut = retokenized_ut.replace(" ↑", "↑")
939
943
  retokenized_ut = re.sub(r"@ ?w ?p", "@wp", retokenized_ut)
940
944
  retokenized_ut = retokenized_ut.replace(" @", "@")
941
- retokenized_ut = re.sub(r"\*[* ]*", "*", retokenized_ut)
945
+ # retokenized_ut = re.sub(r"\*[* ]*", "*", retokenized_ut)
942
946
  retokenized_ut = re.sub(r"⁎[⁎ ]*(.*?)[⁎ ]*⁎", r"⁎\1⁎ ", retokenized_ut)
943
947
  retokenized_ut = re.sub(r"\[\*(.)\]", r"[* \1]", retokenized_ut)
948
+ retokenized_ut = re.sub(r" +", r" ", retokenized_ut)
949
+
944
950
  # pray to everyone that it works---this will simply crash and ignore
945
951
  # the utterance if it didn't work, so we are doing this as a sanity
946
952
  # check rather than needing the parsed result
@@ -1,3 +1,3 @@
1
- 0.7.6-alpha.27
1
+ 0.7.6-alpha.29
2
2
  November 10, 2024
3
3
  Japanese changes + packaging
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.6a27
3
+ Version: 0.7.6a29
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
File without changes
File without changes
File without changes
File without changes
File without changes