batchalign 0.7.3b10__tar.gz → 0.7.3b12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108)
  1. {batchalign-0.7.3b10/batchalign.egg-info → batchalign-0.7.3b12}/PKG-INFO +1 -1
  2. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/document.py +3 -3
  3. batchalign-0.7.3b12/batchalign/pipelines/morphosyntax/ja/verbforms.py +34 -0
  4. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/morphosyntax/ud.py +7 -1
  5. batchalign-0.7.3b12/batchalign/version +3 -0
  6. {batchalign-0.7.3b10 → batchalign-0.7.3b12/batchalign.egg-info}/PKG-INFO +1 -1
  7. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign.egg-info/SOURCES.txt +1 -0
  8. batchalign-0.7.3b10/batchalign/version +0 -3
  9. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/LICENSE +0 -0
  10. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/MANIFEST.in +0 -0
  11. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/README.md +0 -0
  12. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/__init__.py +0 -0
  13. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/__main__.py +0 -0
  14. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/cli/__init__.py +0 -0
  15. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/cli/cli.py +0 -0
  16. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/cli/dispatch.py +0 -0
  17. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/constants.py +0 -0
  18. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/errors.py +0 -0
  19. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/formats/__init__.py +0 -0
  20. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/formats/base.py +0 -0
  21. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/formats/chat/__init__.py +0 -0
  22. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/formats/chat/file.py +0 -0
  23. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/formats/chat/generator.py +0 -0
  24. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/formats/chat/lexer.py +0 -0
  25. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/formats/chat/parser.py +0 -0
  26. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/formats/chat/utils.py +0 -0
  27. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/formats/textgrid/__init__.py +0 -0
  28. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/formats/textgrid/file.py +0 -0
  29. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/formats/textgrid/generator.py +0 -0
  30. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/formats/textgrid/parser.py +0 -0
  31. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/models/__init__.py +0 -0
  32. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/models/resolve.py +0 -0
  33. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/models/speaker/__init__.py +0 -0
  34. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/models/speaker/config.yaml +0 -0
  35. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/models/speaker/infer.py +0 -0
  36. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/models/speaker/utils.py +0 -0
  37. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/models/training/__init__.py +0 -0
  38. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/models/training/run.py +0 -0
  39. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/models/training/utils.py +0 -0
  40. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/models/utils.py +0 -0
  41. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/models/utterance/__init__.py +0 -0
  42. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/models/utterance/dataset.py +0 -0
  43. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/models/utterance/execute.py +0 -0
  44. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/models/utterance/infer.py +0 -0
  45. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/models/utterance/prep.py +0 -0
  46. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/models/utterance/train.py +0 -0
  47. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/models/whisper/__init__.py +0 -0
  48. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/models/whisper/infer_asr.py +0 -0
  49. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/models/whisper/infer_fa.py +0 -0
  50. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/__init__.py +0 -0
  51. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/analysis/__init__.py +0 -0
  52. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/analysis/eval.py +0 -0
  53. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/asr/__init__.py +0 -0
  54. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/asr/rev.py +0 -0
  55. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/asr/utils.py +0 -0
  56. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/asr/whisper.py +0 -0
  57. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/asr/whisperx.py +0 -0
  58. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/base.py +0 -0
  59. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/cleanup/__init__.py +0 -0
  60. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  61. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  62. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  63. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/cleanup/retrace.py +0 -0
  64. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  65. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  66. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/cleanup/support/test.test +0 -0
  67. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/dispatch.py +0 -0
  68. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/fa/__init__.py +0 -0
  69. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  70. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  71. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  72. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/pipeline.py +0 -0
  73. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/speaker/__init__.py +0 -0
  74. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  75. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/utr/__init__.py +0 -0
  76. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/utr/rev_utr.py +0 -0
  77. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/utr/utils.py +0 -0
  78. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  79. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/utterance/__init__.py +0 -0
  80. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  81. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/tests/__init__.py +0 -0
  82. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/tests/conftest.py +0 -0
  83. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  84. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  85. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  86. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  87. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  88. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  89. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  90. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  91. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  92. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  93. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  94. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  95. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/tests/pipelines/fixures.py +0 -0
  96. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  97. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  98. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/tests/test_document.py +0 -0
  99. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/utils/__init__.py +0 -0
  100. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/utils/config.py +0 -0
  101. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/utils/dp.py +0 -0
  102. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign/utils/utils.py +0 -0
  103. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign.egg-info/dependency_links.txt +0 -0
  104. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign.egg-info/entry_points.txt +0 -0
  105. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign.egg-info/requires.txt +0 -0
  106. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/batchalign.egg-info/top_level.txt +0 -0
  107. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/setup.cfg +0 -0
  108. {batchalign-0.7.3b10 → batchalign-0.7.3b12}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.3b10
3
+ Version: 0.7.3b12
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -203,9 +203,9 @@ class Utterance(BaseModel):
203
203
  t = re.sub(r"^\+\.\.\.", "", t.strip()).strip()
204
204
  # this is here thrice to prevent stuff from not
205
205
  # matching once because .sub seems to only match once
206
- t = re.sub(r"^[^\w\d\s<]+", "", t.strip()).strip()
207
- t = re.sub(r"^[^\w\d\s<]+", "", t.strip()).strip()
208
- t = re.sub(r"^[^\w\d\s<]+", "", t.strip()).strip()
206
+ # t = re.sub(r"^[^\w\d\s<]+", "", t.strip()).strip()
207
+ # t = re.sub(r"^[^\w\d\s<]+", "", t.strip()).strip()
208
+ # t = re.sub(r"^[^\w\d\s<]+", "", t.strip()).strip()
209
209
  t = re.sub(r",", " , ", t.strip()).strip()
210
210
  t = re.sub(r" +", " ", t.strip()).strip()
211
211
  return t
@@ -0,0 +1,34 @@
1
+ """
2
+ verbforms.py
3
+ Fix Japanese verb forms.
4
+ """
5
+
6
+ def verbform(upos, target, text):
7
+ if text == "な" and upos == "part":
8
+ return "aux", "うな"
9
+ if text == "呼ん":
10
+ return upos, "呼ん"
11
+ if text == "たり":
12
+ return "aux", "たり"
13
+ if text == "たら":
14
+ return "sconj", "たら"
15
+ if text == "たっ":
16
+ return "sconj", "たって"
17
+ if text == "て" and upos == "sconj":
18
+ return "aux", "て"
19
+ if text == "なさい" and target == "為さる":
20
+ return "aux", "為さい"
21
+ if text == "な" and upos == "part":
22
+ return "aux", "な"
23
+ if text == "脱" and upos == "noun":
24
+ return "verb", "脱"
25
+ if text == "よう" and upos == "aux":
26
+ return "aux", "よう"
27
+ if text == "ろ" and upos == "aux" and target == "為る":
28
+ return "aux", "ろ"
29
+ if upos == "verb" and "る" in target:
30
+ return "verb", target.replace("る","").strip()
31
+
32
+ return upos,target
33
+
34
+
@@ -134,7 +134,13 @@ def handler(word, lang=None):
134
134
  if "“" in target:
135
135
  target = word.text
136
136
 
137
- return f"{'' if not unknown else '0'}{word.upos.lower()}|{target}"
137
+ pos = word.upos.lower()
138
+
139
+ if lang == "ja":
140
+ from batchalign.pipelines.morphosyntax.ja.verbforms import verbform
141
+ pos,target = verbform(pos,target,word.text)
142
+
143
+ return f"{'' if not unknown else '0'}{pos}|{target}"
138
144
 
139
145
  # POS specific handler
140
146
  def handler__PRON(word, lang=None):
@@ -0,0 +1,3 @@
1
+ 0.7.3-beta.12
2
+ June 19th, 2024
3
+ UD Fixes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.3b10
3
+ Version: 0.7.3b12
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -72,6 +72,7 @@ batchalign/pipelines/fa/whisper_fa.py
72
72
  batchalign/pipelines/morphosyntax/__init__.py
73
73
  batchalign/pipelines/morphosyntax/ud.py
74
74
  batchalign/pipelines/morphosyntax/fr/case.py
75
+ batchalign/pipelines/morphosyntax/ja/verbforms.py
75
76
  batchalign/pipelines/speaker/__init__.py
76
77
  batchalign/pipelines/speaker/nemo_speaker.py
77
78
  batchalign/pipelines/utr/__init__.py
@@ -1,3 +0,0 @@
1
- 0.7.3-beta.10
2
- June 17, 2024
3
- more fixes for charmap encoding
File without changes
File without changes
File without changes
File without changes
File without changes