batchalign 0.7.3b15__tar.gz → 0.7.3b16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. {batchalign-0.7.3b15/batchalign.egg-info → batchalign-0.7.3b16}/PKG-INFO +1 -1
  2. batchalign-0.7.3b16/batchalign/pipelines/morphosyntax/ja/verbforms.py +118 -0
  3. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/morphosyntax/ud.py +6 -1
  4. batchalign-0.7.3b16/batchalign/version +3 -0
  5. {batchalign-0.7.3b15 → batchalign-0.7.3b16/batchalign.egg-info}/PKG-INFO +1 -1
  6. batchalign-0.7.3b15/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -56
  7. batchalign-0.7.3b15/batchalign/version +0 -3
  8. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/LICENSE +0 -0
  9. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/MANIFEST.in +0 -0
  10. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/README.md +0 -0
  11. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/__init__.py +0 -0
  12. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/__main__.py +0 -0
  13. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/cli/__init__.py +0 -0
  14. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/cli/cli.py +0 -0
  15. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/cli/dispatch.py +0 -0
  16. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/constants.py +0 -0
  17. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/document.py +0 -0
  18. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/errors.py +0 -0
  19. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/formats/__init__.py +0 -0
  20. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/formats/base.py +0 -0
  21. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/formats/chat/__init__.py +0 -0
  22. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/formats/chat/file.py +0 -0
  23. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/formats/chat/generator.py +0 -0
  24. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/formats/chat/lexer.py +0 -0
  25. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/formats/chat/parser.py +0 -0
  26. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/formats/chat/utils.py +0 -0
  27. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/formats/textgrid/__init__.py +0 -0
  28. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/formats/textgrid/file.py +0 -0
  29. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/formats/textgrid/generator.py +0 -0
  30. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/formats/textgrid/parser.py +0 -0
  31. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/models/__init__.py +0 -0
  32. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/models/resolve.py +0 -0
  33. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/models/speaker/__init__.py +0 -0
  34. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/models/speaker/config.yaml +0 -0
  35. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/models/speaker/infer.py +0 -0
  36. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/models/speaker/utils.py +0 -0
  37. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/models/training/__init__.py +0 -0
  38. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/models/training/run.py +0 -0
  39. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/models/training/utils.py +0 -0
  40. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/models/utils.py +0 -0
  41. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/models/utterance/__init__.py +0 -0
  42. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/models/utterance/dataset.py +0 -0
  43. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/models/utterance/execute.py +0 -0
  44. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/models/utterance/infer.py +0 -0
  45. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/models/utterance/prep.py +0 -0
  46. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/models/utterance/train.py +0 -0
  47. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/models/whisper/__init__.py +0 -0
  48. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/models/whisper/infer_asr.py +0 -0
  49. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/models/whisper/infer_fa.py +0 -0
  50. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/__init__.py +0 -0
  51. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/analysis/__init__.py +0 -0
  52. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/analysis/eval.py +0 -0
  53. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/asr/__init__.py +0 -0
  54. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/asr/rev.py +0 -0
  55. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/asr/utils.py +0 -0
  56. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/asr/whisper.py +0 -0
  57. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/asr/whisperx.py +0 -0
  58. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/base.py +0 -0
  59. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/__init__.py +0 -0
  60. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  61. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  62. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  63. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/retrace.py +0 -0
  64. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  65. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  66. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/support/test.test +0 -0
  67. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/dispatch.py +0 -0
  68. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/fa/__init__.py +0 -0
  69. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  70. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  71. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  72. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/pipeline.py +0 -0
  73. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/speaker/__init__.py +0 -0
  74. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  75. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/utr/__init__.py +0 -0
  76. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/utr/rev_utr.py +0 -0
  77. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/utr/utils.py +0 -0
  78. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  79. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/utterance/__init__.py +0 -0
  80. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  81. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/tests/__init__.py +0 -0
  82. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/tests/conftest.py +0 -0
  83. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  84. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  85. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  86. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  87. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  88. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  89. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  90. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  91. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  92. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  93. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  94. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  95. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/tests/pipelines/fixures.py +0 -0
  96. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  97. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  98. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/tests/test_document.py +0 -0
  99. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/utils/__init__.py +0 -0
  100. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/utils/config.py +0 -0
  101. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/utils/dp.py +0 -0
  102. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign/utils/utils.py +0 -0
  103. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign.egg-info/SOURCES.txt +0 -0
  104. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign.egg-info/dependency_links.txt +0 -0
  105. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign.egg-info/entry_points.txt +0 -0
  106. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign.egg-info/requires.txt +0 -0
  107. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/batchalign.egg-info/top_level.txt +0 -0
  108. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/setup.cfg +0 -0
  109. {batchalign-0.7.3b15 → batchalign-0.7.3b16}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.3b15
3
+ Version: 0.7.3b16
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -0,0 +1,118 @@
1
+ """
2
+ verbforms.py
3
+ Fix Japanese verb forms.
4
+ """
5
+
6
# Rule tables for verbform(). Within every table the FIRST matching entry
# wins, so the order of entries is part of the behavior: a longer needle must
# come before any shorter needle it contains (e.g. "降り" before "降").

# Substring rules checked before the target-based sneeze rule.
# Entry layout: (needle, required_upos, new_upos, new_lemma).
# A required_upos of None means the rule fires for any upos.
_EARLY_SUBSTRING_RULES = (
    ("撮る", None, "verb", "撮る"),
    ("貼る", None, "verb", "貼る"),
    ("混ぜ", None, "verb", "混ぜる"),
    ("釣る", None, "verb", "釣る"),
    ("速い", "adj", "adj", "速い"),
    ("治ま", None, "verb", "治まる"),
    ("刺す", None, "verb", "刺す"),
    ("降り", None, "verb", "降りる"),  # must precede "降"
    ("降", None, "verb", "降る"),
    ("載せ", None, "verb", "載せる"),
    ("帰", None, "verb", "帰る"),
    ("はい", None, "intj", "はい"),
    ("うん", None, "intj", "うん"),
    ("おっ", None, "intj", "おっ"),
    ("ほら", None, "intj", "ほら"),
    ("ヤッホー", None, "intj", "ヤッホー"),
    ("ただいま", None, "intj", "ただいま"),
    ("あたし", None, "pron", "あたし"),
    ("舐め", None, "verb", "舐める"),
    ("バツ", None, "noun", "バツ"),
    ("ブラシ", None, "noun", "ブラシ"),
    ("引き出し", None, "noun", "引き出し"),  # must precede "引" and "出"
    # NOTE(review): "下さい" is tagged as a noun here; kept as-is, but
    # confirm this is the intended tag.
    ("下さい", None, "noun", "下さい"),
)

# Target values that are rewritten to the lemma "クシャミ".
# NOTE(review): presumably garbled lemmas produced upstream -- confirm.
_SNEEZE_TARGETS = ("シャャミー", "物コャミ")

# Substring rules checked after the sneeze rule. Same layout as above.
_LATE_SUBSTRING_RULES = (
    ("マヨネーズ", None, "noun", "マヨネーズ"),  # must precede "マヨ"
    ("マヨ", None, "noun", "マヨ"),
    ("チップス", None, "noun", "チップス"),
    ("ゴロンっ", None, "noun", "ゴロンっ"),
    ("モチーンっ", None, "noun", "モチーンっ"),
    ("遣", "noun", "verb", "遣る"),
    ("死", None, "verb", "死ぬ"),
    ("立", None, "verb", "立つ"),
    ("引", None, "verb", "引く"),
    ("出", None, "verb", "出す"),
    ("飲", None, "verb", "飲む"),
    ("呼", None, "verb", "呼ぶ"),
    ("脱", None, "verb", "脱ぐ"),
)

# Whole-token rules, checked last.
# Entry layout: (text, required_upos, required_target, new_upos, new_lemma).
# None in a "required" slot means "no constraint".
_EXACT_RULES = (
    ("人っ", None, None, "noun", "人"),
    ("掻く", None, None, "part", "かい"),
    ("な", "part", None, "aux", "な"),
    ("な", "aux", None, "aux", "な"),
    ("だり", None, None, "aux", "たり"),
    ("たり", None, None, "aux", "たり"),
    ("たら", None, None, "sconj", "たら"),
    ("たっ", None, None, "sconj", "たって"),
    ("なさい", None, "為さる", "aux", "為さい"),
    ("よう", "aux", None, "aux", "よう"),
    ("ろ", "aux", "為る", "aux", "ろ"),
)


def _first_substring_match(rules, upos, text):
    """Return (new_upos, new_lemma) for the first rule whose needle occurs in text, else None."""
    for needle, required_upos, new_upos, lemma in rules:
        if needle in text and required_upos in (None, upos):
            return new_upos, lemma
    return None


def verbform(upos, target, text):
    """Apply hand-written Japanese overrides to a token's tag and lemma.

    Rules are tried in a fixed order and the first one that matches decides
    the result:

    1. early substring rules on ``text``;
    2. the sneeze rule on ``target``;
    3. late substring rules on ``text``;
    4. whole-token rules on ``text`` (optionally constrained by ``upos``
       and/or ``target``).

    Rules that could never fire (a repeated "引" check, a repeated
    "な"/"part" check, and exact matches for "呼ん" and "脱" that were
    shadowed by the broader "呼"/"脱" substring rules) have been dropped;
    the result for every input is unchanged.

    Args:
        upos: Part-of-speech tag; the rules compare it against lowercase
            values such as "adj", "noun", "part" and "aux".
        target: Current lemma-like value for the token (presumably the
            tagger's lemma -- confirm against the caller). Returned
            unchanged when no rule fires.
        text: Surface text of the token; must support ``in`` with a string.

    Returns:
        A ``(upos, lemma)`` tuple: the overridden pair if a rule matched,
        otherwise ``(upos, target)`` exactly as passed in.
    """
    hit = _first_substring_match(_EARLY_SUBSTRING_RULES, upos, text)
    if hit is not None:
        return hit

    # This check depends only on the target, so it overrides every rule below
    # regardless of the surface text.
    if target in _SNEEZE_TARGETS:
        return "noun", "クシャミ"

    hit = _first_substring_match(_LATE_SUBSTRING_RULES, upos, text)
    if hit is not None:
        return hit

    for surface, required_upos, required_target, new_upos, lemma in _EXACT_RULES:
        if (
            text == surface
            and required_upos in (None, upos)
            and required_target in (None, target)
        ):
            return new_upos, lemma

    return upos, target
117
+
118
+
@@ -237,6 +237,8 @@ def handler__VERB(word, lang=None):
237
237
  res = handler(word, lang)
238
238
  if "sconj" in res:
239
239
  return res
240
+ elif "verb" not in res:
241
+ return res
240
242
  else:
241
243
  return res+flag+stringify_feats(aspect, mood,
242
244
  tense, polarity, polite,
@@ -266,7 +268,10 @@ def handler__PUNCT(word, lang=None):
266
268
  return "noun|da"
267
269
  elif re.match(r"^['\w-]+$", word.text): # we match text here because .text is the ultimate content
268
270
  # instead of the lemma, which may be entirely weird
269
- return f"x|{word.text}"
271
+ if word.text == "もん":
272
+ return f"part|{word.text}"
273
+ else:
274
+ return f"x|{word.text}"
270
275
 
271
276
  # Register handlers
272
277
  HANDLERS = {
@@ -0,0 +1,3 @@
1
+ 0.7.3-beta.16
2
+ August 3rd, 2024
3
+ more Japanese hand-parse rules
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.3b15
3
+ Version: 0.7.3b16
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -1,56 +0,0 @@
1
- """
2
- verbforms.py
3
- Fix Japanese verb forms.
4
- """
5
-
6
- def verbform(upos, target, text):
7
- if "遣" in text and upos == "noun":
8
- return "verb", "遣る"
9
- if "死" in text:
10
- return "verb", "死ぬ"
11
- if "立" in text:
12
- return "verb", "立つ"
13
- if "引" in text:
14
- return "verb", "引く"
15
- if "出" in text:
16
- return "verb", "出す"
17
- if "引" in text:
18
- return "verb", "引く"
19
- if "飲" in text:
20
- return "verb", "飲む"
21
- if "呼" in text:
22
- return "verb", "呼ぶ"
23
- if "脱" in text:
24
- return "verb", "脱ぐ"
25
- if text == "な" and upos == "part":
26
- return "aux", "な"
27
- if text == "呼ん":
28
- return "verb", "呼ぶ"
29
- if text == "な" and upos == "aux":
30
- return "aux", "な"
31
- if text == "だり":
32
- return "aux", "たり"
33
- if text == "たり":
34
- return "aux", "たり"
35
- if text == "たら":
36
- return "sconj", "たら"
37
- if text == "たっ":
38
- return "sconj", "たって"
39
- # if text == "て" and upos == "sconj":
40
- # return "aux", "て"
41
- if text == "なさい" and target == "為さる":
42
- return "aux", "為さい"
43
- if text == "な" and upos == "part":
44
- return "aux", "な"
45
- if text == "脱" and upos == "noun":
46
- return "verb", "脱"
47
- if text == "よう" and upos == "aux":
48
- return "aux", "よう"
49
- if text == "ろ" and upos == "aux" and target == "為る":
50
- return "aux", "ろ"
51
- # if upos == "verb" and "る" in target:
52
- # return "verb", target.replace("る","").strip()
53
-
54
- return upos,target
55
-
56
-
@@ -1,3 +0,0 @@
1
- 0.7.3-beta.15
2
- July 29th, 2024
3
- Correct Unicode Escapes?
File without changes
File without changes
File without changes
File without changes
File without changes