batchalign 0.7.3b11__tar.gz → 0.7.3b12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalign-0.7.3b11/batchalign.egg-info → batchalign-0.7.3b12}/PKG-INFO +1 -1
- batchalign-0.7.3b12/batchalign/pipelines/morphosyntax/ja/verbforms.py +34 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/morphosyntax/ud.py +7 -1
- batchalign-0.7.3b12/batchalign/version +3 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12/batchalign.egg-info}/PKG-INFO +1 -1
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign.egg-info/SOURCES.txt +1 -0
- batchalign-0.7.3b11/batchalign/version +0 -3
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/LICENSE +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/MANIFEST.in +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/README.md +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/__init__.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/__main__.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/cli/cli.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/cli/dispatch.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/constants.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/document.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/errors.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/formats/chat/generator.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/formats/chat/lexer.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/formats/chat/parser.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/formats/chat/utils.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/models/resolve.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/__init__.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/dispatch.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/utils/dp.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign.egg-info/requires.txt +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/setup.cfg +0 -0
- {batchalign-0.7.3b11 → batchalign-0.7.3b12}/setup.py +0 -0
@@ -0,0 +1,34 @@
|
|
1
|
+
"""
|
2
|
+
verbforms.py
|
3
|
+
Fix Japanese verb forms.
|
4
|
+
"""
|
5
|
+
|
6
|
+
def verbform(upos: str, target: str, text: str) -> tuple[str, str]:
    """Apply Japanese-specific corrections to a word's POS tag and target form.

    Rules are tried top to bottom and the first match wins; when no rule
    matches, the inputs are returned unchanged.

    Args:
        upos: Part-of-speech tag, compared against lowercase values
            such as ``"part"``, ``"aux"``, ``"verb"``.
        target: Target form proposed by the caller (presumably the lemma
            -- confirm against the caller).
        text: Surface text of the word.

    Returns:
        A ``(pos, target)`` tuple with the corrected values.
    """
    # NOTE(review): the original chain contained a second, identical
    # `text == "な" and upos == "part"` test further down that returned
    # ("aux", "な"). It could never fire because this rule matches first,
    # so it was dropped. Confirm that "うな" is the intended form.
    if text == "な" and upos == "part":
        return "aux", "うな"
    if text == "呼ん":
        return upos, "呼ん"
    if text == "たり":
        return "aux", "たり"
    if text == "たら":
        return "sconj", "たら"
    if text == "たっ":
        return "sconj", "たって"
    if text == "て" and upos == "sconj":
        return "aux", "て"
    if text == "なさい" and target == "為さる":
        return "aux", "為さい"
    if text == "脱" and upos == "noun":
        return "verb", "脱"
    if text == "よう" and upos == "aux":
        return "aux", "よう"
    if text == "ろ" and upos == "aux" and target == "為る":
        return "aux", "ろ"
    if upos == "verb" and "る" in target:
        # Removes every "る" in the target, not only a trailing one.
        # NOTE(review): verify this is intended for verbs with a
        # non-final "る".
        return "verb", target.replace("る", "").strip()

    return upos, target
|
33
|
+
|
34
|
+
|
@@ -134,7 +134,13 @@ def handler(word, lang=None):
|
|
134
134
|
if "“" in target:
|
135
135
|
target = word.text
|
136
136
|
|
137
|
-
|
137
|
+
pos = word.upos.lower()
|
138
|
+
|
139
|
+
if lang == "ja":
|
140
|
+
from batchalign.pipelines.morphosyntax.ja.verbforms import verbform
|
141
|
+
pos,target = verbform(pos,target,word.text)
|
142
|
+
|
143
|
+
return f"{'' if not unknown else '0'}{pos}|{target}"
|
138
144
|
|
139
145
|
# POS specific handler
|
140
146
|
def handler__PRON(word, lang=None):
|
@@ -72,6 +72,7 @@ batchalign/pipelines/fa/whisper_fa.py
|
|
72
72
|
batchalign/pipelines/morphosyntax/__init__.py
|
73
73
|
batchalign/pipelines/morphosyntax/ud.py
|
74
74
|
batchalign/pipelines/morphosyntax/fr/case.py
|
75
|
+
batchalign/pipelines/morphosyntax/ja/verbforms.py
|
75
76
|
batchalign/pipelines/speaker/__init__.py
|
76
77
|
batchalign/pipelines/speaker/nemo_speaker.py
|
77
78
|
batchalign/pipelines/utr/__init__.py
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/cleanup/support/filled_pauses.eng
RENAMED
File without changes
|
{batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/pipelines/cleanup/support/replacements.eng
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/formats/chat/test_chat_generator.py
RENAMED
File without changes
|
{batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/formats/chat/test_chat_lexer.py
RENAMED
File without changes
|
{batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/formats/chat/test_chat_parser.py
RENAMED
File without changes
|
{batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/formats/chat/test_chat_utils.py
RENAMED
File without changes
|
{batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/formats/textgrid/test_textgrid.py
RENAMED
File without changes
|
{batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/pipelines/analysis/test_eval.py
RENAMED
File without changes
|
{batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/pipelines/asr/test_asr_pipeline.py
RENAMED
File without changes
|
{batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/pipelines/asr/test_asr_utils.py
RENAMED
File without changes
|
{batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/pipelines/cleanup/test_disfluency.py
RENAMED
File without changes
|
{batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/pipelines/cleanup/test_parse_support.py
RENAMED
File without changes
|
{batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/pipelines/fa/test_fa_pipeline.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.3b11 → batchalign-0.7.3b12}/batchalign/tests/pipelines/test_pipeline_models.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|