batchalign 0.7.3b15__tar.gz → 0.7.3b17__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalign-0.7.3b15/batchalign.egg-info → batchalign-0.7.3b17}/PKG-INFO +62 -2
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/formats/chat/utils.py +27 -15
- batchalign-0.7.3b17/batchalign/pipelines/morphosyntax/ja/verbforms.py +118 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/morphosyntax/ud.py +7 -1
- batchalign-0.7.3b17/batchalign/version +3 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17/batchalign.egg-info}/PKG-INFO +62 -2
- batchalign-0.7.3b15/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -56
- batchalign-0.7.3b15/batchalign/version +0 -3
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/LICENSE +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/MANIFEST.in +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/README.md +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/__init__.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/__main__.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/cli/cli.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/cli/dispatch.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/constants.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/document.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/errors.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/formats/chat/generator.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/formats/chat/lexer.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/formats/chat/parser.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/resolve.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/__init__.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/dispatch.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/utils/dp.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign.egg-info/SOURCES.txt +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign.egg-info/requires.txt +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/setup.cfg +0 -0
- {batchalign-0.7.3b15 → batchalign-0.7.3b17}/setup.py +0 -0
@@ -1,16 +1,76 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: batchalign
|
3
|
-
Version: 0.7.3b15
|
3
|
+
Version: 0.7.3b17
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
7
7
|
Classifier: Development Status :: 3 - Alpha
|
8
8
|
Classifier: Topic :: Utilities
|
9
9
|
Description-Content-Type: text/markdown
|
10
|
+
License-File: LICENSE
|
11
|
+
Requires-Dist: pydantic>=2.4
|
12
|
+
Requires-Dist: nltk>=3.8
|
13
|
+
Requires-Dist: praatio<6.1.0,>=6.0.0
|
14
|
+
Requires-Dist: torch<2.2.0,>=2.0.1
|
15
|
+
Requires-Dist: torchaudio<2.2.0,>=2.1.0
|
16
|
+
Requires-Dist: pyAudioAnalysis==0.3.14
|
17
|
+
Requires-Dist: hmmlearn==0.3.0
|
18
|
+
Requires-Dist: eyed3~=0.9.7
|
19
|
+
Requires-Dist: pydub<0.26.0,>=0.25.1
|
20
|
+
Requires-Dist: imblearn
|
21
|
+
Requires-Dist: plotly>=5.18.0
|
22
|
+
Requires-Dist: transformers~=4.37
|
23
|
+
Requires-Dist: tokenizers>=0.14.1
|
24
|
+
Requires-Dist: pycountry>=22.3
|
25
|
+
Requires-Dist: stanza>=1.7
|
26
|
+
Requires-Dist: scipy~=1.11
|
27
|
+
Requires-Dist: rev_ai>=2.18.0
|
28
|
+
Requires-Dist: rich~=13.6
|
29
|
+
Requires-Dist: click~=8.1
|
30
|
+
Requires-Dist: matplotlib<4.0.0,>=3.8.0
|
31
|
+
Requires-Dist: pyfiglet==1.0.2
|
32
|
+
Requires-Dist: soundfile~=0.12.0
|
33
|
+
Requires-Dist: rich-click>=1.7.0
|
34
|
+
Requires-Dist: typing-extensions
|
10
35
|
Provides-Extra: dev
|
36
|
+
Requires-Dist: pytest; extra == "dev"
|
11
37
|
Provides-Extra: train
|
38
|
+
Requires-Dist: accelerate~=0.27; extra == "train"
|
12
39
|
Provides-Extra: speaker
|
13
|
-
|
40
|
+
Requires-Dist: nemo-toolkit~=1.21.0; extra == "speaker"
|
41
|
+
Requires-Dist: omegaconf~=2.3.0; extra == "speaker"
|
42
|
+
Requires-Dist: pydub~=0.25.0; extra == "speaker"
|
43
|
+
Requires-Dist: braceexpand; extra == "speaker"
|
44
|
+
Requires-Dist: editdistance; extra == "speaker"
|
45
|
+
Requires-Dist: g2p_en; extra == "speaker"
|
46
|
+
Requires-Dist: ipywidgets; extra == "speaker"
|
47
|
+
Requires-Dist: jiwer; extra == "speaker"
|
48
|
+
Requires-Dist: kaldi-python-io; extra == "speaker"
|
49
|
+
Requires-Dist: kaldiio; extra == "speaker"
|
50
|
+
Requires-Dist: lhotse>=1.20.0; extra == "speaker"
|
51
|
+
Requires-Dist: librosa>=0.10.0; extra == "speaker"
|
52
|
+
Requires-Dist: marshmallow; extra == "speaker"
|
53
|
+
Requires-Dist: matplotlib; extra == "speaker"
|
54
|
+
Requires-Dist: packaging; extra == "speaker"
|
55
|
+
Requires-Dist: pyannote.core; extra == "speaker"
|
56
|
+
Requires-Dist: pyannote.metrics; extra == "speaker"
|
57
|
+
Requires-Dist: pydub; extra == "speaker"
|
58
|
+
Requires-Dist: pyloudnorm; extra == "speaker"
|
59
|
+
Requires-Dist: resampy; extra == "speaker"
|
60
|
+
Requires-Dist: ruamel.yaml; extra == "speaker"
|
61
|
+
Requires-Dist: scipy>=0.14; extra == "speaker"
|
62
|
+
Requires-Dist: soundfile; extra == "speaker"
|
63
|
+
Requires-Dist: sox; extra == "speaker"
|
64
|
+
Requires-Dist: texterrors; extra == "speaker"
|
65
|
+
Requires-Dist: hydra-core<=1.3.2,>1.3; extra == "speaker"
|
66
|
+
Requires-Dist: omegaconf<=2.3; extra == "speaker"
|
67
|
+
Requires-Dist: pytorch-lightning>=2.2.1; extra == "speaker"
|
68
|
+
Requires-Dist: torchmetrics>=0.11.0; extra == "speaker"
|
69
|
+
Requires-Dist: transformers>=4.36.0; extra == "speaker"
|
70
|
+
Requires-Dist: wandb; extra == "speaker"
|
71
|
+
Requires-Dist: webdataset>=0.2.86; extra == "speaker"
|
72
|
+
Requires-Dist: sentencepiece; extra == "speaker"
|
73
|
+
Requires-Dist: youtokentome; extra == "speaker"
|
14
74
|
|
15
75
|
# TalkBank | Batchalign2
|
16
76
|
|
@@ -43,17 +43,8 @@ def chat_parse_mor(mor_str):
|
|
43
43
|
if mor_str in ENDING_PUNCT:
|
44
44
|
return [Morphology(lemma=mor_str, pos="PUNCT", feats="")]
|
45
45
|
|
46
|
-
# JANK handle + forms
|
47
|
-
if "+" in mor_str:
|
48
|
-
pos, rest = mor_str.split("+", maxsplit=1)
|
49
|
-
return [Morphology.model_validate({
|
50
|
-
"lemma": "+"+rest,
|
51
|
-
"pos": pos.strip("|"),
|
52
|
-
"feats": "",
|
53
|
-
})]
|
54
|
-
|
55
46
|
try:
|
56
|
-
mors = [i.split("|") for i in re.split("[~$]", mor_str)]
|
47
|
+
mors = [i.split("|", maxsplit=1) for i in re.split("[~$]", mor_str)]
|
57
48
|
# TODO epic jank: backwards compatibility check: if a form
|
58
49
|
# uses a lot of dashes, its probably because its old-style
|
59
50
|
# dash seperated; if it doesn't; it probably is new-style
|
@@ -67,11 +58,32 @@ def chat_parse_mor(mor_str):
|
|
67
58
|
except:
|
68
59
|
raise CHATValidationException(f"mor parser recieved invalid mor string: '{mor_str}'")
|
69
60
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
"
|
74
|
-
|
61
|
+
|
62
|
+
mors = []
|
63
|
+
for p,l,f in zip(pos, lemmas, feats):
|
64
|
+
# if "+" not in mor_str:
|
65
|
+
mors.append(Morphology.model_validate({
|
66
|
+
"lemma": l,
|
67
|
+
"pos": p,
|
68
|
+
"feats": f,
|
69
|
+
}))
|
70
|
+
# else:
|
71
|
+
# breakpoint()
|
72
|
+
# pos, rest = mor_str.split("+", maxsplit=1)
|
73
|
+
|
74
|
+
|
75
|
+
|
76
|
+
# # JANK handle + forms
|
77
|
+
# if "+" in mor_str:
|
78
|
+
# pos, rest = mor_str.split("+", maxsplit=1)
|
79
|
+
# return [Morphology.model_validate({
|
80
|
+
# "lemma": "+"+rest,
|
81
|
+
# "pos": pos.strip("|"),
|
82
|
+
# "feats": "",
|
83
|
+
# })]
|
84
|
+
|
85
|
+
|
86
|
+
# mors = [ ]
|
75
87
|
|
76
88
|
return mors
|
77
89
|
|
@@ -0,0 +1,118 @@
|
|
1
|
+
"""
|
2
|
+
verbforms.py
|
3
|
+
Fix Japanese verb forms.
|
4
|
+
"""
|
5
|
+
|
6
|
+
def verbform(upos, target, text):
|
7
|
+
if "撮る" in text:
|
8
|
+
return "verb", "撮る"
|
9
|
+
if "貼る" in text:
|
10
|
+
return "verb", "貼る"
|
11
|
+
if "混ぜ" in text:
|
12
|
+
return "verb", "混ぜる"
|
13
|
+
if "釣る" in text:
|
14
|
+
return "verb", "釣る"
|
15
|
+
if "速い" in text and upos == "adj":
|
16
|
+
return "adj", "速い"
|
17
|
+
if "治ま" in text:
|
18
|
+
return "verb", "治まる"
|
19
|
+
if "刺す" in text:
|
20
|
+
return "verb", "刺す"
|
21
|
+
if "降り" in text:
|
22
|
+
return "verb", "降りる"
|
23
|
+
if "降" in text:
|
24
|
+
return "verb", "降る"
|
25
|
+
if "載せ" in text:
|
26
|
+
return "verb", "載せる"
|
27
|
+
if "帰" in text:
|
28
|
+
return "verb", "帰る"
|
29
|
+
if "はい" in text:
|
30
|
+
return "intj", "はい"
|
31
|
+
if "うん" in text:
|
32
|
+
return "intj", "うん"
|
33
|
+
if "おっ" in text:
|
34
|
+
return "intj", "おっ"
|
35
|
+
if "ほら" in text:
|
36
|
+
return "intj", "ほら"
|
37
|
+
if "ヤッホー" in text:
|
38
|
+
return "intj", "ヤッホー"
|
39
|
+
if "ただいま" in text:
|
40
|
+
return "intj", "ただいま"
|
41
|
+
if "あたし" in text:
|
42
|
+
return "pron", "あたし"
|
43
|
+
if "舐め" in text:
|
44
|
+
return "verb", "舐める"
|
45
|
+
if "バツ" in text:
|
46
|
+
return "noun", "バツ"
|
47
|
+
if "ブラシ" in text:
|
48
|
+
return "noun", "ブラシ"
|
49
|
+
if "引き出し" in text:
|
50
|
+
return "noun", "引き出し"
|
51
|
+
if "下さい" in text:
|
52
|
+
return "noun", "下さい"
|
53
|
+
if target in ["シャャミー", "物コャミ"]:
|
54
|
+
return "noun", "クシャミ"
|
55
|
+
if "マヨネーズ" in text:
|
56
|
+
return "noun", "マヨネーズ"
|
57
|
+
if "マヨ" in text:
|
58
|
+
return "noun", "マヨ"
|
59
|
+
if "チップス" in text:
|
60
|
+
return "noun", "チップス"
|
61
|
+
if "ゴロンっ" in text:
|
62
|
+
return "noun", "ゴロンっ"
|
63
|
+
if "モチーンっ" in text:
|
64
|
+
return "noun", "モチーンっ"
|
65
|
+
if "人っ" == text:
|
66
|
+
return "noun", "人"
|
67
|
+
if text == "掻く":
|
68
|
+
return "part", "かい"
|
69
|
+
if "遣" in text and upos == "noun":
|
70
|
+
return "verb", "遣る"
|
71
|
+
if "死" in text:
|
72
|
+
return "verb", "死ぬ"
|
73
|
+
if "立" in text:
|
74
|
+
return "verb", "立つ"
|
75
|
+
if "引" in text:
|
76
|
+
return "verb", "引く"
|
77
|
+
if "出" in text:
|
78
|
+
return "verb", "出す"
|
79
|
+
if "引" in text:
|
80
|
+
return "verb", "引く"
|
81
|
+
if "飲" in text:
|
82
|
+
return "verb", "飲む"
|
83
|
+
if "呼" in text:
|
84
|
+
return "verb", "呼ぶ"
|
85
|
+
if "脱" in text:
|
86
|
+
return "verb", "脱ぐ"
|
87
|
+
if text == "な" and upos == "part":
|
88
|
+
return "aux", "な"
|
89
|
+
if text == "呼ん":
|
90
|
+
return "verb", "呼ぶ"
|
91
|
+
if text == "な" and upos == "aux":
|
92
|
+
return "aux", "な"
|
93
|
+
if text == "だり":
|
94
|
+
return "aux", "たり"
|
95
|
+
if text == "たり":
|
96
|
+
return "aux", "たり"
|
97
|
+
if text == "たら":
|
98
|
+
return "sconj", "たら"
|
99
|
+
if text == "たっ":
|
100
|
+
return "sconj", "たって"
|
101
|
+
# if text == "て" and upos == "sconj":
|
102
|
+
# return "aux", "て"
|
103
|
+
if text == "なさい" and target == "為さる":
|
104
|
+
return "aux", "為さい"
|
105
|
+
if text == "な" and upos == "part":
|
106
|
+
return "aux", "な"
|
107
|
+
if text == "脱" and upos == "noun":
|
108
|
+
return "verb", "脱"
|
109
|
+
if text == "よう" and upos == "aux":
|
110
|
+
return "aux", "よう"
|
111
|
+
if text == "ろ" and upos == "aux" and target == "為る":
|
112
|
+
return "aux", "ろ"
|
113
|
+
# if upos == "verb" and "る" in target:
|
114
|
+
# return "verb", target.replace("る","").strip()
|
115
|
+
|
116
|
+
return upos,target
|
117
|
+
|
118
|
+
|
@@ -237,6 +237,8 @@ def handler__VERB(word, lang=None):
|
|
237
237
|
res = handler(word, lang)
|
238
238
|
if "sconj" in res:
|
239
239
|
return res
|
240
|
+
elif "verb" not in res:
|
241
|
+
return res
|
240
242
|
else:
|
241
243
|
return res+flag+stringify_feats(aspect, mood,
|
242
244
|
tense, polarity, polite,
|
@@ -266,7 +268,10 @@ def handler__PUNCT(word, lang=None):
|
|
266
268
|
return "noun|da"
|
267
269
|
elif re.match(r"^['\w-]+$", word.text): # we match text here because .text is the ultumate content
|
268
270
|
# instead of the lemma, which maybe entirely weird
|
269
|
-
|
271
|
+
if word.text == "もん":
|
272
|
+
return f"part|{word.text}"
|
273
|
+
else:
|
274
|
+
return f"x|{word.text}"
|
270
275
|
|
271
276
|
# Register handlers
|
272
277
|
HANDLERS = {
|
@@ -885,6 +890,7 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
|
|
885
890
|
retokenized_ut = retokenized_ut.replace(" :", ":")
|
886
891
|
retokenized_ut = retokenized_ut.replace("+ ,", "+,")
|
887
892
|
retokenized_ut = retokenized_ut.replace(": <", ": <")
|
893
|
+
# retokenized_ut = retokenized_ut.replace("[ *", "[*")
|
888
894
|
retokenized_ut = retokenized_ut.replace(" ↑", "↑")
|
889
895
|
retokenized_ut = re.sub(r"@ ?w ?p", "@wp", retokenized_ut)
|
890
896
|
retokenized_ut = retokenized_ut.replace(" @", "@")
|
@@ -1,16 +1,76 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: batchalign
|
3
|
-
Version: 0.7.3b15
|
3
|
+
Version: 0.7.3b17
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
7
7
|
Classifier: Development Status :: 3 - Alpha
|
8
8
|
Classifier: Topic :: Utilities
|
9
9
|
Description-Content-Type: text/markdown
|
10
|
+
License-File: LICENSE
|
11
|
+
Requires-Dist: pydantic>=2.4
|
12
|
+
Requires-Dist: nltk>=3.8
|
13
|
+
Requires-Dist: praatio<6.1.0,>=6.0.0
|
14
|
+
Requires-Dist: torch<2.2.0,>=2.0.1
|
15
|
+
Requires-Dist: torchaudio<2.2.0,>=2.1.0
|
16
|
+
Requires-Dist: pyAudioAnalysis==0.3.14
|
17
|
+
Requires-Dist: hmmlearn==0.3.0
|
18
|
+
Requires-Dist: eyed3~=0.9.7
|
19
|
+
Requires-Dist: pydub<0.26.0,>=0.25.1
|
20
|
+
Requires-Dist: imblearn
|
21
|
+
Requires-Dist: plotly>=5.18.0
|
22
|
+
Requires-Dist: transformers~=4.37
|
23
|
+
Requires-Dist: tokenizers>=0.14.1
|
24
|
+
Requires-Dist: pycountry>=22.3
|
25
|
+
Requires-Dist: stanza>=1.7
|
26
|
+
Requires-Dist: scipy~=1.11
|
27
|
+
Requires-Dist: rev_ai>=2.18.0
|
28
|
+
Requires-Dist: rich~=13.6
|
29
|
+
Requires-Dist: click~=8.1
|
30
|
+
Requires-Dist: matplotlib<4.0.0,>=3.8.0
|
31
|
+
Requires-Dist: pyfiglet==1.0.2
|
32
|
+
Requires-Dist: soundfile~=0.12.0
|
33
|
+
Requires-Dist: rich-click>=1.7.0
|
34
|
+
Requires-Dist: typing-extensions
|
10
35
|
Provides-Extra: dev
|
36
|
+
Requires-Dist: pytest; extra == "dev"
|
11
37
|
Provides-Extra: train
|
38
|
+
Requires-Dist: accelerate~=0.27; extra == "train"
|
12
39
|
Provides-Extra: speaker
|
13
|
-
|
40
|
+
Requires-Dist: nemo-toolkit~=1.21.0; extra == "speaker"
|
41
|
+
Requires-Dist: omegaconf~=2.3.0; extra == "speaker"
|
42
|
+
Requires-Dist: pydub~=0.25.0; extra == "speaker"
|
43
|
+
Requires-Dist: braceexpand; extra == "speaker"
|
44
|
+
Requires-Dist: editdistance; extra == "speaker"
|
45
|
+
Requires-Dist: g2p_en; extra == "speaker"
|
46
|
+
Requires-Dist: ipywidgets; extra == "speaker"
|
47
|
+
Requires-Dist: jiwer; extra == "speaker"
|
48
|
+
Requires-Dist: kaldi-python-io; extra == "speaker"
|
49
|
+
Requires-Dist: kaldiio; extra == "speaker"
|
50
|
+
Requires-Dist: lhotse>=1.20.0; extra == "speaker"
|
51
|
+
Requires-Dist: librosa>=0.10.0; extra == "speaker"
|
52
|
+
Requires-Dist: marshmallow; extra == "speaker"
|
53
|
+
Requires-Dist: matplotlib; extra == "speaker"
|
54
|
+
Requires-Dist: packaging; extra == "speaker"
|
55
|
+
Requires-Dist: pyannote.core; extra == "speaker"
|
56
|
+
Requires-Dist: pyannote.metrics; extra == "speaker"
|
57
|
+
Requires-Dist: pydub; extra == "speaker"
|
58
|
+
Requires-Dist: pyloudnorm; extra == "speaker"
|
59
|
+
Requires-Dist: resampy; extra == "speaker"
|
60
|
+
Requires-Dist: ruamel.yaml; extra == "speaker"
|
61
|
+
Requires-Dist: scipy>=0.14; extra == "speaker"
|
62
|
+
Requires-Dist: soundfile; extra == "speaker"
|
63
|
+
Requires-Dist: sox; extra == "speaker"
|
64
|
+
Requires-Dist: texterrors; extra == "speaker"
|
65
|
+
Requires-Dist: hydra-core<=1.3.2,>1.3; extra == "speaker"
|
66
|
+
Requires-Dist: omegaconf<=2.3; extra == "speaker"
|
67
|
+
Requires-Dist: pytorch-lightning>=2.2.1; extra == "speaker"
|
68
|
+
Requires-Dist: torchmetrics>=0.11.0; extra == "speaker"
|
69
|
+
Requires-Dist: transformers>=4.36.0; extra == "speaker"
|
70
|
+
Requires-Dist: wandb; extra == "speaker"
|
71
|
+
Requires-Dist: webdataset>=0.2.86; extra == "speaker"
|
72
|
+
Requires-Dist: sentencepiece; extra == "speaker"
|
73
|
+
Requires-Dist: youtokentome; extra == "speaker"
|
14
74
|
|
15
75
|
# TalkBank | Batchalign2
|
16
76
|
|
@@ -1,56 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
verbforms.py
|
3
|
-
Fix Japanese verb forms.
|
4
|
-
"""
|
5
|
-
|
6
|
-
def verbform(upos, target, text):
|
7
|
-
if "遣" in text and upos == "noun":
|
8
|
-
return "verb", "遣る"
|
9
|
-
if "死" in text:
|
10
|
-
return "verb", "死ぬ"
|
11
|
-
if "立" in text:
|
12
|
-
return "verb", "立つ"
|
13
|
-
if "引" in text:
|
14
|
-
return "verb", "引く"
|
15
|
-
if "出" in text:
|
16
|
-
return "verb", "出す"
|
17
|
-
if "引" in text:
|
18
|
-
return "verb", "引く"
|
19
|
-
if "飲" in text:
|
20
|
-
return "verb", "飲む"
|
21
|
-
if "呼" in text:
|
22
|
-
return "verb", "呼ぶ"
|
23
|
-
if "脱" in text:
|
24
|
-
return "verb", "脱ぐ"
|
25
|
-
if text == "な" and upos == "part":
|
26
|
-
return "aux", "な"
|
27
|
-
if text == "呼ん":
|
28
|
-
return "verb", "呼ぶ"
|
29
|
-
if text == "な" and upos == "aux":
|
30
|
-
return "aux", "な"
|
31
|
-
if text == "だり":
|
32
|
-
return "aux", "たり"
|
33
|
-
if text == "たり":
|
34
|
-
return "aux", "たり"
|
35
|
-
if text == "たら":
|
36
|
-
return "sconj", "たら"
|
37
|
-
if text == "たっ":
|
38
|
-
return "sconj", "たって"
|
39
|
-
# if text == "て" and upos == "sconj":
|
40
|
-
# return "aux", "て"
|
41
|
-
if text == "なさい" and target == "為さる":
|
42
|
-
return "aux", "為さい"
|
43
|
-
if text == "な" and upos == "part":
|
44
|
-
return "aux", "な"
|
45
|
-
if text == "脱" and upos == "noun":
|
46
|
-
return "verb", "脱"
|
47
|
-
if text == "よう" and upos == "aux":
|
48
|
-
return "aux", "よう"
|
49
|
-
if text == "ろ" and upos == "aux" and target == "為る":
|
50
|
-
return "aux", "ろ"
|
51
|
-
# if upos == "verb" and "る" in target:
|
52
|
-
# return "verb", target.replace("る","").strip()
|
53
|
-
|
54
|
-
return upos,target
|
55
|
-
|
56
|
-
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/cleanup/support/filled_pauses.eng
RENAMED
File without changes
|
{batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/pipelines/cleanup/support/replacements.eng
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/formats/chat/test_chat_generator.py
RENAMED
File without changes
|
{batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/formats/chat/test_chat_lexer.py
RENAMED
File without changes
|
{batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/formats/chat/test_chat_parser.py
RENAMED
File without changes
|
{batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/formats/chat/test_chat_utils.py
RENAMED
File without changes
|
{batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/formats/textgrid/test_textgrid.py
RENAMED
File without changes
|
{batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/pipelines/analysis/test_eval.py
RENAMED
File without changes
|
{batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/pipelines/asr/test_asr_pipeline.py
RENAMED
File without changes
|
{batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/pipelines/asr/test_asr_utils.py
RENAMED
File without changes
|
{batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/pipelines/cleanup/test_disfluency.py
RENAMED
File without changes
|
{batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/pipelines/cleanup/test_parse_support.py
RENAMED
File without changes
|
{batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/pipelines/fa/test_fa_pipeline.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.3b15 → batchalign-0.7.3b17}/batchalign/tests/pipelines/test_pipeline_models.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|