batchalign 0.7.3b14__tar.gz → 0.7.3b16__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalign-0.7.3b14/batchalign.egg-info → batchalign-0.7.3b16}/PKG-INFO +4 -64
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/README.md +2 -2
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/formats/chat/file.py +7 -1
- batchalign-0.7.3b16/batchalign/pipelines/morphosyntax/ja/verbforms.py +118 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/morphosyntax/ud.py +6 -1
- batchalign-0.7.3b16/batchalign/version +3 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16/batchalign.egg-info}/PKG-INFO +4 -64
- batchalign-0.7.3b14/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -56
- batchalign-0.7.3b14/batchalign/version +0 -3
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/LICENSE +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/MANIFEST.in +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/__init__.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/__main__.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/cli/cli.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/cli/dispatch.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/constants.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/document.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/errors.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/formats/chat/generator.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/formats/chat/lexer.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/formats/chat/parser.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/formats/chat/utils.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/resolve.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/__init__.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/dispatch.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/utils/dp.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign.egg-info/SOURCES.txt +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign.egg-info/requires.txt +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/setup.cfg +0 -0
- {batchalign-0.7.3b14 → batchalign-0.7.3b16}/setup.py +0 -0
@@ -1,76 +1,16 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: batchalign
|
3
|
-
Version: 0.7.3b14
|
3
|
+
Version: 0.7.3b16
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
7
7
|
Classifier: Development Status :: 3 - Alpha
|
8
8
|
Classifier: Topic :: Utilities
|
9
9
|
Description-Content-Type: text/markdown
|
10
|
-
License-File: LICENSE
|
11
|
-
Requires-Dist: pydantic>=2.4
|
12
|
-
Requires-Dist: nltk>=3.8
|
13
|
-
Requires-Dist: praatio<6.1.0,>=6.0.0
|
14
|
-
Requires-Dist: torch<2.2.0,>=2.0.1
|
15
|
-
Requires-Dist: torchaudio<2.2.0,>=2.1.0
|
16
|
-
Requires-Dist: pyAudioAnalysis==0.3.14
|
17
|
-
Requires-Dist: hmmlearn==0.3.0
|
18
|
-
Requires-Dist: eyed3~=0.9.7
|
19
|
-
Requires-Dist: pydub<0.26.0,>=0.25.1
|
20
|
-
Requires-Dist: imblearn
|
21
|
-
Requires-Dist: plotly>=5.18.0
|
22
|
-
Requires-Dist: transformers~=4.37
|
23
|
-
Requires-Dist: tokenizers>=0.14.1
|
24
|
-
Requires-Dist: pycountry>=22.3
|
25
|
-
Requires-Dist: stanza>=1.7
|
26
|
-
Requires-Dist: scipy~=1.11
|
27
|
-
Requires-Dist: rev_ai>=2.18.0
|
28
|
-
Requires-Dist: rich~=13.6
|
29
|
-
Requires-Dist: click~=8.1
|
30
|
-
Requires-Dist: matplotlib<4.0.0,>=3.8.0
|
31
|
-
Requires-Dist: pyfiglet==1.0.2
|
32
|
-
Requires-Dist: soundfile~=0.12.0
|
33
|
-
Requires-Dist: rich-click>=1.7.0
|
34
|
-
Requires-Dist: typing-extensions
|
35
10
|
Provides-Extra: dev
|
36
|
-
Requires-Dist: pytest; extra == "dev"
|
37
11
|
Provides-Extra: train
|
38
|
-
Requires-Dist: accelerate~=0.27; extra == "train"
|
39
12
|
Provides-Extra: speaker
|
40
|
-
|
41
|
-
Requires-Dist: omegaconf~=2.3.0; extra == "speaker"
|
42
|
-
Requires-Dist: pydub~=0.25.0; extra == "speaker"
|
43
|
-
Requires-Dist: braceexpand; extra == "speaker"
|
44
|
-
Requires-Dist: editdistance; extra == "speaker"
|
45
|
-
Requires-Dist: g2p_en; extra == "speaker"
|
46
|
-
Requires-Dist: ipywidgets; extra == "speaker"
|
47
|
-
Requires-Dist: jiwer; extra == "speaker"
|
48
|
-
Requires-Dist: kaldi-python-io; extra == "speaker"
|
49
|
-
Requires-Dist: kaldiio; extra == "speaker"
|
50
|
-
Requires-Dist: lhotse>=1.20.0; extra == "speaker"
|
51
|
-
Requires-Dist: librosa>=0.10.0; extra == "speaker"
|
52
|
-
Requires-Dist: marshmallow; extra == "speaker"
|
53
|
-
Requires-Dist: matplotlib; extra == "speaker"
|
54
|
-
Requires-Dist: packaging; extra == "speaker"
|
55
|
-
Requires-Dist: pyannote.core; extra == "speaker"
|
56
|
-
Requires-Dist: pyannote.metrics; extra == "speaker"
|
57
|
-
Requires-Dist: pydub; extra == "speaker"
|
58
|
-
Requires-Dist: pyloudnorm; extra == "speaker"
|
59
|
-
Requires-Dist: resampy; extra == "speaker"
|
60
|
-
Requires-Dist: ruamel.yaml; extra == "speaker"
|
61
|
-
Requires-Dist: scipy>=0.14; extra == "speaker"
|
62
|
-
Requires-Dist: soundfile; extra == "speaker"
|
63
|
-
Requires-Dist: sox; extra == "speaker"
|
64
|
-
Requires-Dist: texterrors; extra == "speaker"
|
65
|
-
Requires-Dist: hydra-core<=1.3.2,>1.3; extra == "speaker"
|
66
|
-
Requires-Dist: omegaconf<=2.3; extra == "speaker"
|
67
|
-
Requires-Dist: pytorch-lightning>=2.2.1; extra == "speaker"
|
68
|
-
Requires-Dist: torchmetrics>=0.11.0; extra == "speaker"
|
69
|
-
Requires-Dist: transformers>=4.36.0; extra == "speaker"
|
70
|
-
Requires-Dist: wandb; extra == "speaker"
|
71
|
-
Requires-Dist: webdataset>=0.2.86; extra == "speaker"
|
72
|
-
Requires-Dist: sentencepiece; extra == "speaker"
|
73
|
-
Requires-Dist: youtokentome; extra == "speaker"
|
13
|
+
License-File: LICENSE
|
74
14
|
|
75
15
|
# TalkBank | Batchalign2
|
76
16
|
|
@@ -102,13 +42,13 @@ You can get Batchalign from PyPi, and you can update the package in the same way
|
|
102
42
|
macOS/Linux:
|
103
43
|
|
104
44
|
```
|
105
|
-
|
45
|
+
pip install -U batchalign
|
106
46
|
```
|
107
47
|
|
108
48
|
Windows:
|
109
49
|
|
110
50
|
```
|
111
|
-
py -m
|
51
|
+
py -m pip install -U batchalign
|
112
52
|
```
|
113
53
|
|
114
54
|
### Rock and Roll
|
@@ -28,13 +28,13 @@ You can get Batchalign from PyPi, and you can update the package in the same way
|
|
28
28
|
macOS/Linux:
|
29
29
|
|
30
30
|
```
|
31
|
-
|
31
|
+
pip install -U batchalign
|
32
32
|
```
|
33
33
|
|
34
34
|
Windows:
|
35
35
|
|
36
36
|
```
|
37
|
-
py -m
|
37
|
+
py -m pip install -U batchalign
|
38
38
|
```
|
39
39
|
|
40
40
|
### Rock and Roll
|
@@ -129,7 +129,13 @@ class CHATFile(BaseFormat):
|
|
129
129
|
write_wor=write_wor))
|
130
130
|
main.append("@End\n")
|
131
131
|
|
132
|
-
|
132
|
+
raw = "\n".join(main)
|
133
|
+
|
134
|
+
# correct for unicode problems
|
135
|
+
corrected = raw
|
136
|
+
corrected = corrected.replace(u"\u202b", u"\u200f")
|
137
|
+
|
138
|
+
return corrected
|
133
139
|
|
134
140
|
@property
|
135
141
|
def doc(self):
|
@@ -0,0 +1,118 @@
|
|
1
|
+
"""
|
2
|
+
verbforms.py
|
3
|
+
Fix Japanese verb forms.
|
4
|
+
"""
|
5
|
+
|
6
|
+
def verbform(upos, target, text):
|
7
|
+
if "撮る" in text:
|
8
|
+
return "verb", "撮る"
|
9
|
+
if "貼る" in text:
|
10
|
+
return "verb", "貼る"
|
11
|
+
if "混ぜ" in text:
|
12
|
+
return "verb", "混ぜる"
|
13
|
+
if "釣る" in text:
|
14
|
+
return "verb", "釣る"
|
15
|
+
if "速い" in text and upos == "adj":
|
16
|
+
return "adj", "速い"
|
17
|
+
if "治ま" in text:
|
18
|
+
return "verb", "治まる"
|
19
|
+
if "刺す" in text:
|
20
|
+
return "verb", "刺す"
|
21
|
+
if "降り" in text:
|
22
|
+
return "verb", "降りる"
|
23
|
+
if "降" in text:
|
24
|
+
return "verb", "降る"
|
25
|
+
if "載せ" in text:
|
26
|
+
return "verb", "載せる"
|
27
|
+
if "帰" in text:
|
28
|
+
return "verb", "帰る"
|
29
|
+
if "はい" in text:
|
30
|
+
return "intj", "はい"
|
31
|
+
if "うん" in text:
|
32
|
+
return "intj", "うん"
|
33
|
+
if "おっ" in text:
|
34
|
+
return "intj", "おっ"
|
35
|
+
if "ほら" in text:
|
36
|
+
return "intj", "ほら"
|
37
|
+
if "ヤッホー" in text:
|
38
|
+
return "intj", "ヤッホー"
|
39
|
+
if "ただいま" in text:
|
40
|
+
return "intj", "ただいま"
|
41
|
+
if "あたし" in text:
|
42
|
+
return "pron", "あたし"
|
43
|
+
if "舐め" in text:
|
44
|
+
return "verb", "舐める"
|
45
|
+
if "バツ" in text:
|
46
|
+
return "noun", "バツ"
|
47
|
+
if "ブラシ" in text:
|
48
|
+
return "noun", "ブラシ"
|
49
|
+
if "引き出し" in text:
|
50
|
+
return "noun", "引き出し"
|
51
|
+
if "下さい" in text:
|
52
|
+
return "noun", "下さい"
|
53
|
+
if target in ["シャャミー", "物コャミ"]:
|
54
|
+
return "noun", "クシャミ"
|
55
|
+
if "マヨネーズ" in text:
|
56
|
+
return "noun", "マヨネーズ"
|
57
|
+
if "マヨ" in text:
|
58
|
+
return "noun", "マヨ"
|
59
|
+
if "チップス" in text:
|
60
|
+
return "noun", "チップス"
|
61
|
+
if "ゴロンっ" in text:
|
62
|
+
return "noun", "ゴロンっ"
|
63
|
+
if "モチーンっ" in text:
|
64
|
+
return "noun", "モチーンっ"
|
65
|
+
if "人っ" == text:
|
66
|
+
return "noun", "人"
|
67
|
+
if text == "掻く":
|
68
|
+
return "part", "かい"
|
69
|
+
if "遣" in text and upos == "noun":
|
70
|
+
return "verb", "遣る"
|
71
|
+
if "死" in text:
|
72
|
+
return "verb", "死ぬ"
|
73
|
+
if "立" in text:
|
74
|
+
return "verb", "立つ"
|
75
|
+
if "引" in text:
|
76
|
+
return "verb", "引く"
|
77
|
+
if "出" in text:
|
78
|
+
return "verb", "出す"
|
79
|
+
if "引" in text:
|
80
|
+
return "verb", "引く"
|
81
|
+
if "飲" in text:
|
82
|
+
return "verb", "飲む"
|
83
|
+
if "呼" in text:
|
84
|
+
return "verb", "呼ぶ"
|
85
|
+
if "脱" in text:
|
86
|
+
return "verb", "脱ぐ"
|
87
|
+
if text == "な" and upos == "part":
|
88
|
+
return "aux", "な"
|
89
|
+
if text == "呼ん":
|
90
|
+
return "verb", "呼ぶ"
|
91
|
+
if text == "な" and upos == "aux":
|
92
|
+
return "aux", "な"
|
93
|
+
if text == "だり":
|
94
|
+
return "aux", "たり"
|
95
|
+
if text == "たり":
|
96
|
+
return "aux", "たり"
|
97
|
+
if text == "たら":
|
98
|
+
return "sconj", "たら"
|
99
|
+
if text == "たっ":
|
100
|
+
return "sconj", "たって"
|
101
|
+
# if text == "て" and upos == "sconj":
|
102
|
+
# return "aux", "て"
|
103
|
+
if text == "なさい" and target == "為さる":
|
104
|
+
return "aux", "為さい"
|
105
|
+
if text == "な" and upos == "part":
|
106
|
+
return "aux", "な"
|
107
|
+
if text == "脱" and upos == "noun":
|
108
|
+
return "verb", "脱"
|
109
|
+
if text == "よう" and upos == "aux":
|
110
|
+
return "aux", "よう"
|
111
|
+
if text == "ろ" and upos == "aux" and target == "為る":
|
112
|
+
return "aux", "ろ"
|
113
|
+
# if upos == "verb" and "る" in target:
|
114
|
+
# return "verb", target.replace("る","").strip()
|
115
|
+
|
116
|
+
return upos,target
|
117
|
+
|
118
|
+
|
@@ -237,6 +237,8 @@ def handler__VERB(word, lang=None):
|
|
237
237
|
res = handler(word, lang)
|
238
238
|
if "sconj" in res:
|
239
239
|
return res
|
240
|
+
elif "verb" not in res:
|
241
|
+
return res
|
240
242
|
else:
|
241
243
|
return res+flag+stringify_feats(aspect, mood,
|
242
244
|
tense, polarity, polite,
|
@@ -266,7 +268,10 @@ def handler__PUNCT(word, lang=None):
|
|
266
268
|
return "noun|da"
|
267
269
|
elif re.match(r"^['\w-]+$", word.text): # we match text here because .text is the ultumate content
|
268
270
|
# instead of the lemma, which maybe entirely weird
|
269
|
-
|
271
|
+
if word.text == "もん":
|
272
|
+
return f"part|{word.text}"
|
273
|
+
else:
|
274
|
+
return f"x|{word.text}"
|
270
275
|
|
271
276
|
# Register handlers
|
272
277
|
HANDLERS = {
|
@@ -1,76 +1,16 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: batchalign
|
3
|
-
Version: 0.7.3b14
|
3
|
+
Version: 0.7.3b16
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
7
7
|
Classifier: Development Status :: 3 - Alpha
|
8
8
|
Classifier: Topic :: Utilities
|
9
9
|
Description-Content-Type: text/markdown
|
10
|
-
License-File: LICENSE
|
11
|
-
Requires-Dist: pydantic>=2.4
|
12
|
-
Requires-Dist: nltk>=3.8
|
13
|
-
Requires-Dist: praatio<6.1.0,>=6.0.0
|
14
|
-
Requires-Dist: torch<2.2.0,>=2.0.1
|
15
|
-
Requires-Dist: torchaudio<2.2.0,>=2.1.0
|
16
|
-
Requires-Dist: pyAudioAnalysis==0.3.14
|
17
|
-
Requires-Dist: hmmlearn==0.3.0
|
18
|
-
Requires-Dist: eyed3~=0.9.7
|
19
|
-
Requires-Dist: pydub<0.26.0,>=0.25.1
|
20
|
-
Requires-Dist: imblearn
|
21
|
-
Requires-Dist: plotly>=5.18.0
|
22
|
-
Requires-Dist: transformers~=4.37
|
23
|
-
Requires-Dist: tokenizers>=0.14.1
|
24
|
-
Requires-Dist: pycountry>=22.3
|
25
|
-
Requires-Dist: stanza>=1.7
|
26
|
-
Requires-Dist: scipy~=1.11
|
27
|
-
Requires-Dist: rev_ai>=2.18.0
|
28
|
-
Requires-Dist: rich~=13.6
|
29
|
-
Requires-Dist: click~=8.1
|
30
|
-
Requires-Dist: matplotlib<4.0.0,>=3.8.0
|
31
|
-
Requires-Dist: pyfiglet==1.0.2
|
32
|
-
Requires-Dist: soundfile~=0.12.0
|
33
|
-
Requires-Dist: rich-click>=1.7.0
|
34
|
-
Requires-Dist: typing-extensions
|
35
10
|
Provides-Extra: dev
|
36
|
-
Requires-Dist: pytest; extra == "dev"
|
37
11
|
Provides-Extra: train
|
38
|
-
Requires-Dist: accelerate~=0.27; extra == "train"
|
39
12
|
Provides-Extra: speaker
|
40
|
-
|
41
|
-
Requires-Dist: omegaconf~=2.3.0; extra == "speaker"
|
42
|
-
Requires-Dist: pydub~=0.25.0; extra == "speaker"
|
43
|
-
Requires-Dist: braceexpand; extra == "speaker"
|
44
|
-
Requires-Dist: editdistance; extra == "speaker"
|
45
|
-
Requires-Dist: g2p_en; extra == "speaker"
|
46
|
-
Requires-Dist: ipywidgets; extra == "speaker"
|
47
|
-
Requires-Dist: jiwer; extra == "speaker"
|
48
|
-
Requires-Dist: kaldi-python-io; extra == "speaker"
|
49
|
-
Requires-Dist: kaldiio; extra == "speaker"
|
50
|
-
Requires-Dist: lhotse>=1.20.0; extra == "speaker"
|
51
|
-
Requires-Dist: librosa>=0.10.0; extra == "speaker"
|
52
|
-
Requires-Dist: marshmallow; extra == "speaker"
|
53
|
-
Requires-Dist: matplotlib; extra == "speaker"
|
54
|
-
Requires-Dist: packaging; extra == "speaker"
|
55
|
-
Requires-Dist: pyannote.core; extra == "speaker"
|
56
|
-
Requires-Dist: pyannote.metrics; extra == "speaker"
|
57
|
-
Requires-Dist: pydub; extra == "speaker"
|
58
|
-
Requires-Dist: pyloudnorm; extra == "speaker"
|
59
|
-
Requires-Dist: resampy; extra == "speaker"
|
60
|
-
Requires-Dist: ruamel.yaml; extra == "speaker"
|
61
|
-
Requires-Dist: scipy>=0.14; extra == "speaker"
|
62
|
-
Requires-Dist: soundfile; extra == "speaker"
|
63
|
-
Requires-Dist: sox; extra == "speaker"
|
64
|
-
Requires-Dist: texterrors; extra == "speaker"
|
65
|
-
Requires-Dist: hydra-core<=1.3.2,>1.3; extra == "speaker"
|
66
|
-
Requires-Dist: omegaconf<=2.3; extra == "speaker"
|
67
|
-
Requires-Dist: pytorch-lightning>=2.2.1; extra == "speaker"
|
68
|
-
Requires-Dist: torchmetrics>=0.11.0; extra == "speaker"
|
69
|
-
Requires-Dist: transformers>=4.36.0; extra == "speaker"
|
70
|
-
Requires-Dist: wandb; extra == "speaker"
|
71
|
-
Requires-Dist: webdataset>=0.2.86; extra == "speaker"
|
72
|
-
Requires-Dist: sentencepiece; extra == "speaker"
|
73
|
-
Requires-Dist: youtokentome; extra == "speaker"
|
13
|
+
License-File: LICENSE
|
74
14
|
|
75
15
|
# TalkBank | Batchalign2
|
76
16
|
|
@@ -102,13 +42,13 @@ You can get Batchalign from PyPi, and you can update the package in the same way
|
|
102
42
|
macOS/Linux:
|
103
43
|
|
104
44
|
```
|
105
|
-
|
45
|
+
pip install -U batchalign
|
106
46
|
```
|
107
47
|
|
108
48
|
Windows:
|
109
49
|
|
110
50
|
```
|
111
|
-
py -m
|
51
|
+
py -m pip install -U batchalign
|
112
52
|
```
|
113
53
|
|
114
54
|
### Rock and Roll
|
@@ -1,56 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
verbforms.py
|
3
|
-
Fix Japanese verb forms.
|
4
|
-
"""
|
5
|
-
|
6
|
-
def verbform(upos, target, text):
|
7
|
-
if "遣" in text and upos == "noun":
|
8
|
-
return "verb", "遣る"
|
9
|
-
if "死" in text:
|
10
|
-
return "verb", "死ぬ"
|
11
|
-
if "立" in text:
|
12
|
-
return "verb", "立つ"
|
13
|
-
if "引" in text:
|
14
|
-
return "verb", "引く"
|
15
|
-
if "出" in text:
|
16
|
-
return "verb", "出す"
|
17
|
-
if "引" in text:
|
18
|
-
return "verb", "引く"
|
19
|
-
if "飲" in text:
|
20
|
-
return "verb", "飲む"
|
21
|
-
if "呼" in text:
|
22
|
-
return "verb", "呼ぶ"
|
23
|
-
if "脱" in text:
|
24
|
-
return "verb", "脱ぐ"
|
25
|
-
if text == "な" and upos == "part":
|
26
|
-
return "aux", "な"
|
27
|
-
if text == "呼ん":
|
28
|
-
return "verb", "呼ぶ"
|
29
|
-
if text == "な" and upos == "aux":
|
30
|
-
return "aux", "な"
|
31
|
-
if text == "だり":
|
32
|
-
return "aux", "たり"
|
33
|
-
if text == "たり":
|
34
|
-
return "aux", "たり"
|
35
|
-
if text == "たら":
|
36
|
-
return "sconj", "たら"
|
37
|
-
if text == "たっ":
|
38
|
-
return "sconj", "たって"
|
39
|
-
# if text == "て" and upos == "sconj":
|
40
|
-
# return "aux", "て"
|
41
|
-
if text == "なさい" and target == "為さる":
|
42
|
-
return "aux", "為さい"
|
43
|
-
if text == "な" and upos == "part":
|
44
|
-
return "aux", "な"
|
45
|
-
if text == "脱" and upos == "noun":
|
46
|
-
return "verb", "脱"
|
47
|
-
if text == "よう" and upos == "aux":
|
48
|
-
return "aux", "よう"
|
49
|
-
if text == "ろ" and upos == "aux" and target == "為る":
|
50
|
-
return "aux", "ろ"
|
51
|
-
# if upos == "verb" and "る" in target:
|
52
|
-
# return "verb", target.replace("る","").strip()
|
53
|
-
|
54
|
-
return upos,target
|
55
|
-
|
56
|
-
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/support/filled_pauses.eng
RENAMED
File without changes
|
{batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/pipelines/cleanup/support/replacements.eng
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/formats/chat/test_chat_generator.py
RENAMED
File without changes
|
{batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/formats/chat/test_chat_lexer.py
RENAMED
File without changes
|
{batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/formats/chat/test_chat_parser.py
RENAMED
File without changes
|
{batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/formats/chat/test_chat_utils.py
RENAMED
File without changes
|
{batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/formats/textgrid/test_textgrid.py
RENAMED
File without changes
|
{batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/pipelines/analysis/test_eval.py
RENAMED
File without changes
|
{batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/pipelines/asr/test_asr_pipeline.py
RENAMED
File without changes
|
{batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/pipelines/asr/test_asr_utils.py
RENAMED
File without changes
|
{batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/pipelines/cleanup/test_disfluency.py
RENAMED
File without changes
|
{batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/pipelines/cleanup/test_parse_support.py
RENAMED
File without changes
|
{batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/pipelines/fa/test_fa_pipeline.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.3b14 → batchalign-0.7.3b16}/batchalign/tests/pipelines/test_pipeline_models.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|