batchalign 0.7.10.post2__tar.gz → 0.7.10.post5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalign-0.7.10.post2/batchalign.egg-info → batchalign-0.7.10.post5}/PKG-INFO +2 -1
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/asr/utils.py +15 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/morphosyntax/ud.py +2 -1
- batchalign-0.7.10.post5/batchalign/version +3 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5/batchalign.egg-info}/PKG-INFO +2 -1
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign.egg-info/requires.txt +1 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/setup.py +1 -0
- batchalign-0.7.10.post2/batchalign/version +0 -3
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/LICENSE +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/MANIFEST.in +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/README.md +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/__init__.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/__main__.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/cli/cli.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/cli/dispatch.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/constants.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/document.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/errors.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/formats/chat/generator.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/formats/chat/lexer.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/formats/chat/parser.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/formats/chat/utils.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/resolve.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/__init__.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/dispatch.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/utils/dp.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign.egg-info/SOURCES.txt +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: batchalign
|
3
|
-
Version: 0.7.10.post2
|
3
|
+
Version: 0.7.10.post5
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
@@ -32,6 +32,7 @@ Requires-Dist: setuptools
|
|
32
32
|
Requires-Dist: soundfile~=0.12.0
|
33
33
|
Requires-Dist: rich-click>=1.7.0
|
34
34
|
Requires-Dist: typing-extensions
|
35
|
+
Requires-Dist: num2words
|
35
36
|
Provides-Extra: dev
|
36
37
|
Requires-Dist: pytest; extra == "dev"
|
37
38
|
Provides-Extra: train
|
@@ -4,6 +4,10 @@ from batchalign.utils import *
|
|
4
4
|
|
5
5
|
from batchalign.constants import ENDING_PUNCT
|
6
6
|
|
7
|
+
from num2words import num2words
|
8
|
+
import pycountry
|
9
|
+
|
10
|
+
|
7
11
|
def retokenize(intermediate_output):
|
8
12
|
"""Retokenize the output of the ASR system from one giant blob to utterances
|
9
13
|
|
@@ -153,6 +157,17 @@ def process_generation(output, lang="eng", utterance_engine=None):
|
|
153
157
|
for part in word_parts:
|
154
158
|
final_words.append([part.strip(), [cur, cur+div]])
|
155
159
|
cur += div
|
160
|
+
|
161
|
+
lang_2 = pycountry.languages.get(alpha_3=lang).alpha_2
|
162
|
+
def catched_num2words(i):
|
163
|
+
if not i.isdigit():
|
164
|
+
return i
|
165
|
+
try:
|
166
|
+
return num2words(i, lang=lang_2)
|
167
|
+
except NotImplementedError:
|
168
|
+
return i
|
169
|
+
final_words = [[catched_num2words(i), j] for i,j in final_words]
|
170
|
+
|
156
171
|
# if the final words is > 300, split into n parts
|
157
172
|
if len(final_words) > 300:
|
158
173
|
# for each group, append
|
@@ -990,7 +990,8 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
|
|
990
990
|
content.dependency = form.dependency
|
991
991
|
|
992
992
|
except Exception as e:
|
993
|
-
|
993
|
+
pass
|
994
|
+
# warnings.warn(f"Utterance failed parsing, skipping ud tagging... line='{line}', error='{e}'.\n")
|
994
995
|
|
995
996
|
L.debug("Stanza done.")
|
996
997
|
return doc
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: batchalign
|
3
|
-
Version: 0.7.10.post2
|
3
|
+
Version: 0.7.10.post5
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
@@ -32,6 +32,7 @@ Requires-Dist: setuptools
|
|
32
32
|
Requires-Dist: soundfile~=0.12.0
|
33
33
|
Requires-Dist: rich-click>=1.7.0
|
34
34
|
Requires-Dist: typing-extensions
|
35
|
+
Requires-Dist: num2words
|
35
36
|
Provides-Extra: dev
|
36
37
|
Requires-Dist: pytest; extra == "dev"
|
37
38
|
Provides-Extra: train
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/formats/textgrid/generator.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/analysis/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/cleanup/__init__.py
RENAMED
File without changes
|
File without changes
|
{batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/cleanup/disfluencies.py
RENAMED
File without changes
|
{batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/cleanup/parse_support.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/cleanup/support/test.test
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/morphosyntax/__init__.py
RENAMED
File without changes
|
{batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/morphosyntax/coref.py
RENAMED
File without changes
|
{batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/morphosyntax/en/irr.py
RENAMED
File without changes
|
{batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/morphosyntax/fr/apm.py
RENAMED
File without changes
|
{batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/morphosyntax/fr/apmn.py
RENAMED
File without changes
|
{batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/morphosyntax/fr/case.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/speaker/__init__.py
RENAMED
File without changes
|
{batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/speaker/nemo_speaker.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/utterance/__init__.py
RENAMED
File without changes
|
{batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/pipelines/utterance/ud_utterance.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/formats/chat/test_chat_file.py
RENAMED
File without changes
|
File without changes
|
{batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/formats/chat/test_chat_lexer.py
RENAMED
File without changes
|
File without changes
|
{batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/formats/chat/test_chat_utils.py
RENAMED
File without changes
|
File without changes
|
{batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/pipelines/analysis/test_eval.py
RENAMED
File without changes
|
File without changes
|
{batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/pipelines/asr/test_asr_utils.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign/tests/pipelines/test_pipeline.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.10.post2 → batchalign-0.7.10.post5}/batchalign.egg-info/dependency_links.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|