batchalign 0.7.1b5__tar.gz → 0.7.1b7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalign-0.7.1b5/batchalign.egg-info → batchalign-0.7.1b7}/PKG-INFO +2 -2
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/README.md +1 -1
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/cli/cli.py +9 -6
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/formats/chat/parser.py +1 -1
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/dispatch.py +1 -1
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/morphosyntax/ud.py +20 -0
- batchalign-0.7.1b7/batchalign/version +3 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7/batchalign.egg-info}/PKG-INFO +2 -2
- batchalign-0.7.1b5/batchalign/version +0 -3
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/LICENSE +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/MANIFEST.in +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/__init__.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/__main__.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/cli/dispatch.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/constants.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/document.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/errors.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/formats/chat/generator.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/formats/chat/lexer.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/formats/chat/utils.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/resolve.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/__init__.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/utils/dp.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign.egg-info/SOURCES.txt +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign.egg-info/requires.txt +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/setup.cfg +0 -0
- {batchalign-0.7.1b5 → batchalign-0.7.1b7}/setup.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: batchalign
|
3
|
-
Version: 0.7.1b5
|
3
|
+
Version: 0.7.1b7
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
@@ -82,7 +82,7 @@ The TalkBank Project, of which Batchalign is a part, is supported by NIH grant H
|
|
82
82
|
|
83
83
|
## Quick Start
|
84
84
|
|
85
|
-
The following instructions is a quick start to install Batchalign.
|
85
|
+
The following instructions is a quick start to install Batchalign.
|
86
86
|
|
87
87
|
### Get Python
|
88
88
|
- We support Python versions 3.9, 3.10, and 3.11.
|
@@ -8,7 +8,7 @@ The TalkBank Project, of which Batchalign is a part, is supported by NIH grant H
|
|
8
8
|
|
9
9
|
## Quick Start
|
10
10
|
|
11
|
-
The following instructions is a quick start to install Batchalign.
|
11
|
+
The following instructions is a quick start to install Batchalign.
|
12
12
|
|
13
13
|
### Get Python
|
14
14
|
- We support Python versions 3.9, 3.10, and 3.11.
|
@@ -149,19 +149,22 @@ def transcribe(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
|
|
149
149
|
def loader(file):
|
150
150
|
return file
|
151
151
|
|
152
|
+
asr = "rev"
|
153
|
+
if kwargs["whisper"]:
|
154
|
+
asr = "whisper"
|
155
|
+
if kwargs["whisperx"]:
|
156
|
+
asr = "whisperx"
|
157
|
+
|
158
|
+
|
152
159
|
def writer(doc, output):
|
160
|
+
doc.content.insert(0, CustomLine(id="Comment", type=CustomLineType.INDEPENDENT,
|
161
|
+
content=f"Batchalign {VERSION_NUMBER.strip()}, ASR Engine {asr}"))
|
153
162
|
CHATFile(doc=doc, special_mor_=True).write(output
|
154
163
|
.replace(".wav", ".cha")
|
155
164
|
.replace(".mp4", ".cha")
|
156
165
|
.replace(".mp3", ".cha"),
|
157
166
|
write_wor=kwargs.get("wor", False))
|
158
167
|
|
159
|
-
asr = "rev"
|
160
|
-
if kwargs["whisper"]:
|
161
|
-
asr = "whisper"
|
162
|
-
if kwargs["whisperx"]:
|
163
|
-
asr = "whisperx"
|
164
|
-
|
165
168
|
if kwargs.get("diarize"):
|
166
169
|
_dispatch("transcribe_s",
|
167
170
|
lang, num_speakers, ["mp3", "mp4", "wav"], ctx,
|
@@ -222,7 +222,7 @@ def chat_parse_doc(lines, special_mor=False):
|
|
222
222
|
continue
|
223
223
|
# we split because there are multiple languages possible
|
224
224
|
elif "@Languages" in line.strip():
|
225
|
-
results["langs"] = [i.strip() for i in line.strip("@Languages:").strip().split(",")]
|
225
|
+
results["langs"] = [i.strip() for i in line.strip("@Languages:").strip().replace(" ", ",").strip().split(",") if i.strip() != ""]
|
226
226
|
if len(results["langs"]) > 0 and results["langs"][0] == "eng" and special_mor:
|
227
227
|
use_special_mor = True
|
228
228
|
# parse participants; the number of | delinates the metedata field
|
@@ -73,7 +73,7 @@ def dispatch_pipeline(pkg_str, lang, num_speakers=None, **arg_overrides):
|
|
73
73
|
packages.append("disfluency")
|
74
74
|
if "retracing" not in packages:
|
75
75
|
packages.append("retracing")
|
76
|
-
if "utterance" not in packages and resolve("utterance", lang) == None:
|
76
|
+
if "utterance" not in packages and resolve("utterance", lang) == None and lang not in ["heb"]:
|
77
77
|
packages.append("utterance")
|
78
78
|
if "fa" in packages:
|
79
79
|
if "utr" not in packages:
|
@@ -808,7 +808,27 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
|
|
808
808
|
ut, end = chat_parse_utterance(" ".join([i.text for i in sents[0].tokens])+" "+ending,
|
809
809
|
mor, gra,
|
810
810
|
None, None)
|
811
|
+
# JANK add a space after every form being analyzed
|
812
|
+
text_fixed = []
|
813
|
+
text_orig = i.text
|
814
|
+
# we do this to force one replacement of the token
|
815
|
+
# for every input token
|
816
|
+
for i in sents[0].tokens:
|
817
|
+
try:
|
818
|
+
before, after = text_orig.split(i.text, 1)
|
819
|
+
text_fixed.append(before.strip())
|
820
|
+
text_fixed.append(i.text.strip())
|
821
|
+
text_orig = after
|
822
|
+
except ValueError:
|
823
|
+
# we give up on that token; likely not found
|
824
|
+
# because there we tokenization issues (i.e.
|
825
|
+
# existing tokenization)
|
826
|
+
continue
|
827
|
+
text_fixed.append(text_orig.strip())
|
828
|
+
text_fixed = " ".join(text_fixed).strip()
|
829
|
+
text_fixed = re.sub(r" +", " ", text_fixed)
|
811
830
|
doc.content[indx] = Utterance(content=ut,
|
831
|
+
text=text_fixed,
|
812
832
|
tier=doc.content[indx].tier,
|
813
833
|
time=doc.content[indx].time,
|
814
834
|
custom_dependencies=doc.content[indx].custom_dependencies)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: batchalign
|
3
|
-
Version: 0.7.1b5
|
3
|
+
Version: 0.7.1b7
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
@@ -82,7 +82,7 @@ The TalkBank Project, of which Batchalign is a part, is supported by NIH grant H
|
|
82
82
|
|
83
83
|
## Quick Start
|
84
84
|
|
85
|
-
The following instructions is a quick start to install Batchalign.
|
85
|
+
The following instructions is a quick start to install Batchalign.
|
86
86
|
|
87
87
|
### Get Python
|
88
88
|
- We support Python versions 3.9, 3.10, and 3.11.
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/support/filled_pauses.eng
RENAMED
File without changes
|
{batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/support/replacements.eng
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/formats/chat/test_chat_generator.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/formats/textgrid/test_textgrid.py
RENAMED
File without changes
|
File without changes
|
{batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/pipelines/asr/test_asr_pipeline.py
RENAMED
File without changes
|
File without changes
|
{batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/pipelines/cleanup/test_disfluency.py
RENAMED
File without changes
|
{batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/pipelines/cleanup/test_parse_support.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.1b5 → batchalign-0.7.1b7}/batchalign/tests/pipelines/test_pipeline_models.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|