batchalign 0.7.1b10__tar.gz → 0.7.1b11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalign-0.7.1b10/batchalign.egg-info → batchalign-0.7.1b11}/PKG-INFO +2 -2
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/README.md +1 -1
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/morphosyntax/ud.py +9 -4
- batchalign-0.7.1b11/batchalign/version +3 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11/batchalign.egg-info}/PKG-INFO +2 -2
- batchalign-0.7.1b10/batchalign/version +0 -3
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/LICENSE +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/MANIFEST.in +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/__init__.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/__main__.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/cli/cli.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/cli/dispatch.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/constants.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/document.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/errors.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/formats/chat/generator.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/formats/chat/lexer.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/formats/chat/parser.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/formats/chat/utils.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/models/resolve.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/__init__.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/dispatch.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/utils/dp.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign.egg-info/SOURCES.txt +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign.egg-info/requires.txt +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/setup.cfg +0 -0
- {batchalign-0.7.1b10 → batchalign-0.7.1b11}/setup.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: batchalign
|
3
|
-
Version: 0.7.1b10
|
3
|
+
Version: 0.7.1b11
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
@@ -82,7 +82,7 @@ The TalkBank Project, of which Batchalign is a part, is supported by NIH grant H
|
|
82
82
|
|
83
83
|
## Quick Start
|
84
84
|
|
85
|
-
The following instructions is a quick start to install Batchalign.
|
85
|
+
The following instructions is a quick start to install Batchalign. For most users aiming to process CHAT and audio with Batchalign, we recommend more detailed usage instructions: for [usage](https://talkbank.org/info/BA2-usage.pdf) and [human transcript cleanup](https://talkbank.org/info/BA2-cleanup.pdf). The following provides a quick start guide for the program.
|
86
86
|
|
87
87
|
### Get Python
|
88
88
|
- We support Python versions 3.9, 3.10, and 3.11.
|
@@ -8,7 +8,7 @@ The TalkBank Project, of which Batchalign is a part, is supported by NIH grant H
|
|
8
8
|
|
9
9
|
## Quick Start
|
10
10
|
|
11
|
-
The following instructions is a quick start to install Batchalign.
|
11
|
+
The following instructions is a quick start to install Batchalign. For most users aiming to process CHAT and audio with Batchalign, we recommend more detailed usage instructions: for [usage](https://talkbank.org/info/BA2-usage.pdf) and [human transcript cleanup](https://talkbank.org/info/BA2-cleanup.pdf). The following provides a quick start guide for the program.
|
12
12
|
|
13
13
|
### Get Python
|
14
14
|
- We support Python versions 3.9, 3.10, and 3.11.
|
@@ -813,13 +813,13 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
|
|
813
813
|
chunks = list(enumerate(doc.content[indx].text.split(" ")))
|
814
814
|
# filter out everything that could not possibly align
|
815
815
|
chunks_align = [(i,j) for i,j in chunks
|
816
|
-
if len(j) != 0 and (j[0] not in ["<", "[", "&", "\x15"])
|
817
|
-
and (
|
816
|
+
if len(j) != 0 and (j[0] not in ["<", "[", "&", "\x15"]) and (j[-1] not in ["]"])
|
817
|
+
and ("@" not in j)
|
818
818
|
and j.strip() not in ENDING_PUNCT + MOR_PUNCT + CHAT_IGNORE + ["++"]]
|
819
819
|
# hollow out anything we are trying to align, and leave everything else
|
820
820
|
chunks_backplate = [[j]
|
821
|
-
if not (len(j) != 0 and (j[0] not in ["<", "[", "&", "\x15"])
|
822
|
-
and (
|
821
|
+
if not (len(j) != 0 and (j[0] not in ["<", "[", "&", "\x15"]) and (j[-1] not in ["]"])
|
822
|
+
and ("@" not in j)
|
823
823
|
and j.strip() not in ENDING_PUNCT + MOR_PUNCT + CHAT_IGNORE + ["++"])
|
824
824
|
else
|
825
825
|
[]
|
@@ -852,6 +852,11 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
|
|
852
852
|
retokenized_ut = re.sub(r" +", " ", retokenized_ut)
|
853
853
|
retokenized_ut = retokenized_ut.replace("+ \"", "+\"")
|
854
854
|
retokenized_ut = retokenized_ut.replace(" >", ">")
|
855
|
+
retokenized_ut = retokenized_ut.replace("< ", "<")
|
856
|
+
retokenized_ut = retokenized_ut.replace(" :", ":")
|
857
|
+
retokenized_ut = retokenized_ut.replace(": <", ": <")
|
858
|
+
retokenized_ut = re.sub(r"@ ?w ?p", "@wp", retokenized_ut)
|
859
|
+
retokenized_ut = retokenized_ut.replace(" @", "@")
|
855
860
|
# pray to everyone that it works---this will simply crash and ignore
|
856
861
|
# the utterance if it didn't work, so we are doing this as a sanity
|
857
862
|
# check rather than needing the parsed result
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: batchalign
|
3
|
-
Version: 0.7.1b10
|
3
|
+
Version: 0.7.1b11
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
@@ -82,7 +82,7 @@ The TalkBank Project, of which Batchalign is a part, is supported by NIH grant H
|
|
82
82
|
|
83
83
|
## Quick Start
|
84
84
|
|
85
|
-
The following instructions is a quick start to install Batchalign.
|
85
|
+
The following instructions is a quick start to install Batchalign. For most users aiming to process CHAT and audio with Batchalign, we recommend more detailed usage instructions: for [usage](https://talkbank.org/info/BA2-usage.pdf) and [human transcript cleanup](https://talkbank.org/info/BA2-cleanup.pdf). The following provides a quick start guide for the program.
|
86
86
|
|
87
87
|
### Get Python
|
88
88
|
- We support Python versions 3.9, 3.10, and 3.11.
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/cleanup/support/filled_pauses.eng
RENAMED
File without changes
|
{batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/pipelines/cleanup/support/replacements.eng
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/formats/chat/test_chat_generator.py
RENAMED
File without changes
|
{batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/formats/chat/test_chat_lexer.py
RENAMED
File without changes
|
{batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/formats/chat/test_chat_parser.py
RENAMED
File without changes
|
{batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/formats/chat/test_chat_utils.py
RENAMED
File without changes
|
{batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/formats/textgrid/test_textgrid.py
RENAMED
File without changes
|
{batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/pipelines/analysis/test_eval.py
RENAMED
File without changes
|
{batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/pipelines/asr/test_asr_pipeline.py
RENAMED
File without changes
|
{batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/pipelines/asr/test_asr_utils.py
RENAMED
File without changes
|
{batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/pipelines/cleanup/test_disfluency.py
RENAMED
File without changes
|
{batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/pipelines/cleanup/test_parse_support.py
RENAMED
File without changes
|
{batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/pipelines/fa/test_fa_pipeline.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.1b10 → batchalign-0.7.1b11}/batchalign/tests/pipelines/test_pipeline_models.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|