batchalign 0.7.6a13__tar.gz → 0.7.6a14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalign-0.7.6a13/batchalign.egg-info → batchalign-0.7.6a14}/PKG-INFO +1 -1
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/morphosyntax/ud.py +12 -2
- batchalign-0.7.6a14/batchalign/version +3 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14/batchalign.egg-info}/PKG-INFO +1 -1
- batchalign-0.7.6a13/batchalign/version +0 -3
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/LICENSE +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/MANIFEST.in +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/README.md +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/__main__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/cli/cli.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/cli/dispatch.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/constants.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/document.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/errors.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/formats/chat/generator.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/formats/chat/lexer.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/formats/chat/parser.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/formats/chat/utils.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/resolve.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/dispatch.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/utils/dp.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign.egg-info/SOURCES.txt +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign.egg-info/requires.txt +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/setup.cfg +0 -0
- {batchalign-0.7.6a13 → batchalign-0.7.6a14}/setup.py +0 -0
@@ -872,6 +872,8 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
|
|
872
872
|
for i,j in enumerate(ut):
|
873
873
|
for k in j.text:
|
874
874
|
ud_chars.append(ReferenceTarget(k, payload=i))
|
875
|
+
creaky = False
|
876
|
+
collected = ""
|
875
877
|
# brrr
|
876
878
|
aligned = align(chunks_chars, ud_chars, tqdm=False)
|
877
879
|
for i in aligned:
|
@@ -879,8 +881,14 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
|
|
879
881
|
if i.reference_payload not in chunks_backplate[i.payload]:
|
880
882
|
chunks_backplate[i.payload].append(i.reference_payload)
|
881
883
|
elif isinstance(i, Extra) and i.extra_type == ExtraType.PAYLOAD:
|
882
|
-
|
883
|
-
|
884
|
+
if i.key == "*":
|
885
|
+
creaky = not creaky
|
886
|
+
chunks_backplate[i.payload].append("*"+collected+"*")
|
887
|
+
collected = ""
|
888
|
+
elif creaky:
|
889
|
+
collected += i.key
|
890
|
+
elif not creaky:
|
891
|
+
chunks_backplate[i.payload].append(i.key)
|
884
892
|
# we want to replace the morphology of forms that are not actually
|
885
893
|
# supposed to be analyzed
|
886
894
|
elif isinstance(i, Extra) and i.extra_type == ExtraType.REFERENCE:
|
@@ -915,6 +923,8 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
|
|
915
923
|
retokenized_ut = retokenized_ut.replace(" ↑", "↑")
|
916
924
|
retokenized_ut = re.sub(r"@ ?w ?p", "@wp", retokenized_ut)
|
917
925
|
retokenized_ut = retokenized_ut.replace(" @", "@")
|
926
|
+
retokenized_ut = re.sub(r"\*[* ]*", "*", retokenized_ut)
|
927
|
+
retokenized_ut = re.sub(r"\*(.*?)\*", r"*\1* ", retokenized_ut)
|
918
928
|
# pray to everyone that it works---this will simply crash and ignore
|
919
929
|
# the utterance if it didn't work, so we are doing this as a sanity
|
920
930
|
# check rather than needing the parsed result
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/cleanup/support/filled_pauses.eng
RENAMED
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/cleanup/support/replacements.eng
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/pipelines/morphosyntax/ja/verbforms.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/formats/chat/test_chat_generator.py
RENAMED
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/formats/chat/test_chat_lexer.py
RENAMED
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/formats/chat/test_chat_parser.py
RENAMED
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/formats/chat/test_chat_utils.py
RENAMED
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/formats/textgrid/test_textgrid.py
RENAMED
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/pipelines/analysis/test_eval.py
RENAMED
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/pipelines/asr/test_asr_pipeline.py
RENAMED
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/pipelines/asr/test_asr_utils.py
RENAMED
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/pipelines/cleanup/test_disfluency.py
RENAMED
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/pipelines/cleanup/test_parse_support.py
RENAMED
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/pipelines/fa/test_fa_pipeline.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.6a13 → batchalign-0.7.6a14}/batchalign/tests/pipelines/test_pipeline_models.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|