batchalign 0.7.1b6__tar.gz → 0.7.1b7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalign-0.7.1b6/batchalign.egg-info → batchalign-0.7.1b7}/PKG-INFO +1 -1
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/cli/cli.py +9 -6
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/formats/chat/parser.py +1 -1
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/morphosyntax/ud.py +20 -0
- batchalign-0.7.1b7/batchalign/version +3 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7/batchalign.egg-info}/PKG-INFO +1 -1
- batchalign-0.7.1b6/batchalign/version +0 -3
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/LICENSE +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/MANIFEST.in +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/README.md +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/__init__.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/__main__.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/cli/dispatch.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/constants.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/document.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/errors.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/formats/chat/generator.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/formats/chat/lexer.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/formats/chat/utils.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/models/resolve.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/__init__.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/dispatch.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/utils/dp.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign.egg-info/SOURCES.txt +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign.egg-info/requires.txt +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/setup.cfg +0 -0
- {batchalign-0.7.1b6 → batchalign-0.7.1b7}/setup.py +0 -0
@@ -149,19 +149,22 @@ def transcribe(ctx, in_dir, out_dir, lang, num_speakers, **kwargs):
|
|
149
149
|
def loader(file):
|
150
150
|
return file
|
151
151
|
|
152
|
+
asr = "rev"
|
153
|
+
if kwargs["whisper"]:
|
154
|
+
asr = "whisper"
|
155
|
+
if kwargs["whisperx"]:
|
156
|
+
asr = "whisperx"
|
157
|
+
|
158
|
+
|
152
159
|
def writer(doc, output):
|
160
|
+
doc.content.insert(0, CustomLine(id="Comment", type=CustomLineType.INDEPENDENT,
|
161
|
+
content=f"Batchalign {VERSION_NUMBER.strip()}, ASR Engine {asr}"))
|
153
162
|
CHATFile(doc=doc, special_mor_=True).write(output
|
154
163
|
.replace(".wav", ".cha")
|
155
164
|
.replace(".mp4", ".cha")
|
156
165
|
.replace(".mp3", ".cha"),
|
157
166
|
write_wor=kwargs.get("wor", False))
|
158
167
|
|
159
|
-
asr = "rev"
|
160
|
-
if kwargs["whisper"]:
|
161
|
-
asr = "whisper"
|
162
|
-
if kwargs["whisperx"]:
|
163
|
-
asr = "whisperx"
|
164
|
-
|
165
168
|
if kwargs.get("diarize"):
|
166
169
|
_dispatch("transcribe_s",
|
167
170
|
lang, num_speakers, ["mp3", "mp4", "wav"], ctx,
|
@@ -222,7 +222,7 @@ def chat_parse_doc(lines, special_mor=False):
|
|
222
222
|
continue
|
223
223
|
# we split because there are multiple languages possible
|
224
224
|
elif "@Languages" in line.strip():
|
225
|
-
results["langs"] = [i.strip() for i in line.strip("@Languages:").strip().split(",")]
|
225
|
+
results["langs"] = [i.strip() for i in line.strip("@Languages:").strip().replace(" ", ",").strip().split(",") if i.strip() != ""]
|
226
226
|
if len(results["langs"]) > 0 and results["langs"][0] == "eng" and special_mor:
|
227
227
|
use_special_mor = True
|
228
228
|
# parse participants; the number of | delinates the metedata field
|
@@ -808,7 +808,27 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
|
|
808
808
|
ut, end = chat_parse_utterance(" ".join([i.text for i in sents[0].tokens])+" "+ending,
|
809
809
|
mor, gra,
|
810
810
|
None, None)
|
811
|
+
# JANK add a space after every form being analyzed
|
812
|
+
text_fixed = []
|
813
|
+
text_orig = i.text
|
814
|
+
# we do this to force one replacement of the token
|
815
|
+
# for every input token
|
816
|
+
for i in sents[0].tokens:
|
817
|
+
try:
|
818
|
+
before, after = text_orig.split(i.text, 1)
|
819
|
+
text_fixed.append(before.strip())
|
820
|
+
text_fixed.append(i.text.strip())
|
821
|
+
text_orig = after
|
822
|
+
except ValueError:
|
823
|
+
# we give up on that token; likely not found
|
824
|
+
# because there we tokenization issues (i.e.
|
825
|
+
# existing tokenization)
|
826
|
+
continue
|
827
|
+
text_fixed.append(text_orig.strip())
|
828
|
+
text_fixed = " ".join(text_fixed).strip()
|
829
|
+
text_fixed = re.sub(r" +", " ", text_fixed)
|
811
830
|
doc.content[indx] = Utterance(content=ut,
|
831
|
+
text=text_fixed,
|
812
832
|
tier=doc.content[indx].tier,
|
813
833
|
time=doc.content[indx].time,
|
814
834
|
custom_dependencies=doc.content[indx].custom_dependencies)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/support/filled_pauses.eng
RENAMED
File without changes
|
{batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/pipelines/cleanup/support/replacements.eng
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/tests/formats/chat/test_chat_generator.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/tests/formats/textgrid/test_textgrid.py
RENAMED
File without changes
|
File without changes
|
{batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/tests/pipelines/asr/test_asr_pipeline.py
RENAMED
File without changes
|
File without changes
|
{batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/tests/pipelines/cleanup/test_disfluency.py
RENAMED
File without changes
|
{batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/tests/pipelines/cleanup/test_parse_support.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.1b6 → batchalign-0.7.1b7}/batchalign/tests/pipelines/test_pipeline_models.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|