batchalign 0.7.10.post4__tar.gz → 0.7.11a0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalign-0.7.10.post4/batchalign.egg-info → batchalign-0.7.11a0}/PKG-INFO +1 -1
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/whisper/infer_fa.py +1 -1
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/fa/whisper_fa.py +13 -3
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/morphosyntax/ud.py +2 -1
- batchalign-0.7.11a0/batchalign/version +3 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0/batchalign.egg-info}/PKG-INFO +1 -1
- batchalign-0.7.10.post4/batchalign/version +0 -3
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/LICENSE +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/MANIFEST.in +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/README.md +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/__init__.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/__main__.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/cli/cli.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/cli/dispatch.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/constants.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/document.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/errors.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/formats/chat/generator.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/formats/chat/lexer.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/formats/chat/parser.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/formats/chat/utils.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/resolve.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/__init__.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/dispatch.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/morphosyntax/coref.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/utils/dp.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign.egg-info/SOURCES.txt +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign.egg-info/requires.txt +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/setup.cfg +0 -0
- {batchalign-0.7.10.post4 → batchalign-0.7.11a0}/setup.py +0 -0
@@ -82,7 +82,7 @@ class WhisperFAModel(object):
|
|
82
82
|
def __call__(self, audio, text):
|
83
83
|
L.debug("Whisper Preprocessing...")
|
84
84
|
# input features
|
85
|
-
features = self.__processor(audio=audio, text=text,
|
85
|
+
features = self.__processor(audio=audio, text=" ".join(list(text)),
|
86
86
|
sampling_rate=self.sample_rate,
|
87
87
|
return_tensors='pt')
|
88
88
|
tokens = features["labels"][0]
|
@@ -107,11 +107,20 @@ class WhisperFAEngine(BatchalignEngine):
|
|
107
107
|
# we do this BACKWARDS because we want to have the first timestamp
|
108
108
|
# we get about a word first
|
109
109
|
alignments.reverse()
|
110
|
-
for elem in alignments:
|
110
|
+
for indx,elem in enumerate(alignments):
|
111
111
|
if isinstance(elem, Match):
|
112
|
+
next_elem = indx - 1 # remember this is backwards, see above
|
113
|
+
while next_elem >= 0 and alignments[next_elem].payload == elem.payload:
|
114
|
+
next_elem -= 1
|
115
|
+
if next_elem < 0:
|
116
|
+
next_elem = None
|
117
|
+
else:
|
118
|
+
next_elem = alignments[next_elem]
|
112
119
|
grp[elem.reference_payload][0].time = (int(round((timings[elem.payload]*1000 +
|
113
120
|
grp[0][1][0]))),
|
114
121
|
int(round((timings[elem.payload]*1000 +
|
122
|
+
grp[0][1][0])))+500 if next_elem == None else
|
123
|
+
int(round((timings[next_elem.payload]*1000 +
|
115
124
|
grp[0][1][0]))))
|
116
125
|
|
117
126
|
L.debug(f"Correcting text...")
|
@@ -144,8 +153,9 @@ class WhisperFAEngine(BatchalignEngine):
|
|
144
153
|
w.time = (w.time[0], doc.content[next_ut].alignment[0])
|
145
154
|
else:
|
146
155
|
w.time = (w.time[0], w.time[0]+500) # give half a second because we don't know
|
147
|
-
else:
|
148
|
-
|
156
|
+
# else:
|
157
|
+
# w.time = (w.time[0], ut.content[tmp].time[0])
|
158
|
+
|
149
159
|
# just in case, bound the time by the utterance derived timings
|
150
160
|
if ut.alignment and ut.alignment[0] != None:
|
151
161
|
w.time = (max(w.time[0], ut.alignment[0]), min(w.time[1], ut.alignment[1]))
|
@@ -990,7 +990,8 @@ def morphoanalyze(doc: Document, retokenize:bool, status_hook:callable = None, *
|
|
990
990
|
content.dependency = form.dependency
|
991
991
|
|
992
992
|
except Exception as e:
|
993
|
-
|
993
|
+
pass
|
994
|
+
# warnings.warn(f"Utterance failed parsing, skipping ud tagging... line='{line}', error='{e}'.\n")
|
994
995
|
|
995
996
|
L.debug("Stanza done.")
|
996
997
|
return doc
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/cleanup/disfluencies.py
RENAMED
File without changes
|
{batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/cleanup/parse_support.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/cleanup/support/test.test
RENAMED
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/morphosyntax/__init__.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/morphosyntax/fr/apmn.py
RENAMED
File without changes
|
{batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/morphosyntax/fr/case.py
RENAMED
File without changes
|
{batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/morphosyntax/ja/verbforms.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/speaker/nemo_speaker.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/pipelines/utterance/ud_utterance.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/formats/chat/test_chat_file.py
RENAMED
File without changes
|
{batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/formats/chat/test_chat_generator.py
RENAMED
File without changes
|
{batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/formats/chat/test_chat_lexer.py
RENAMED
File without changes
|
{batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/formats/chat/test_chat_parser.py
RENAMED
File without changes
|
{batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/formats/chat/test_chat_utils.py
RENAMED
File without changes
|
{batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/formats/textgrid/test_textgrid.py
RENAMED
File without changes
|
{batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/pipelines/analysis/test_eval.py
RENAMED
File without changes
|
{batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/pipelines/asr/test_asr_pipeline.py
RENAMED
File without changes
|
{batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/pipelines/asr/test_asr_utils.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/pipelines/fa/test_fa_pipeline.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.10.post4 → batchalign-0.7.11a0}/batchalign/tests/pipelines/test_pipeline_models.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|