batchalign 0.7.3b0__tar.gz → 0.7.3b2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalign-0.7.3b0/batchalign.egg-info → batchalign-0.7.3b2}/PKG-INFO +1 -1
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/document.py +1 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/asr/whisper.py +2 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/cleanup/retrace.py +2 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/dispatch.py +1 -1
- batchalign-0.7.3b2/batchalign/version +3 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2/batchalign.egg-info}/PKG-INFO +1 -1
- batchalign-0.7.3b0/batchalign/version +0 -3
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/LICENSE +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/MANIFEST.in +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/README.md +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/__init__.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/__main__.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/cli/cli.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/cli/dispatch.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/constants.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/errors.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/formats/chat/generator.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/formats/chat/lexer.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/formats/chat/parser.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/formats/chat/utils.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/models/resolve.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/__init__.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/morphosyntax/ud.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/utils/dp.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign.egg-info/SOURCES.txt +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign.egg-info/requires.txt +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/setup.cfg +0 -0
- {batchalign-0.7.3b0 → batchalign-0.7.3b2}/setup.py +0 -0
@@ -256,6 +256,7 @@ class Utterance(BaseModel):
|
|
256
256
|
detokenized = " ".join(result)
|
257
257
|
# replace spurious spaces caused by edge fudging
|
258
258
|
detokenized = re.sub(r"< +", "<", detokenized)
|
259
|
+
detokenized = re.sub(r" >", ">", detokenized)
|
259
260
|
|
260
261
|
# check and separate punct
|
261
262
|
last_tok = result[-1]
|
@@ -36,6 +36,8 @@ class WhisperEngine(BatchalignEngine):
|
|
36
36
|
language = pycountry.languages.get(alpha_3=lang).name
|
37
37
|
if language == "Yue Chinese":
|
38
38
|
language = "Cantonese"
|
39
|
+
if "greek" in language.lower():
|
40
|
+
language = "Greek"
|
39
41
|
|
40
42
|
self.__whisper = WhisperASRModel(model, base=base, language=language)
|
41
43
|
self.__lang = lang
|
@@ -43,6 +43,8 @@ class NgramRetraceEngine(BatchalignEngine):
|
|
43
43
|
for j in content[begin:begin+n]:
|
44
44
|
if j.type != TokenType.FP:
|
45
45
|
j.type = TokenType.RETRACE
|
46
|
+
for p in ENDING_PUNCT + MOR_PUNCT:
|
47
|
+
j.text = j.text.replace(p, "").strip()
|
46
48
|
root = root+n
|
47
49
|
# we scan grams forward one by one
|
48
50
|
begin += 1
|
@@ -73,7 +73,7 @@ def dispatch_pipeline(pkg_str, lang, num_speakers=None, **arg_overrides):
|
|
73
73
|
packages.append("disfluency")
|
74
74
|
if "retracing" not in packages:
|
75
75
|
packages.append("retracing")
|
76
|
-
if "utterance" not in packages and resolve("utterance", lang) == None and lang not in ["heb", "fra", "ara"]:
|
76
|
+
if "utterance" not in packages and resolve("utterance", lang) == None and lang not in ["heb", "fra", "ara", "nld", "hun", "eus", "grc", "ell"]:
|
77
77
|
packages.append("utterance")
|
78
78
|
if "fa" in packages:
|
79
79
|
if "utr" not in packages:
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/cleanup/support/filled_pauses.eng
RENAMED
File without changes
|
{batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/pipelines/cleanup/support/replacements.eng
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/tests/formats/chat/test_chat_generator.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/tests/formats/textgrid/test_textgrid.py
RENAMED
File without changes
|
File without changes
|
{batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/tests/pipelines/asr/test_asr_pipeline.py
RENAMED
File without changes
|
File without changes
|
{batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/tests/pipelines/cleanup/test_disfluency.py
RENAMED
File without changes
|
{batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/tests/pipelines/cleanup/test_parse_support.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.3b0 → batchalign-0.7.3b2}/batchalign/tests/pipelines/test_pipeline_models.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|