batchalign 0.7.5a7__tar.gz → 0.7.6a0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {batchalign-0.7.5a7/batchalign.egg-info → batchalign-0.7.6a0}/PKG-INFO +2 -2
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/cli/cli.py +21 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/cli/dispatch.py +1 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/document.py +10 -1
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/formats/chat/generator.py +20 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/resolve.py +1 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/__init__.py +1 -1
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/dispatch.py +5 -1
- batchalign-0.7.6a0/batchalign/pipelines/morphosyntax/__init__.py +3 -0
- batchalign-0.7.6a0/batchalign/pipelines/morphosyntax/coref.py +45 -0
- batchalign-0.7.6a0/batchalign/version +3 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0/batchalign.egg-info}/PKG-INFO +2 -2
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign.egg-info/SOURCES.txt +1 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign.egg-info/requires.txt +1 -1
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/setup.py +1 -1
- batchalign-0.7.5a7/batchalign/pipelines/morphosyntax/__init__.py +0 -1
- batchalign-0.7.5a7/batchalign/version +0 -3
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/LICENSE +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/MANIFEST.in +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/README.md +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/__init__.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/__main__.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/cli/__init__.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/constants.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/errors.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/formats/__init__.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/formats/base.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/formats/chat/__init__.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/formats/chat/file.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/formats/chat/lexer.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/formats/chat/parser.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/formats/chat/utils.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/formats/textgrid/__init__.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/formats/textgrid/file.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/formats/textgrid/generator.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/formats/textgrid/parser.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/__init__.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/speaker/__init__.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/speaker/config.yaml +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/speaker/infer.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/speaker/utils.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/training/__init__.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/training/run.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/training/utils.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/utils.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/utterance/__init__.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/utterance/dataset.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/utterance/execute.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/utterance/infer.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/utterance/prep.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/utterance/train.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/whisper/__init__.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/whisper/infer_asr.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/whisper/infer_fa.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/analysis/__init__.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/analysis/eval.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/asr/__init__.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/asr/rev.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/asr/utils.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/asr/whisper.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/asr/whisperx.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/base.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/cleanup/__init__.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/cleanup/cleanup.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/cleanup/parse_support.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/cleanup/retrace.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/cleanup/support/test.test +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/fa/__init__.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/fa/whisper_fa.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/morphosyntax/ud.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/pipeline.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/speaker/__init__.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/utr/__init__.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/utr/rev_utr.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/utr/utils.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/utr/whisper_utr.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/utterance/__init__.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/__init__.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/conftest.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/pipelines/fixures.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/pipelines/test_pipeline.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/test_document.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/utils/__init__.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/utils/config.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/utils/dp.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/utils/utils.py +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign.egg-info/dependency_links.txt +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign.egg-info/entry_points.txt +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign.egg-info/top_level.txt +0 -0
- {batchalign-0.7.5a7 → batchalign-0.7.6a0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: batchalign
|
3
|
-
Version: 0.7.5a7
|
3
|
+
Version: 0.7.6a0
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
@@ -22,7 +22,7 @@ Requires-Dist: plotly>=5.18.0
|
|
22
22
|
Requires-Dist: transformers>=4.37
|
23
23
|
Requires-Dist: tokenizers>=0.14.1
|
24
24
|
Requires-Dist: pycountry>=22.3
|
25
|
-
Requires-Dist: stanza>=1.
|
25
|
+
Requires-Dist: stanza[transformers]>=1.9.1
|
26
26
|
Requires-Dist: scipy~=1.11
|
27
27
|
Requires-Dist: rev_ai>=2.18.0
|
28
28
|
Requires-Dist: rich~=13.6
|
@@ -217,6 +217,27 @@ def morphotag(ctx, in_dir, out_dir, **kwargs):
|
|
217
217
|
loader, writer, C)
|
218
218
|
|
219
219
|
|
220
|
+
#################### MORPHOTAG ################################
|
221
|
+
|
222
|
+
@batchalign.command(hidden=True)
|
223
|
+
@common_options
|
224
|
+
@click.pass_context
|
225
|
+
def coref(ctx, in_dir, out_dir, **kwargs):
|
226
|
+
"""Perform coreference analysis on transcripts."""
|
227
|
+
|
228
|
+
def loader(file):
|
229
|
+
cf = CHATFile(path=os.path.abspath(file))
|
230
|
+
doc = cf.doc
|
231
|
+
return doc, {}
|
232
|
+
|
233
|
+
def writer(doc, output):
|
234
|
+
CHATFile(doc=doc).write(output)
|
235
|
+
|
236
|
+
_dispatch("coref", "eng", 1, ["cha"], ctx,
|
237
|
+
in_dir, out_dir,
|
238
|
+
loader, writer, C)
|
239
|
+
|
240
|
+
|
220
241
|
#################### UTSEG ################################
|
221
242
|
|
222
243
|
@batchalign.command()
|
@@ -29,7 +29,8 @@ class Task(IntEnum):
|
|
29
29
|
FORCED_ALIGNMENT = 9
|
30
30
|
FEATURE_EXTRACT = 10
|
31
31
|
MORPHOSYNTAX = 11
|
32
|
-
|
32
|
+
COREF = 12
|
33
|
+
WER = 13
|
33
34
|
|
34
35
|
|
35
36
|
DEBUG__G = 0
|
@@ -51,6 +52,7 @@ TypeMap = {
|
|
51
52
|
Task.FEATURE_EXTRACT: TaskType.ANALYSIS,
|
52
53
|
Task.RETRACE_ANALYSIS: TaskType.PROCESSING,
|
53
54
|
Task.DISFLUENCY_ANALYSIS: TaskType.PROCESSING,
|
55
|
+
Task.COREF: TaskType.PROCESSING,
|
54
56
|
Task.WER: TaskType.ANALYSIS,
|
55
57
|
|
56
58
|
Task.DEBUG__G: TaskType.GENERATION,
|
@@ -69,6 +71,7 @@ TaskFriendlyName = {
|
|
69
71
|
Task.FEATURE_EXTRACT: "Feature Extraction",
|
70
72
|
Task.RETRACE_ANALYSIS: "Retrace Analysis",
|
71
73
|
Task.DISFLUENCY_ANALYSIS: "Disfluncy Analysis",
|
74
|
+
Task.COREF: "Coreference Resolution",
|
72
75
|
Task.WER: "Word Error Rate",
|
73
76
|
Task.DEBUG__G: "TEST_GENERATION",
|
74
77
|
Task.DEBUG__P: "TEST_PROCESSING",
|
@@ -103,12 +106,18 @@ class Morphology(BaseModel):
|
|
103
106
|
pos: str # pos like "pron"
|
104
107
|
feats: str # string feats "Dem-Acc-S1"
|
105
108
|
|
109
|
+
class Coref(BaseModel):
|
110
|
+
start: bool
|
111
|
+
end: bool
|
112
|
+
chain: int
|
113
|
+
|
106
114
|
class Form(BaseModel):
|
107
115
|
text: str # the text
|
108
116
|
# MILISCEONDS
|
109
117
|
time: Optional[Tuple[int, int]] = Field(default=None) # word bullet
|
110
118
|
morphology: Optional[List[Morphology]] = Field(default=None) # mor
|
111
119
|
dependency: Optional[List[Dependency]] = Field(default=None) # gra
|
120
|
+
coreference: Optional[List[Coref]] = Field(default=None) # gra
|
112
121
|
type: TokenType = Field(default=TokenType.REGULAR) # whether the field is a regular word (i.e. not a filled pause, not a feature, not a retrace, etc.)
|
113
122
|
|
114
123
|
class Tier(BaseModel):
|
@@ -33,6 +33,8 @@ def generate_chat_utterance(utterance: Utterance, special_mor=False, write_wor=T
|
|
33
33
|
gras = []
|
34
34
|
has_wor = False
|
35
35
|
wor_elems = []
|
36
|
+
has_coref = False
|
37
|
+
coref_elems = []
|
36
38
|
|
37
39
|
for i in utterance.content:
|
38
40
|
mors.append(i.morphology)
|
@@ -43,6 +45,21 @@ def generate_chat_utterance(utterance: Utterance, special_mor=False, write_wor=T
|
|
43
45
|
else:
|
44
46
|
wor_elems.append(i.text)
|
45
47
|
|
48
|
+
if i.coreference:
|
49
|
+
has_coref = True
|
50
|
+
coref_str_form = ""
|
51
|
+
for j in i.coreference:
|
52
|
+
coref_str = ""
|
53
|
+
if j.start:
|
54
|
+
coref_str += "("
|
55
|
+
coref_str += str(j.chain)
|
56
|
+
if j.end:
|
57
|
+
coref_str += ")"
|
58
|
+
coref_str_form += coref_str
|
59
|
+
coref_elems.append(coref_str_form)
|
60
|
+
else:
|
61
|
+
coref_elems.append("-")
|
62
|
+
|
46
63
|
if bool(mors[-1]) != bool(gras[-1]):
|
47
64
|
warnings.warn(f"Batchalign has detected a mismatch between lengths of mor and gra tiers for utterance; output will not pass CHATTER; line='{main_line}'")
|
48
65
|
|
@@ -75,6 +92,9 @@ def generate_chat_utterance(utterance: Utterance, special_mor=False, write_wor=T
|
|
75
92
|
#### WOR LINE GENERATION ####
|
76
93
|
if has_wor and write_wor:
|
77
94
|
result.append("%wor:\t"+" ".join(wor_elems))
|
95
|
+
if has_coref:
|
96
|
+
result.append("%coref:\t"+" ".join(coref_elems))
|
97
|
+
|
78
98
|
|
79
99
|
|
80
100
|
#### EXTRA LINE GENERATION ####
|
@@ -2,7 +2,7 @@ from .pipeline import BatchalignPipeline
|
|
2
2
|
from .base import BatchalignEngine
|
3
3
|
from .asr import WhisperEngine, RevEngine, WhisperXEngine
|
4
4
|
|
5
|
-
from .morphosyntax import StanzaEngine
|
5
|
+
from .morphosyntax import StanzaEngine, CorefEngine
|
6
6
|
from .cleanup import NgramRetraceEngine, DisfluencyReplacementEngine
|
7
7
|
from .speaker import NemoSpeakerEngine
|
8
8
|
|
@@ -6,7 +6,7 @@ Tabulate default packages and options.
|
|
6
6
|
from batchalign import (WhisperEngine, WhisperFAEngine, StanzaEngine, RevEngine,
|
7
7
|
NgramRetraceEngine, DisfluencyReplacementEngine, WhisperUTREngine,
|
8
8
|
RevUTREngine, EvaluationEngine, WhisperXEngine, NemoSpeakerEngine,
|
9
|
-
StanzaUtteranceEngine)
|
9
|
+
StanzaUtteranceEngine, CorefEngine)
|
10
10
|
from batchalign import BatchalignPipeline
|
11
11
|
from batchalign.models import resolve
|
12
12
|
|
@@ -27,6 +27,7 @@ DEFAULT_PACKAGES = {
|
|
27
27
|
"retracing": "ngram",
|
28
28
|
"eval": "evaluation",
|
29
29
|
"utterance": "stanza_utt",
|
30
|
+
"coref": "stanza_coref",
|
30
31
|
}
|
31
32
|
|
32
33
|
LANGUAGE_OVERRIDE_PACKAGES = {
|
@@ -124,6 +125,9 @@ def dispatch_pipeline(pkg_str, lang, num_speakers=None, **arg_overrides):
|
|
124
125
|
engines.append(NemoSpeakerEngine(num_speakers=num_speakers))
|
125
126
|
elif engine == "stanza_utt":
|
126
127
|
engines.append(StanzaUtteranceEngine())
|
128
|
+
elif engine == "stanza_coref":
|
129
|
+
engines.append(CorefEngine())
|
130
|
+
|
127
131
|
|
128
132
|
L.debug(f"Done initalizing packages.")
|
129
133
|
return BatchalignPipeline(*engines)
|
@@ -0,0 +1,45 @@
|
|
1
|
+
import stanza
|
2
|
+
from batchalign.utils.dp import PayloadTarget, ReferenceTarget, Match, align
|
3
|
+
from warnings import warn
|
4
|
+
from batchalign.document import *
|
5
|
+
from batchalign.constants import *
|
6
|
+
from batchalign.pipelines.base import *
|
7
|
+
from batchalign.formats.chat.parser import chat_parse_utterance
|
8
|
+
|
9
|
+
from batchalign.utils.dp import *
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
class CorefEngine(BatchalignEngine):
|
14
|
+
tasks = [ Task.COREF ]
|
15
|
+
|
16
|
+
def process(self, doc, **kwargs):
|
17
|
+
if "eng" not in doc.langs:
|
18
|
+
warn("Coreference resolution is only supported for English documents.")
|
19
|
+
return
|
20
|
+
|
21
|
+
detokenized = " ".join([i.strip(include_retrace=True, include_fp=True) for i in doc.content if isinstance(i, Utterance)])
|
22
|
+
pipeline = stanza.Pipeline(lang="en", processors="tokenize, coref")
|
23
|
+
|
24
|
+
coref_chains = pipeline(detokenized).sentences
|
25
|
+
coref_chains = [(j.text,
|
26
|
+
[Coref(start=chain.is_start,
|
27
|
+
end=chain.is_end,
|
28
|
+
chain=chain.chain.index) for chain in j.coref_chains])
|
29
|
+
for i in coref_chains
|
30
|
+
for j in i.words]
|
31
|
+
|
32
|
+
payloads = [PayloadTarget(i[0], i[1]) for i in coref_chains]
|
33
|
+
references = [ReferenceTarget(j.text, (ut_id, form_id)) for ut_id, i in enumerate(doc.content)
|
34
|
+
if isinstance(i, Utterance)
|
35
|
+
for form_id, j in enumerate(i.content)]
|
36
|
+
alignment = align(payloads, references, tqdm=False)
|
37
|
+
|
38
|
+
for i in alignment:
|
39
|
+
if isinstance(i, Match):
|
40
|
+
(ut, form) = i.reference_payload
|
41
|
+
doc.content[ut].content[form].coreference = i.payload
|
42
|
+
|
43
|
+
return doc
|
44
|
+
|
45
|
+
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: batchalign
|
3
|
-
Version: 0.7.5a7
|
3
|
+
Version: 0.7.6a0
|
4
4
|
Summary: Python Speech Language Sample Analysis
|
5
5
|
Author: Brian MacWhinney, Houjun Liu
|
6
6
|
Author-email: macw@cmu.edu, houjun@cmu.edu
|
@@ -22,7 +22,7 @@ Requires-Dist: plotly>=5.18.0
|
|
22
22
|
Requires-Dist: transformers>=4.37
|
23
23
|
Requires-Dist: tokenizers>=0.14.1
|
24
24
|
Requires-Dist: pycountry>=22.3
|
25
|
-
Requires-Dist: stanza>=1.
|
25
|
+
Requires-Dist: stanza[transformers]>=1.9.1
|
26
26
|
Requires-Dist: scipy~=1.11
|
27
27
|
Requires-Dist: rev_ai>=2.18.0
|
28
28
|
Requires-Dist: rich~=13.6
|
@@ -70,6 +70,7 @@ batchalign/pipelines/cleanup/support/test.test
|
|
70
70
|
batchalign/pipelines/fa/__init__.py
|
71
71
|
batchalign/pipelines/fa/whisper_fa.py
|
72
72
|
batchalign/pipelines/morphosyntax/__init__.py
|
73
|
+
batchalign/pipelines/morphosyntax/coref.py
|
73
74
|
batchalign/pipelines/morphosyntax/ud.py
|
74
75
|
batchalign/pipelines/morphosyntax/fr/case.py
|
75
76
|
batchalign/pipelines/morphosyntax/ja/verbforms.py
|
@@ -1 +0,0 @@
|
|
1
|
-
from .ud import StanzaEngine
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/cleanup/support/filled_pauses.eng
RENAMED
File without changes
|
{batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/cleanup/support/replacements.eng
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/formats/chat/test_chat_generator.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/formats/textgrid/test_textgrid.py
RENAMED
File without changes
|
File without changes
|
{batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/pipelines/asr/test_asr_pipeline.py
RENAMED
File without changes
|
File without changes
|
{batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/pipelines/cleanup/test_disfluency.py
RENAMED
File without changes
|
{batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/pipelines/cleanup/test_parse_support.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/pipelines/test_pipeline_models.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|