batchalign 0.7.5a7__tar.gz → 0.7.6a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. {batchalign-0.7.5a7/batchalign.egg-info → batchalign-0.7.6a0}/PKG-INFO +2 -2
  2. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/cli/cli.py +21 -0
  3. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/cli/dispatch.py +1 -0
  4. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/document.py +10 -1
  5. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/formats/chat/generator.py +20 -0
  6. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/resolve.py +1 -0
  7. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/__init__.py +1 -1
  8. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/dispatch.py +5 -1
  9. batchalign-0.7.6a0/batchalign/pipelines/morphosyntax/__init__.py +3 -0
  10. batchalign-0.7.6a0/batchalign/pipelines/morphosyntax/coref.py +45 -0
  11. batchalign-0.7.6a0/batchalign/version +3 -0
  12. {batchalign-0.7.5a7 → batchalign-0.7.6a0/batchalign.egg-info}/PKG-INFO +2 -2
  13. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign.egg-info/SOURCES.txt +1 -0
  14. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign.egg-info/requires.txt +1 -1
  15. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/setup.py +1 -1
  16. batchalign-0.7.5a7/batchalign/pipelines/morphosyntax/__init__.py +0 -1
  17. batchalign-0.7.5a7/batchalign/version +0 -3
  18. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/LICENSE +0 -0
  19. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/MANIFEST.in +0 -0
  20. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/README.md +0 -0
  21. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/__init__.py +0 -0
  22. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/__main__.py +0 -0
  23. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/cli/__init__.py +0 -0
  24. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/constants.py +0 -0
  25. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/errors.py +0 -0
  26. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/formats/__init__.py +0 -0
  27. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/formats/base.py +0 -0
  28. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/formats/chat/__init__.py +0 -0
  29. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/formats/chat/file.py +0 -0
  30. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/formats/chat/lexer.py +0 -0
  31. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/formats/chat/parser.py +0 -0
  32. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/formats/chat/utils.py +0 -0
  33. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/formats/textgrid/__init__.py +0 -0
  34. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/formats/textgrid/file.py +0 -0
  35. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/formats/textgrid/generator.py +0 -0
  36. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/formats/textgrid/parser.py +0 -0
  37. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/__init__.py +0 -0
  38. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/speaker/__init__.py +0 -0
  39. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/speaker/config.yaml +0 -0
  40. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/speaker/infer.py +0 -0
  41. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/speaker/utils.py +0 -0
  42. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/training/__init__.py +0 -0
  43. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/training/run.py +0 -0
  44. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/training/utils.py +0 -0
  45. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/utils.py +0 -0
  46. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/utterance/__init__.py +0 -0
  47. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/utterance/dataset.py +0 -0
  48. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/utterance/execute.py +0 -0
  49. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/utterance/infer.py +0 -0
  50. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/utterance/prep.py +0 -0
  51. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/utterance/train.py +0 -0
  52. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/whisper/__init__.py +0 -0
  53. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/whisper/infer_asr.py +0 -0
  54. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/models/whisper/infer_fa.py +0 -0
  55. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/analysis/__init__.py +0 -0
  56. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/analysis/eval.py +0 -0
  57. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/asr/__init__.py +0 -0
  58. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/asr/rev.py +0 -0
  59. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/asr/utils.py +0 -0
  60. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/asr/whisper.py +0 -0
  61. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/asr/whisperx.py +0 -0
  62. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/base.py +0 -0
  63. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/cleanup/__init__.py +0 -0
  64. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  65. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  66. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  67. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/cleanup/retrace.py +0 -0
  68. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  69. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  70. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/cleanup/support/test.test +0 -0
  71. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/fa/__init__.py +0 -0
  72. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  73. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  74. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  75. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  76. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/pipeline.py +0 -0
  77. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/speaker/__init__.py +0 -0
  78. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  79. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/utr/__init__.py +0 -0
  80. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/utr/rev_utr.py +0 -0
  81. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/utr/utils.py +0 -0
  82. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  83. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/utterance/__init__.py +0 -0
  84. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  85. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/__init__.py +0 -0
  86. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/conftest.py +0 -0
  87. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  88. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  89. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  90. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  91. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  92. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  93. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  94. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  95. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  96. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  97. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  98. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  99. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/pipelines/fixures.py +0 -0
  100. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  101. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  102. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/tests/test_document.py +0 -0
  103. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/utils/__init__.py +0 -0
  104. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/utils/config.py +0 -0
  105. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/utils/dp.py +0 -0
  106. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign/utils/utils.py +0 -0
  107. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign.egg-info/dependency_links.txt +0 -0
  108. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign.egg-info/entry_points.txt +0 -0
  109. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/batchalign.egg-info/top_level.txt +0 -0
  110. {batchalign-0.7.5a7 → batchalign-0.7.6a0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.5a7
3
+ Version: 0.7.6a0
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -22,7 +22,7 @@ Requires-Dist: plotly>=5.18.0
22
22
  Requires-Dist: transformers>=4.37
23
23
  Requires-Dist: tokenizers>=0.14.1
24
24
  Requires-Dist: pycountry>=22.3
25
- Requires-Dist: stanza>=1.7
25
+ Requires-Dist: stanza[transformers]>=1.9.1
26
26
  Requires-Dist: scipy~=1.11
27
27
  Requires-Dist: rev_ai>=2.18.0
28
28
  Requires-Dist: rich~=13.6
@@ -217,6 +217,27 @@ def morphotag(ctx, in_dir, out_dir, **kwargs):
217
217
  loader, writer, C)
218
218
 
219
219
 
220
+ #################### COREF ################################
221
+
222
+ @batchalign.command(hidden=True)
223
+ @common_options
224
+ @click.pass_context
225
+ def coref(ctx, in_dir, out_dir, **kwargs):
226
+ """Perform coreference analysis on transcripts."""
227
+
228
+ def loader(file):
229
+ cf = CHATFile(path=os.path.abspath(file))
230
+ doc = cf.doc
231
+ return doc, {}
232
+
233
+ def writer(doc, output):
234
+ CHATFile(doc=doc).write(output)
235
+
236
+ _dispatch("coref", "eng", 1, ["cha"], ctx,
237
+ in_dir, out_dir,
238
+ loader, writer, C)
239
+
240
+
220
241
  #################### UTSEG ################################
221
242
 
222
243
  @batchalign.command()
@@ -47,6 +47,7 @@ Cmd2Task = {
47
47
  "morphotag": "morphosyntax",
48
48
  "benchmark": "asr,eval",
49
49
  "utseg": "utterance",
50
+ "coref": "coref",
50
51
  }
51
52
 
52
53
  # this is the main runner used by all functions
@@ -29,7 +29,8 @@ class Task(IntEnum):
29
29
  FORCED_ALIGNMENT = 9
30
30
  FEATURE_EXTRACT = 10
31
31
  MORPHOSYNTAX = 11
32
- WER = 12
32
+ COREF = 12
33
+ WER = 13
33
34
 
34
35
 
35
36
  DEBUG__G = 0
@@ -51,6 +52,7 @@ TypeMap = {
51
52
  Task.FEATURE_EXTRACT: TaskType.ANALYSIS,
52
53
  Task.RETRACE_ANALYSIS: TaskType.PROCESSING,
53
54
  Task.DISFLUENCY_ANALYSIS: TaskType.PROCESSING,
55
+ Task.COREF: TaskType.PROCESSING,
54
56
  Task.WER: TaskType.ANALYSIS,
55
57
 
56
58
  Task.DEBUG__G: TaskType.GENERATION,
@@ -69,6 +71,7 @@ TaskFriendlyName = {
69
71
  Task.FEATURE_EXTRACT: "Feature Extraction",
70
72
  Task.RETRACE_ANALYSIS: "Retrace Analysis",
71
73
  Task.DISFLUENCY_ANALYSIS: "Disfluncy Analysis",
74
+ Task.COREF: "Coreference Resolution",
72
75
  Task.WER: "Word Error Rate",
73
76
  Task.DEBUG__G: "TEST_GENERATION",
74
77
  Task.DEBUG__P: "TEST_PROCESSING",
@@ -103,12 +106,18 @@ class Morphology(BaseModel):
103
106
  pos: str # pos like "pron"
104
107
  feats: str # string feats "Dem-Acc-S1"
105
108
 
109
+ class Coref(BaseModel):
110
+ start: bool
111
+ end: bool
112
+ chain: int
113
+
106
114
  class Form(BaseModel):
107
115
  text: str # the text
108
116
  # MILISCEONDS
109
117
  time: Optional[Tuple[int, int]] = Field(default=None) # word bullet
110
118
  morphology: Optional[List[Morphology]] = Field(default=None) # mor
111
119
  dependency: Optional[List[Dependency]] = Field(default=None) # gra
120
+ coreference: Optional[List[Coref]] = Field(default=None) # coref
112
121
  type: TokenType = Field(default=TokenType.REGULAR) # whether the field is a regular word (i.e. not a filled pause, not a feature, not a retrace, etc.)
113
122
 
114
123
  class Tier(BaseModel):
@@ -33,6 +33,8 @@ def generate_chat_utterance(utterance: Utterance, special_mor=False, write_wor=T
33
33
  gras = []
34
34
  has_wor = False
35
35
  wor_elems = []
36
+ has_coref = False
37
+ coref_elems = []
36
38
 
37
39
  for i in utterance.content:
38
40
  mors.append(i.morphology)
@@ -43,6 +45,21 @@ def generate_chat_utterance(utterance: Utterance, special_mor=False, write_wor=T
43
45
  else:
44
46
  wor_elems.append(i.text)
45
47
 
48
+ if i.coreference:
49
+ has_coref = True
50
+ coref_str_form = ""
51
+ for j in i.coreference:
52
+ coref_str = ""
53
+ if j.start:
54
+ coref_str += "("
55
+ coref_str += str(j.chain)
56
+ if j.end:
57
+ coref_str += ")"
58
+ coref_str_form += coref_str
59
+ coref_elems.append(coref_str_form)
60
+ else:
61
+ coref_elems.append("-")
62
+
46
63
  if bool(mors[-1]) != bool(gras[-1]):
47
64
  warnings.warn(f"Batchalign has detected a mismatch between lengths of mor and gra tiers for utterance; output will not pass CHATTER; line='{main_line}'")
48
65
 
@@ -75,6 +92,9 @@ def generate_chat_utterance(utterance: Utterance, special_mor=False, write_wor=T
75
92
  #### WOR LINE GENERATION ####
76
93
  if has_wor and write_wor:
77
94
  result.append("%wor:\t"+" ".join(wor_elems))
95
+ if has_coref:
96
+ result.append("%coref:\t"+" ".join(coref_elems))
97
+
78
98
 
79
99
 
80
100
  #### EXTRA LINE GENERATION ####
@@ -12,6 +12,7 @@ resolver = {
12
12
  },
13
13
  "whisper": {
14
14
  'eng': ("talkbank/CHATWhisper-en-large-v1", "openai/whisper-large-v2"),
15
+ 'yue': ("alvanlii/whisper-small-cantonese", "alvanlii/whisper-small-cantonese"),
15
16
  }
16
17
  }
17
18
 
@@ -2,7 +2,7 @@ from .pipeline import BatchalignPipeline
2
2
  from .base import BatchalignEngine
3
3
  from .asr import WhisperEngine, RevEngine, WhisperXEngine
4
4
 
5
- from .morphosyntax import StanzaEngine
5
+ from .morphosyntax import StanzaEngine, CorefEngine
6
6
  from .cleanup import NgramRetraceEngine, DisfluencyReplacementEngine
7
7
  from .speaker import NemoSpeakerEngine
8
8
 
@@ -6,7 +6,7 @@ Tabulate default packages and options.
6
6
  from batchalign import (WhisperEngine, WhisperFAEngine, StanzaEngine, RevEngine,
7
7
  NgramRetraceEngine, DisfluencyReplacementEngine, WhisperUTREngine,
8
8
  RevUTREngine, EvaluationEngine, WhisperXEngine, NemoSpeakerEngine,
9
- StanzaUtteranceEngine)
9
+ StanzaUtteranceEngine, CorefEngine)
10
10
  from batchalign import BatchalignPipeline
11
11
  from batchalign.models import resolve
12
12
 
@@ -27,6 +27,7 @@ DEFAULT_PACKAGES = {
27
27
  "retracing": "ngram",
28
28
  "eval": "evaluation",
29
29
  "utterance": "stanza_utt",
30
+ "coref": "stanza_coref",
30
31
  }
31
32
 
32
33
  LANGUAGE_OVERRIDE_PACKAGES = {
@@ -124,6 +125,9 @@ def dispatch_pipeline(pkg_str, lang, num_speakers=None, **arg_overrides):
124
125
  engines.append(NemoSpeakerEngine(num_speakers=num_speakers))
125
126
  elif engine == "stanza_utt":
126
127
  engines.append(StanzaUtteranceEngine())
128
+ elif engine == "stanza_coref":
129
+ engines.append(CorefEngine())
130
+
127
131
 
128
132
  L.debug(f"Done initalizing packages.")
129
133
  return BatchalignPipeline(*engines)
@@ -0,0 +1,3 @@
1
+ from .ud import StanzaEngine
2
+ from .coref import CorefEngine
3
+
@@ -0,0 +1,45 @@
1
+ import stanza
2
+ from batchalign.utils.dp import PayloadTarget, ReferenceTarget, Match, align
3
+ from warnings import warn
4
+ from batchalign.document import *
5
+ from batchalign.constants import *
6
+ from batchalign.pipelines.base import *
7
+ from batchalign.formats.chat.parser import chat_parse_utterance
8
+
9
+ from batchalign.utils.dp import *
10
+
11
+
12
+
13
+ class CorefEngine(BatchalignEngine):
14
+ tasks = [ Task.COREF ]
15
+
16
+ def process(self, doc, **kwargs):
17
+ if "eng" not in doc.langs:
18
+ warn("Coreference resolution is only supported for English documents.")
19
+ return
20
+
21
+ detokenized = " ".join([i.strip(include_retrace=True, include_fp=True) for i in doc.content if isinstance(i, Utterance)])
22
+ pipeline = stanza.Pipeline(lang="en", processors="tokenize, coref")
23
+
24
+ coref_chains = pipeline(detokenized).sentences
25
+ coref_chains = [(j.text,
26
+ [Coref(start=chain.is_start,
27
+ end=chain.is_end,
28
+ chain=chain.chain.index) for chain in j.coref_chains])
29
+ for i in coref_chains
30
+ for j in i.words]
31
+
32
+ payloads = [PayloadTarget(i[0], i[1]) for i in coref_chains]
33
+ references = [ReferenceTarget(j.text, (ut_id, form_id)) for ut_id, i in enumerate(doc.content)
34
+ if isinstance(i, Utterance)
35
+ for form_id, j in enumerate(i.content)]
36
+ alignment = align(payloads, references, tqdm=False)
37
+
38
+ for i in alignment:
39
+ if isinstance(i, Match):
40
+ (ut, form) = i.reference_payload
41
+ doc.content[ut].content[form].coreference = i.payload
42
+
43
+ return doc
44
+
45
+
@@ -0,0 +1,3 @@
1
+ 0.7.6-alpha.0
2
+ September 27th, 2024
3
+ initial coreference support
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.5a7
3
+ Version: 0.7.6a0
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -22,7 +22,7 @@ Requires-Dist: plotly>=5.18.0
22
22
  Requires-Dist: transformers>=4.37
23
23
  Requires-Dist: tokenizers>=0.14.1
24
24
  Requires-Dist: pycountry>=22.3
25
- Requires-Dist: stanza>=1.7
25
+ Requires-Dist: stanza[transformers]>=1.9.1
26
26
  Requires-Dist: scipy~=1.11
27
27
  Requires-Dist: rev_ai>=2.18.0
28
28
  Requires-Dist: rich~=13.6
@@ -70,6 +70,7 @@ batchalign/pipelines/cleanup/support/test.test
70
70
  batchalign/pipelines/fa/__init__.py
71
71
  batchalign/pipelines/fa/whisper_fa.py
72
72
  batchalign/pipelines/morphosyntax/__init__.py
73
+ batchalign/pipelines/morphosyntax/coref.py
73
74
  batchalign/pipelines/morphosyntax/ud.py
74
75
  batchalign/pipelines/morphosyntax/fr/case.py
75
76
  batchalign/pipelines/morphosyntax/ja/verbforms.py
@@ -12,7 +12,7 @@ plotly>=5.18.0
12
12
  transformers>=4.37
13
13
  tokenizers>=0.14.1
14
14
  pycountry>=22.3
15
- stanza>=1.7
15
+ stanza[transformers]>=1.9.1
16
16
  scipy~=1.11
17
17
  rev_ai>=2.18.0
18
18
  rich~=13.6
@@ -40,7 +40,7 @@ setup(
40
40
  "transformers>=4.37",
41
41
  "tokenizers>=0.14.1",
42
42
  "pycountry>=22.3",
43
- "stanza>=1.7",
43
+ "stanza[transformers]>=1.9.1",
44
44
  "scipy~=1.11",
45
45
  "rev_ai>=2.18.0",
46
46
  "rich~=13.6",
@@ -1 +0,0 @@
1
- from .ud import StanzaEngine
@@ -1,3 +0,0 @@
1
- 0.7.5-alpha.7
2
- September 7th, 2024
3
- batch hanging utterance bug
File without changes
File without changes
File without changes
File without changes