bead 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,508 @@
|
|
|
1
|
+
"""Adapter for glazing package (VerbNet, PropBank, FrameNet).
|
|
2
|
+
|
|
3
|
+
This module provides an adapter to fetch lexical items from VerbNet, PropBank,
|
|
4
|
+
and FrameNet via the glazing package using the proper loader classes.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import Any, Literal
|
|
10
|
+
|
|
11
|
+
import glazing
|
|
12
|
+
from glazing.framenet.loader import FrameNetLoader
|
|
13
|
+
from glazing.propbank.loader import PropBankLoader
|
|
14
|
+
from glazing.verbnet.loader import VerbNetLoader
|
|
15
|
+
|
|
16
|
+
from bead.data.language_codes import LanguageCode
|
|
17
|
+
from bead.resources.adapters.base import ResourceAdapter
|
|
18
|
+
from bead.resources.adapters.cache import AdapterCache
|
|
19
|
+
from bead.resources.lexical_item import LexicalItem
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class GlazingAdapter(ResourceAdapter):
|
|
23
|
+
"""Adapter for glazing package (VerbNet, PropBank, FrameNet).
|
|
24
|
+
|
|
25
|
+
This adapter fetches verb frame information from VerbNet, PropBank, or
|
|
26
|
+
FrameNet and converts it to LexicalItem format. Frame information is
|
|
27
|
+
stored in the attributes field.
|
|
28
|
+
|
|
29
|
+
Parameters
|
|
30
|
+
----------
|
|
31
|
+
resource : Literal["verbnet", "propbank", "framenet"]
|
|
32
|
+
Which glazing resource to use.
|
|
33
|
+
cache : AdapterCache | None
|
|
34
|
+
Optional cache instance. If None, no caching is performed.
|
|
35
|
+
|
|
36
|
+
Examples
|
|
37
|
+
--------
|
|
38
|
+
>>> adapter = GlazingAdapter(resource="verbnet")
|
|
39
|
+
>>> items = adapter.fetch_items(query="break", language_code="en")
|
|
40
|
+
>>> all(item.language_code == "en" for item in items)
|
|
41
|
+
True
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
def __init__(
|
|
45
|
+
self,
|
|
46
|
+
resource: Literal["verbnet", "propbank", "framenet"] = "verbnet",
|
|
47
|
+
cache: AdapterCache | None = None,
|
|
48
|
+
) -> None:
|
|
49
|
+
self.resource = resource
|
|
50
|
+
self.cache = cache
|
|
51
|
+
self._loader: VerbNetLoader | PropBankLoader | FrameNetLoader | None = None
|
|
52
|
+
|
|
53
|
+
def _get_loader(self) -> VerbNetLoader | PropBankLoader | FrameNetLoader:
|
|
54
|
+
"""Get or create the appropriate loader for the resource.
|
|
55
|
+
|
|
56
|
+
Returns
|
|
57
|
+
-------
|
|
58
|
+
VerbNetLoader | PropBankLoader | FrameNetLoader
|
|
59
|
+
The loader instance for the configured resource.
|
|
60
|
+
"""
|
|
61
|
+
if self._loader is None:
|
|
62
|
+
if self.resource == "verbnet":
|
|
63
|
+
self._loader = VerbNetLoader()
|
|
64
|
+
elif self.resource == "propbank":
|
|
65
|
+
self._loader = PropBankLoader()
|
|
66
|
+
else: # framenet
|
|
67
|
+
self._loader = FrameNetLoader()
|
|
68
|
+
return self._loader
|
|
69
|
+
|
|
70
|
+
def fetch_items(
    self,
    query: str | None = None,
    language_code: LanguageCode = None,
    **kwargs: Any,
) -> list[LexicalItem]:
    """Fetch items from glazing resource.

    When a cache is configured, the cache is consulted first and a
    successful fetch is written back under the same key.

    Parameters
    ----------
    query : str | None
        Lemma or predicate to query (e.g., "break", "run").
        If None, fetches ALL items from the resource.
    language_code : LanguageCode
        Language code filter. Glazing resources are primarily English,
        so language_code="en" is typical. Other languages may not be
        supported.
    **kwargs : Any
        Additional parameters:
        - include_frames (bool): Include detailed frame information
          (syntax, examples, descriptions). Default: False.

    Returns
    -------
    list[LexicalItem]
        Lexical items with frame information in attributes.

    Raises
    ------
    RuntimeError
        If glazing resource access (or the cache write that follows it)
        fails with any exception other than NotImplementedError.
    NotImplementedError
        Propagated unchanged from the resource-specific fetch.

    Examples
    --------
    >>> # Query specific verb
    >>> adapter = GlazingAdapter(resource="verbnet")
    >>> items = adapter.fetch_items(query="break", language_code="en")
    >>> len(items) > 0
    True
    >>> # Fetch all items from resource
    >>> all_items = adapter.fetch_items(query=None, language_code="en")
    >>> len(all_items) > 100
    True
    >>> # Include detailed frame information
    >>> items = adapter.fetch_items(  # doctest: +SKIP
    ...     query="break", language_code="en", include_frames=True
    ... )
    >>> "frames" in items[0].attributes  # doctest: +SKIP
    True
    """
    cache = self.cache

    # check cache; compare against None explicitly so that a cache object
    # that happens to be falsy (e.g. empty and defining __len__) is still used
    cache_key = None
    if cache is not None:
        cache_key = cache.make_key(
            f"glazing_{self.resource}",
            query=query,
            language_code=language_code,
            **kwargs,
        )
        cached = cache.get(cache_key)
        if cached is not None:
            return cached

    # fetch from glazing
    try:
        items = self._fetch_from_resource(query, language_code, **kwargs)

        # cache result
        if cache is not None and cache_key is not None:
            cache.set(cache_key, items)

        return items

    except NotImplementedError:
        # re-raise NotImplementedError without wrapping
        raise
    except Exception as e:
        raise RuntimeError(
            f"Failed to fetch from glazing {self.resource}: {e}"
        ) from e
|
|
150
|
+
|
|
151
|
+
def _fetch_from_resource(
|
|
152
|
+
self, query: str | None, language_code: LanguageCode, **kwargs: Any
|
|
153
|
+
) -> list[LexicalItem]:
|
|
154
|
+
"""Fetch from specific glazing resource.
|
|
155
|
+
|
|
156
|
+
Parameters
|
|
157
|
+
----------
|
|
158
|
+
query : str | None
|
|
159
|
+
Lemma or predicate to query. If None, fetch all items.
|
|
160
|
+
language_code : LanguageCode
|
|
161
|
+
Language code filter.
|
|
162
|
+
**kwargs : Any
|
|
163
|
+
Additional parameters (e.g., include_frames).
|
|
164
|
+
|
|
165
|
+
Returns
|
|
166
|
+
-------
|
|
167
|
+
list[LexicalItem]
|
|
168
|
+
Lexical items from the resource.
|
|
169
|
+
"""
|
|
170
|
+
if self.resource == "verbnet":
|
|
171
|
+
return self._fetch_verbnet(query, language_code, **kwargs)
|
|
172
|
+
elif self.resource == "propbank":
|
|
173
|
+
return self._fetch_propbank(query, language_code, **kwargs)
|
|
174
|
+
else: # framenet
|
|
175
|
+
return self._fetch_framenet(query, language_code, **kwargs)
|
|
176
|
+
|
|
177
|
+
def _fetch_verbnet(
    self, query: str | None, language_code: LanguageCode, **kwargs: Any
) -> list[LexicalItem]:
    """Build one LexicalItem per matching member of every VerbNet class.

    Parameters
    ----------
    query : str | None
        Verb lemma to search for (exact match on the member name).
        If None, every member of every class is returned.
    language_code : LanguageCode
        Language code stored on each item; "en" is used when it is falsy.
    **kwargs : Any
        Additional parameters:
        - include_frames (bool): Include detailed frame information.

    Returns
    -------
    list[LexicalItem]
        LexicalItem objects for matching verb classes.
    """

    def describe_frame(frame: Any) -> dict[str, Any]:
        # flatten one frame into description, syntax and example fields
        syntax = frame.syntax
        if syntax and hasattr(syntax, "elements"):
            syntax_pairs = [
                (part.pos, getattr(part, "value", None)) for part in syntax.elements
            ]
        else:
            syntax_pairs = []
        return {
            "primary": frame.description.primary,
            "secondary": frame.description.secondary,
            "syntax": syntax_pairs,
            "examples": [ex.text for ex in frame.examples] if frame.examples else [],
        }

    loader = self._get_loader()
    assert isinstance(loader, VerbNetLoader)

    want_frames = kwargs.get("include_frames", False)
    collected: list[LexicalItem] = []

    for verb_class in loader.classes.values():
        # classes without members contribute nothing
        if not verb_class.members:
            continue

        for member in verb_class.members:
            if not (query is None or member.name == query):
                continue

            # class-level information is copied onto every member item
            features: dict[str, Any] = {
                "pos": "VERB",
                "verbnet_class": verb_class.id,
                "themroles": (
                    [role.type for role in verb_class.themroles]
                    if verb_class.themroles
                    else []
                ),
                "frame_count": len(verb_class.frames) if verb_class.frames else 0,
            }

            # detailed frames only on request, and only when the class has any
            if want_frames and verb_class.frames:
                features["frames"] = [describe_frame(f) for f in verb_class.frames]

            collected.append(
                LexicalItem(
                    lemma=member.name,
                    language_code=language_code or "en",
                    features=features,
                    source="VerbNet",
                )
            )

    return collected
|
|
265
|
+
|
|
266
|
+
def _fetch_propbank(
    self, query: str | None, language_code: LanguageCode, **kwargs: Any
) -> list[LexicalItem]:
    """Collect LexicalItems from PropBank framesets.

    Parameters
    ----------
    query : str | None
        Predicate lemma passed to the loader's ``get_frameset``.
        If None, every frameset known to the loader is used.
    language_code : LanguageCode
        Language code filter.
    **kwargs : Any
        Additional parameters:
        - include_frames (bool): Include detailed frame/roleset information.

    Returns
    -------
    list[LexicalItem]
        LexicalItem objects for matching predicates.
    """
    loader = self._get_loader()
    assert isinstance(loader, PropBankLoader)

    want_frames = kwargs.get("include_frames", False)

    # decide which framesets to convert
    if query is None:
        candidates = loader.framesets.values()
    else:
        match = loader.get_frameset(query)
        # a falsy lookup result means "no such predicate"
        candidates = [match] if match else []

    results: list[LexicalItem] = []
    for frameset in candidates:
        results.extend(
            self._create_propbank_items(frameset, language_code, want_frames)
        )
    return results
|
|
308
|
+
|
|
309
|
+
def _create_propbank_items(
    self, frameset: Any, language_code: LanguageCode, include_frames: bool
) -> list[LexicalItem]:
    """Turn each roleset of a PropBank frameset into a LexicalItem.

    Parameters
    ----------
    frameset : Any
        PropBank frameset object.
    language_code : LanguageCode
        Language code stored on each item; "en" is used when it is falsy.
    include_frames : bool
        Whether to include detailed roleset information (the roles list).

    Returns
    -------
    list[LexicalItem]
        LexicalItem objects for the frameset's rolesets; empty when the
        frameset has no rolesets.
    """
    if not frameset.rolesets:
        return []

    # every roleset of a frameset shares the same lemma
    if hasattr(frameset, "predicate_lemma"):
        lemma = frameset.predicate_lemma
    else:
        lemma = str(frameset)

    produced: list[LexicalItem] = []
    for roleset in frameset.rolesets:
        features: dict[str, Any] = {
            "pos": "VERB",
            "propbank_roleset_id": roleset.id,
            "roleset_name": roleset.name or "",
        }

        # role details are opt-in
        if include_frames and roleset.roles:
            features["roles"] = [
                {"arg": role.n, "function": role.f, "description": role.descr}
                for role in roleset.roles
            ]

        # examples are attached whenever present, regardless of include_frames
        examples = getattr(roleset, "examples", None)
        if examples:
            features["examples"] = [ex.text for ex in examples if hasattr(ex, "text")]

        produced.append(
            LexicalItem(
                lemma=lemma,
                language_code=language_code or "en",
                features=features,
                source="PropBank",
            )
        )

    return produced
|
|
373
|
+
|
|
374
|
+
def _fetch_framenet(
    self, query: str | None, language_code: LanguageCode, **kwargs: Any
) -> list[LexicalItem]:
    """Collect LexicalItems from the lexical units of all FrameNet frames.

    Parameters
    ----------
    query : str | None
        Lemma to search for, compared against the part of the lexical
        unit name before the first ".". If None, fetch ALL lexical units.
    language_code : LanguageCode
        Language code filter.
    **kwargs : Any
        Additional parameters:
        - include_frames (bool): Include detailed frame information.

    Returns
    -------
    list[LexicalItem]
        LexicalItem objects for matching lexical units.
    """
    loader = self._get_loader()
    assert isinstance(loader, FrameNetLoader)

    want_frames = kwargs.get("include_frames", False)
    found: list[LexicalItem] = []

    for frame in loader.frames:
        # frames without lexical units contribute nothing
        if not frame.lexical_units:
            continue

        for unit in frame.lexical_units:
            # lexical unit names have the form "lemma.pos"
            unit_lemma = unit.name.partition(".")[0]
            if query is None or unit_lemma == query:
                found.append(
                    self._create_framenet_item(
                        unit, frame, language_code, want_frames
                    )
                )

    return found
|
|
420
|
+
|
|
421
|
+
def _create_framenet_item(
    self, lu: Any, frame: Any, language_code: LanguageCode, include_frames: bool
) -> LexicalItem:
    """Create a LexicalItem from a FrameNet lexical unit.

    Parameters
    ----------
    lu : Any
        FrameNet LexicalUnit object.
    frame : Any
        FrameNet Frame object containing the lexical unit.
    language_code : LanguageCode
        Language code stored on the item; "en" is used when it is falsy.
    include_frames : bool
        Whether to include detailed frame information (frame definition
        and frame elements).

    Returns
    -------
    LexicalItem
        LexicalItem object for the lexical unit.
    """
    # extract lemma from lexical unit name (format: "lemma.pos")
    lemma = lu.name.split(".")[0] if "." in lu.name else lu.name

    # map FrameNet POS to standard POS tags; the closed-class tags are listed
    # explicitly so pronouns, numerals, articles, conjunctions and
    # interjections are no longer labelled as verbs
    pos_map = {
        "V": "VERB",
        "N": "NOUN",
        "A": "ADJ",
        "ADV": "ADV",
        "PREP": "ADP",
        "PRON": "PRON",
        "NUM": "NUM",
        "ART": "DET",
        "C": "CCONJ",
        "SCON": "SCONJ",
        "INTJ": "INTJ",
    }
    # tags missing from the table still fall back to VERB
    pos = pos_map.get(lu.pos, "VERB")

    # build attributes
    attributes: dict[str, Any] = {
        "framenet_frame": frame.name,
        "framenet_frame_id": frame.id,
        "lexical_unit_name": lu.name,
        "lexical_unit_id": lu.id,
    }

    # add definition if available
    if hasattr(lu, "definition") and lu.definition:
        attributes["definition"] = lu.definition

    # add detailed frame information if requested
    if include_frames:
        attributes["frame_definition"] = frame.definition

        # add frame elements (semantic roles)
        if frame.frame_elements:
            attributes["frame_elements"] = [
                {
                    "name": fe.name,
                    "core_type": fe.core_type,
                    "definition": fe.definition
                    if hasattr(fe, "definition")
                    else None,
                }
                for fe in frame.frame_elements
            ]

    features = {"pos": pos, **attributes}
    return LexicalItem(
        lemma=lemma,
        language_code=language_code or "en",
        features=features,
        source="FrameNet",
    )
|
|
485
|
+
|
|
486
|
+
def is_available(self) -> bool:
    """Report whether the glazing data can be used by this adapter.

    Returns
    -------
    bool
        True if ``glazing.check_initialization()`` and loader creation both
        complete without raising, False otherwise.

    Examples
    --------
    >>> adapter = GlazingAdapter()
    >>> adapter.is_available()
    True
    """
    try:
        # check if glazing is initialized
        glazing.check_initialization()
        # building the loader verifies the data is accessible
        self._get_loader()
    except Exception:
        # any failure at all counts as "not available"
        return False
    else:
        return True
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""Registry for managing resource adapters.
|
|
2
|
+
|
|
3
|
+
This module provides a registry for discovering and instantiating adapters
|
|
4
|
+
by name.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from bead.resources.adapters.base import ResourceAdapter
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class AdapterRegistry:
    """Registry for managing resource adapters.

    The registry allows adapters to be registered by name and retrieved
    with custom initialization parameters.

    Examples
    --------
    >>> from bead.resources.adapters.glazing import GlazingAdapter
    >>> registry = AdapterRegistry()
    >>> registry.register("glazing", GlazingAdapter)
    >>> adapter = registry.get("glazing", resource="verbnet")
    >>> isinstance(adapter, GlazingAdapter)
    True
    """

    def __init__(self) -> None:
        # maps adapter name -> adapter class (not instance)
        self._adapters: dict[str, type[ResourceAdapter]] = {}

    def register(self, name: str, adapter_class: type[ResourceAdapter]) -> None:
        """Register an adapter class.

        Registering a name that already exists replaces the earlier class.

        Parameters
        ----------
        name : str
            Adapter name (e.g., "glazing", "unimorph").
        adapter_class : type[ResourceAdapter]
            Adapter class (not instance) that subclasses ResourceAdapter.

        Raises
        ------
        ValueError
            If name is empty or adapter_class is not a ResourceAdapter subclass
            (including when it is not a class at all).

        Examples
        --------
        >>> from bead.resources.adapters.glazing import GlazingAdapter
        >>> registry = AdapterRegistry()
        >>> registry.register("glazing", GlazingAdapter)
        >>> "glazing" in registry.list_available()
        True
        """
        if not name or not name.strip():
            raise ValueError("Adapter name must be non-empty")
        # runtime check for subclass; pyright can't verify this at compile time.
        # issubclass() raises TypeError for non-class arguments, so check for a
        # class first to keep the documented ValueError contract.
        is_adapter_class = isinstance(adapter_class, type) and issubclass(
            adapter_class, ResourceAdapter
        )  # type: ignore[reportUnnecessaryIsInstance]
        if not is_adapter_class:
            raise ValueError(f"{adapter_class} must be a subclass of ResourceAdapter")
        self._adapters[name] = adapter_class

    def get(self, name: str, **kwargs: Any) -> ResourceAdapter:
        """Get adapter instance by name.

        A new instance is constructed on every call.

        Parameters
        ----------
        name : str
            Adapter name (must be registered).
        **kwargs : Any
            Arguments passed to adapter constructor.

        Returns
        -------
        ResourceAdapter
            Adapter instance.

        Raises
        ------
        KeyError
            If adapter name is not registered.

        Examples
        --------
        >>> from bead.resources.adapters.glazing import GlazingAdapter
        >>> registry = AdapterRegistry()
        >>> registry.register("glazing", GlazingAdapter)
        >>> adapter = registry.get("glazing", resource="verbnet")
        >>> adapter.resource
        'verbnet'
        """
        if name not in self._adapters:
            raise KeyError(
                f"Adapter '{name}' not registered. Available: {self.list_available()}"
            )
        adapter_class = self._adapters[name]
        return adapter_class(**kwargs)

    def list_available(self) -> list[str]:
        """List names of available adapters.

        Returns
        -------
        list[str]
            Sorted list of registered adapter names.

        Examples
        --------
        >>> registry = AdapterRegistry()
        >>> registry.list_available()
        []
        >>> from bead.resources.adapters.glazing import GlazingAdapter
        >>> registry.register("glazing", GlazingAdapter)
        >>> registry.list_available()
        ['glazing']
        """
        return sorted(self._adapters)
|