bead 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,349 @@
|
|
|
1
|
+
"""Abstract base class for mapping external frame inventories to Templates.
|
|
2
|
+
|
|
3
|
+
This module provides language-agnostic base classes for generating Template
|
|
4
|
+
objects from external linguistic frame inventories (e.g., VerbNet, FrameNet,
|
|
5
|
+
PropBank, valency lexicons).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from abc import ABC, abstractmethod
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from bead.resources.constraints import Constraint
|
|
14
|
+
from bead.resources.template import Slot, Template
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class FrameToTemplateMapper(ABC):
    """Abstract interface for turning external frame inventories into Templates.

    Concrete mappers provide the language- and resource-specific logic by
    implementing the three abstract hooks declared here. The two ``create_*``
    methods are concrete helpers for building template names and metadata.

    Examples
    --------
    A skeletal VerbNet mapper:

    >>> class VerbNetMapper(FrameToTemplateMapper):
    ...     def generate_from_frame(self, verb_lemma, frame_data):
    ...         slots = self.map_frame_to_slots(frame_data)
    ...         constraints = self.generate_constraints(frame_data, slots)
    ...         return Template(
    ...             name=f"{verb_lemma}_{frame_data['id']}",
    ...             template_string=frame_data['template_string'],
    ...             slots=slots,
    ...             constraints=constraints
    ...         )
    ...
    ...     def map_frame_to_slots(self, frame_data):
    ...         # Extract slots from VerbNet syntax
    ...         return {}
    ...
    ...     def generate_constraints(self, frame_data, slots):
    ...         # Generate constraints from VerbNet restrictions
    ...         return []
    """

    @abstractmethod
    def generate_from_frame(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> Template | list[Template]:
        """Build one or more Templates from a frame specification.

        Main entry point for template generation; the accepted arguments are
        defined by each concrete subclass.

        Parameters
        ----------
        *args : Any
            Positional arguments (frame data, identifiers, etc.).
        **kwargs : Any
            Keyword arguments (configuration options, etc.).

        Returns
        -------
        Template | list[Template]
            The generated template, or a list of templates when a frame has
            several realizations (e.g., different complementizer types,
            alternations).

        Examples
        --------
        VerbNet implementation:

        >>> mapper.generate_from_frame(
        ...     verb_lemma="think",
        ...     verbnet_class="29.9",
        ...     frame_data={"primary": "NP V that S"}
        ... )  # doctest: +SKIP
        """
        ...

    @abstractmethod
    def map_frame_to_slots(
        self,
        frame_data: Any,
    ) -> dict[str, Slot]:
        """Translate the elements of a frame into Template slots.

        Parameters
        ----------
        frame_data : Any
            Frame specification from the external inventory. Its concrete
            type depends on the resource (dict, object, etc.).

        Returns
        -------
        dict[str, Slot]
            Slots keyed by slot name.

        Examples
        --------
        Mapping VerbNet syntax to slots:

        >>> slots = mapper.map_frame_to_slots({
        ...     "syntax": [
        ...         ("NP", "Agent"),
        ...         ("V", None),
        ...         ("NP", "Theme")
        ...     ]
        ... })  # doctest: +SKIP
        >>> "subject" in slots  # doctest: +SKIP
        True
        """
        ...

    @abstractmethod
    def generate_constraints(
        self,
        frame_data: Any,
        slots: dict[str, Slot],
    ) -> list[Constraint]:
        """Derive multi-slot constraints from a frame specification.

        Parameters
        ----------
        frame_data : Any
            Frame specification from the external inventory.
        slots : dict[str, Slot]
            Slots that have already been created for this frame.

        Returns
        -------
        list[Constraint]
            Multi-slot constraints for the template.

        Examples
        --------
        Generating constraints from VerbNet restrictions:

        >>> constraints = mapper.generate_constraints(
        ...     frame_data={"restrictions": [...]},
        ...     slots={"subject": ..., "verb": ...}
        ... )  # doctest: +SKIP
        """
        ...

    def create_template_name(
        self,
        *identifiers: str,
        separator: str = "_",
    ) -> str:
        """Join identifiers into a sanitized template name.

        Within every identifier each space, dot, and hyphen is replaced by
        ``separator`` (in that order); the cleaned identifiers are then joined
        with ``separator``.

        Parameters
        ----------
        *identifiers : str
            Components to include in the name (e.g., verb, class, frame).
        separator : str
            String used both as the replacement for unsafe characters and as
            the joiner between components (default: "_").

        Returns
        -------
        str
            Sanitized template name; the empty string when no identifiers
            are given.

        Examples
        --------
        >>> mapper = ConcreteMapper()  # doctest: +SKIP
        >>> mapper.create_template_name(
        ...     "think", "29.9", "that-clause"
        ... )  # doctest: +SKIP
        'think_29_9_that_clause'
        """

        def _clean(text: str) -> str:
            # apply the replacements sequentially, in the fixed order
            # space -> dot -> hyphen
            for unsafe in (" ", ".", "-"):
                text = text.replace(unsafe, separator)
            return text

        return separator.join(_clean(part) for part in identifiers)

    def create_template_metadata(
        self,
        frame_data: dict[str, Any],
        **additional_metadata: Any,
    ) -> dict[str, Any]:
        """Assemble the metadata dictionary for a template.

        Produces a new dictionary holding every entry of ``frame_data``
        overlaid with ``additional_metadata``; on a key clash the additional
        value wins. The input ``frame_data`` is not modified.

        Parameters
        ----------
        frame_data : dict[str, Any]
            Frame specification from the external inventory.
        **additional_metadata : Any
            Extra entries to include.

        Returns
        -------
        dict[str, Any]
            Metadata dictionary for the Template.metadata field.

        Examples
        --------
        >>> mapper = ConcreteMapper()  # doctest: +SKIP
        >>> metadata = mapper.create_template_metadata(
        ...     frame_data={"id": "29.9-1", "examples": [...]},
        ...     verb_lemma="think"
        ... )  # doctest: +SKIP
        """
        # frame entries first, then keyword extras layered on top
        return dict(frame_data, **additional_metadata)
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
class MultiFrameMapper(FrameToTemplateMapper):
    """Mapper that expands one frame into several template variants.

    Some frame specifications support multiple realizations (e.g., different
    complementizer types, voice alternations). Subclasses enumerate the
    variants in ``get_frame_variants`` and build a single template per
    variant in ``_generate_variant``; ``generate_from_frame`` ties the two
    together.

    Examples
    --------
    >>> class ClausalMapper(MultiFrameMapper):
    ...     def get_frame_variants(self, frame_data):
    ...         # Return list of variant specifications
    ...         return [
    ...             {"comp": "that", "mood": "declarative"},
    ...             {"comp": "whether", "mood": "interrogative"},
    ...         ]
    ...
    ...     def generate_from_frame(self, verb, frame_data):
    ...         variants = self.get_frame_variants(frame_data)
    ...         return [self._generate_variant(verb, v) for v in variants]
    ...
    ...     def map_frame_to_slots(self, frame_data):
    ...         return {}
    ...
    ...     def generate_constraints(self, frame_data, slots):
    ...         return []
    """

    @abstractmethod
    def get_frame_variants(
        self,
        frame_data: Any,
    ) -> list[Any]:
        """Enumerate the variants described by a frame specification.

        Parameters
        ----------
        frame_data : Any
            Frame specification from the external inventory.

        Returns
        -------
        list[Any]
            Variant specifications, one per possible realization of the
            frame.

        Examples
        --------
        >>> variants = mapper.get_frame_variants({
        ...     "complementizers": ["that", "whether", "if"]
        ... })  # doctest: +SKIP
        >>> len(variants)  # doctest: +SKIP
        3
        """
        ...

    def generate_from_frame(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> list[Template]:
        """Generate one template per frame variant.

        Reads ``frame_data`` from the keyword arguments, asks
        ``get_frame_variants`` for the variants, and calls
        ``_generate_variant`` once per variant with the original arguments
        plus a ``variant_data`` keyword holding that variant. Subclasses can
        override for custom logic.

        Parameters
        ----------
        *args : Any
            Positional arguments forwarded unchanged to ``_generate_variant``.
        **kwargs : Any
            Keyword arguments forwarded to ``_generate_variant``; must
            contain a non-None ``frame_data`` entry.

        Returns
        -------
        list[Template]
            Templates for all variants, in the order the variants were
            returned.

        Raises
        ------
        ValueError
            If ``frame_data`` is missing from the keyword arguments or is
            None.
        """
        if kwargs.get("frame_data") is None:
            raise ValueError("frame_data must be provided in kwargs")

        # each call receives its own kwargs dict, so the caller's kwargs are
        # never mutated and variants cannot leak into one another
        return [
            self._generate_variant(*args, **{**kwargs, "variant_data": spec})
            for spec in self.get_frame_variants(kwargs["frame_data"])
        ]

    @abstractmethod
    def _generate_variant(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> Template:
        """Build the template for a single variant.

        Parameters
        ----------
        *args : Any
            Positional arguments.
        **kwargs : Any
            Keyword arguments, including ``variant_data``.

        Returns
        -------
        Template
            Template for this variant.
        """
        ...
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Simulation framework for generating synthetic human judgments.
|
|
2
|
+
|
|
3
|
+
Provides annotators, noise models, and strategies for testing active
|
|
4
|
+
learning pipelines without real human data.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from bead.simulation.annotators.base import SimulatedAnnotator
|
|
8
|
+
from bead.simulation.annotators.lm_based import LMBasedAnnotator
|
|
9
|
+
from bead.simulation.noise_models.base import NoiseModel
|
|
10
|
+
from bead.simulation.noise_models.temperature import TemperatureNoiseModel
|
|
11
|
+
from bead.simulation.runner import SimulationRunner
|
|
12
|
+
from bead.simulation.strategies.base import SimulationStrategy
|
|
13
|
+
from bead.simulation.strategies.binary import BinaryStrategy
|
|
14
|
+
from bead.simulation.strategies.categorical import CategoricalStrategy
|
|
15
|
+
from bead.simulation.strategies.forced_choice import ForcedChoiceStrategy
|
|
16
|
+
from bead.simulation.strategies.ordinal_scale import OrdinalScaleStrategy
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"SimulatedAnnotator",
|
|
20
|
+
"LMBasedAnnotator",
|
|
21
|
+
"NoiseModel",
|
|
22
|
+
"TemperatureNoiseModel",
|
|
23
|
+
"SimulationRunner",
|
|
24
|
+
"SimulationStrategy",
|
|
25
|
+
"BinaryStrategy",
|
|
26
|
+
"CategoricalStrategy",
|
|
27
|
+
"ForcedChoiceStrategy",
|
|
28
|
+
"OrdinalScaleStrategy",
|
|
29
|
+
]
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Simulated annotators for generating synthetic judgments."""
|
|
2
|
+
|
|
3
|
+
from bead.simulation.annotators.base import SimulatedAnnotator
|
|
4
|
+
from bead.simulation.annotators.distance_based import DistanceBasedAnnotator
|
|
5
|
+
from bead.simulation.annotators.lm_based import LMBasedAnnotator
|
|
6
|
+
from bead.simulation.annotators.oracle import OracleAnnotator
|
|
7
|
+
from bead.simulation.annotators.random import RandomAnnotator
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"SimulatedAnnotator",
|
|
11
|
+
"DistanceBasedAnnotator",
|
|
12
|
+
"LMBasedAnnotator",
|
|
13
|
+
"OracleAnnotator",
|
|
14
|
+
"RandomAnnotator",
|
|
15
|
+
]
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
"""Base class for simulated annotators."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from bead.config.simulation import SimulatedAnnotatorConfig
|
|
12
|
+
from bead.items.item import Item
|
|
13
|
+
from bead.items.item_template import ItemTemplate
|
|
14
|
+
from bead.simulation.noise_models.base import NoiseModel
|
|
15
|
+
from bead.simulation.strategies.base import SimulationStrategy
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class SimulatedAnnotator(ABC):
|
|
19
|
+
"""Abstract base for simulated annotators.
|
|
20
|
+
|
|
21
|
+
An annotator combines:
|
|
22
|
+
- Task-specific strategy (how to respond to each task type)
|
|
23
|
+
- Noise model (how to add human-like variability)
|
|
24
|
+
- Configuration (model output keys, random seed, etc.)
|
|
25
|
+
|
|
26
|
+
The annotator orchestrates the simulation process and provides
|
|
27
|
+
a unified interface for generating judgments.
|
|
28
|
+
|
|
29
|
+
Parameters
|
|
30
|
+
----------
|
|
31
|
+
config
|
|
32
|
+
Configuration for annotator.
|
|
33
|
+
random_state
|
|
34
|
+
Random seed (overrides config if provided).
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
def __init__(
|
|
38
|
+
self, config: SimulatedAnnotatorConfig, random_state: int | None = None
|
|
39
|
+
) -> None:
|
|
40
|
+
self.config = config
|
|
41
|
+
self.random_state = random_state or config.random_state
|
|
42
|
+
self.rng = np.random.RandomState(self.random_state)
|
|
43
|
+
|
|
44
|
+
# will be set by subclasses
|
|
45
|
+
self.strategies: dict[str, SimulationStrategy] = {}
|
|
46
|
+
self.noise_model: NoiseModel | None = None
|
|
47
|
+
|
|
48
|
+
@classmethod
|
|
49
|
+
def from_config(cls, config: SimulatedAnnotatorConfig) -> SimulatedAnnotator:
|
|
50
|
+
"""Create annotator from configuration.
|
|
51
|
+
|
|
52
|
+
Parameters
|
|
53
|
+
----------
|
|
54
|
+
config : SimulatedAnnotatorConfig
|
|
55
|
+
Configuration specifying annotator type and parameters.
|
|
56
|
+
|
|
57
|
+
Returns
|
|
58
|
+
-------
|
|
59
|
+
SimulatedAnnotator
|
|
60
|
+
Configured annotator instance.
|
|
61
|
+
|
|
62
|
+
Raises
|
|
63
|
+
------
|
|
64
|
+
ValueError
|
|
65
|
+
If strategy is unknown.
|
|
66
|
+
|
|
67
|
+
Examples
|
|
68
|
+
--------
|
|
69
|
+
>>> from bead.config.simulation import SimulatedAnnotatorConfig
|
|
70
|
+
>>> config = SimulatedAnnotatorConfig(strategy="lm_score")
|
|
71
|
+
>>> annotator = SimulatedAnnotator.from_config(config)
|
|
72
|
+
"""
|
|
73
|
+
# import here to avoid circular dependency
|
|
74
|
+
from bead.simulation.annotators.distance_based import ( # noqa: PLC0415
|
|
75
|
+
DistanceBasedAnnotator,
|
|
76
|
+
)
|
|
77
|
+
from bead.simulation.annotators.lm_based import ( # noqa: PLC0415
|
|
78
|
+
LMBasedAnnotator,
|
|
79
|
+
)
|
|
80
|
+
from bead.simulation.annotators.oracle import OracleAnnotator # noqa: PLC0415
|
|
81
|
+
from bead.simulation.annotators.random import RandomAnnotator # noqa: PLC0415
|
|
82
|
+
|
|
83
|
+
if config.strategy == "lm_score":
|
|
84
|
+
return LMBasedAnnotator(config)
|
|
85
|
+
elif config.strategy == "random":
|
|
86
|
+
return RandomAnnotator(config)
|
|
87
|
+
elif config.strategy == "oracle":
|
|
88
|
+
return OracleAnnotator(config)
|
|
89
|
+
elif config.strategy == "distance":
|
|
90
|
+
return DistanceBasedAnnotator(config)
|
|
91
|
+
else:
|
|
92
|
+
msg = f"Unknown strategy: {config.strategy}"
|
|
93
|
+
raise ValueError(msg)
|
|
94
|
+
|
|
95
|
+
@abstractmethod
|
|
96
|
+
def annotate(
|
|
97
|
+
self, item: Item, item_template: ItemTemplate
|
|
98
|
+
) -> str | int | float | list[str]:
|
|
99
|
+
"""Generate annotation for single item.
|
|
100
|
+
|
|
101
|
+
Parameters
|
|
102
|
+
----------
|
|
103
|
+
item : Item
|
|
104
|
+
Item to annotate.
|
|
105
|
+
item_template : ItemTemplate
|
|
106
|
+
Template defining task structure.
|
|
107
|
+
|
|
108
|
+
Returns
|
|
109
|
+
-------
|
|
110
|
+
str | int | float | list[str]
|
|
111
|
+
Annotation (format depends on task type).
|
|
112
|
+
"""
|
|
113
|
+
|
|
114
|
+
def annotate_batch(
|
|
115
|
+
self,
|
|
116
|
+
items: list[Item],
|
|
117
|
+
item_templates: list[ItemTemplate] | ItemTemplate,
|
|
118
|
+
) -> dict[str, str | int | float | list[str]]:
|
|
119
|
+
"""Generate annotations for batch of items.
|
|
120
|
+
|
|
121
|
+
Parameters
|
|
122
|
+
----------
|
|
123
|
+
items : list[Item]
|
|
124
|
+
Items to annotate.
|
|
125
|
+
item_templates : list[ItemTemplate] | ItemTemplate
|
|
126
|
+
Templates (one per item or single template for all).
|
|
127
|
+
|
|
128
|
+
Returns
|
|
129
|
+
-------
|
|
130
|
+
dict[str, str | int | float | list[str]]
|
|
131
|
+
Mapping from item ID to annotation.
|
|
132
|
+
|
|
133
|
+
Examples
|
|
134
|
+
--------
|
|
135
|
+
>>> annotations = annotator.annotate_batch(items, template)
|
|
136
|
+
>>> annotations[str(items[0].id)]
|
|
137
|
+
'option_a'
|
|
138
|
+
"""
|
|
139
|
+
# handle single template
|
|
140
|
+
templates_list: list[ItemTemplate]
|
|
141
|
+
if not isinstance(item_templates, list):
|
|
142
|
+
templates_list = [item_templates] * len(items)
|
|
143
|
+
else:
|
|
144
|
+
templates_list = item_templates
|
|
145
|
+
|
|
146
|
+
# annotate each item
|
|
147
|
+
annotations: dict[str, str | int | float | list[str]] = {}
|
|
148
|
+
for item, template in zip(items, templates_list, strict=True):
|
|
149
|
+
annotation = self.annotate(item, template)
|
|
150
|
+
annotations[str(item.id)] = annotation
|
|
151
|
+
|
|
152
|
+
return annotations
|
|
153
|
+
|
|
154
|
+
def get_strategy(self, task_type: str) -> SimulationStrategy:
|
|
155
|
+
"""Get strategy for task type.
|
|
156
|
+
|
|
157
|
+
Parameters
|
|
158
|
+
----------
|
|
159
|
+
task_type : str
|
|
160
|
+
Task type (e.g., "forced_choice").
|
|
161
|
+
|
|
162
|
+
Returns
|
|
163
|
+
-------
|
|
164
|
+
SimulationStrategy
|
|
165
|
+
Strategy for this task type.
|
|
166
|
+
|
|
167
|
+
Raises
|
|
168
|
+
------
|
|
169
|
+
ValueError
|
|
170
|
+
If task type not supported.
|
|
171
|
+
"""
|
|
172
|
+
if task_type not in self.strategies:
|
|
173
|
+
msg = f"Task type '{task_type}' not supported by {self.__class__.__name__}"
|
|
174
|
+
raise ValueError(msg)
|
|
175
|
+
return self.strategies[task_type]
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""Distance-based annotator using embeddings."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
from bead.simulation.annotators.base import SimulatedAnnotator
|
|
8
|
+
from bead.simulation.strategies.binary import BinaryStrategy
|
|
9
|
+
from bead.simulation.strategies.categorical import CategoricalStrategy
|
|
10
|
+
from bead.simulation.strategies.cloze import ClozeStrategy
|
|
11
|
+
from bead.simulation.strategies.forced_choice import ForcedChoiceStrategy
|
|
12
|
+
from bead.simulation.strategies.free_text import FreeTextStrategy
|
|
13
|
+
from bead.simulation.strategies.magnitude import MagnitudeStrategy
|
|
14
|
+
from bead.simulation.strategies.multi_select import MultiSelectStrategy
|
|
15
|
+
from bead.simulation.strategies.ordinal_scale import OrdinalScaleStrategy
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from bead.config.simulation import SimulatedAnnotatorConfig
|
|
19
|
+
from bead.items.item import Item
|
|
20
|
+
from bead.items.item_template import ItemTemplate
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class DistanceBasedAnnotator(SimulatedAnnotator):
    """Simulated annotator that feeds embedding-based model outputs to strategies.

    The annotator keeps one strategy instance per supported task type and
    delegates each annotation to the strategy matching the template's task
    type. The strategy is told which entry of the item's model outputs to read
    through ``config.model_output_key``; how that entry is turned into a
    decision is implemented by the strategy classes, not by this class.

    An optional temperature noise model is applied to the strategy's response
    when ``config.noise_model.noise_type`` is ``"temperature"``. Any other
    noise type results in no noise being applied.

    Parameters
    ----------
    config
        Configuration for annotator.

    Examples
    --------
    >>> from bead.config.simulation import SimulatedAnnotatorConfig, NoiseModelConfig
    >>> config = SimulatedAnnotatorConfig(
    ...     strategy="distance",
    ...     model_output_key="embedding",
    ...     noise_model=NoiseModelConfig(noise_type="none")
    ... )
    >>> annotator = DistanceBasedAnnotator(config)
    >>> # judgment = annotator.annotate(item, template)
    """

    def __init__(self, config: SimulatedAnnotatorConfig) -> None:
        """Set up the per-task strategies and the optional noise model.

        Parameters
        ----------
        config : SimulatedAnnotatorConfig
            Annotator configuration; ``config.noise_model`` selects the noise
            model.
        """
        super().__init__(config)

        # one strategy instance per task type, created in a fixed order;
        # these are the same strategy classes used for score-based simulation
        strategy_factories = (
            ("forced_choice", ForcedChoiceStrategy),
            ("binary", BinaryStrategy),
            ("ordinal_scale", OrdinalScaleStrategy),
            ("categorical", CategoricalStrategy),
            ("magnitude", MagnitudeStrategy),
            ("multi_select", MultiSelectStrategy),
            ("free_text", FreeTextStrategy),
            ("cloze", ClozeStrategy),
        )
        self.strategies = {name: factory() for name, factory in strategy_factories}

        # only "temperature" yields a noise model; "none" and every other
        # noise type mean that responses are returned without noise
        noise_config = config.noise_model
        if noise_config.noise_type == "temperature":
            from bead.simulation.noise_models.temperature import (  # noqa: PLC0415
                TemperatureNoiseModel,
            )

            noise = TemperatureNoiseModel(temperature=noise_config.temperature)
        else:
            noise = None
        self.noise_model = noise

    def annotate(
        self, item: Item, item_template: ItemTemplate
    ) -> str | int | float | bool | list[str]:
        """Produce one simulated annotation for an item.

        Parameters
        ----------
        item : Item
            Item to annotate.
        item_template : ItemTemplate
            Template defining the task; its ``task_type`` selects the strategy.

        Returns
        -------
        str | int | float | bool | list[str]
            Annotation (format depends on task type).

        Notes
        -----
        This method performs no embedding-to-score conversion itself. It
        passes ``config.model_output_key`` to the selected strategy, which is
        responsible for reading the corresponding model output from the item.
        Errors raised by ``get_strategy`` or by the strategy's
        ``validate_item`` are not caught here.
        """
        # pick the strategy for this task type and let it check the item first
        chosen = self.get_strategy(item_template.task_type)
        chosen.validate_item(item, item_template)

        base_response = chosen.simulate_response(
            item=item,
            item_template=item_template,
            model_output_key=self.config.model_output_key,
            rng=self.rng,
        )

        noise = self.noise_model
        if noise is None:
            return base_response

        # hand the noise model the item, template and strategy as context
        noise_context = {
            "item": item,
            "template": item_template,
            "strategy": chosen,
        }
        return noise.apply(value=base_response, context=noise_context, rng=self.rng)
|