bead 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. bead/__init__.py +11 -0
  2. bead/__main__.py +11 -0
  3. bead/active_learning/__init__.py +15 -0
  4. bead/active_learning/config.py +231 -0
  5. bead/active_learning/loop.py +566 -0
  6. bead/active_learning/models/__init__.py +24 -0
  7. bead/active_learning/models/base.py +852 -0
  8. bead/active_learning/models/binary.py +910 -0
  9. bead/active_learning/models/categorical.py +943 -0
  10. bead/active_learning/models/cloze.py +862 -0
  11. bead/active_learning/models/forced_choice.py +956 -0
  12. bead/active_learning/models/free_text.py +773 -0
  13. bead/active_learning/models/lora.py +365 -0
  14. bead/active_learning/models/magnitude.py +835 -0
  15. bead/active_learning/models/multi_select.py +795 -0
  16. bead/active_learning/models/ordinal_scale.py +811 -0
  17. bead/active_learning/models/peft_adapter.py +155 -0
  18. bead/active_learning/models/random_effects.py +639 -0
  19. bead/active_learning/selection.py +354 -0
  20. bead/active_learning/strategies.py +391 -0
  21. bead/active_learning/trainers/__init__.py +26 -0
  22. bead/active_learning/trainers/base.py +210 -0
  23. bead/active_learning/trainers/data_collator.py +172 -0
  24. bead/active_learning/trainers/dataset_utils.py +261 -0
  25. bead/active_learning/trainers/huggingface.py +304 -0
  26. bead/active_learning/trainers/lightning.py +324 -0
  27. bead/active_learning/trainers/metrics.py +424 -0
  28. bead/active_learning/trainers/mixed_effects.py +551 -0
  29. bead/active_learning/trainers/model_wrapper.py +509 -0
  30. bead/active_learning/trainers/registry.py +104 -0
  31. bead/adapters/__init__.py +11 -0
  32. bead/adapters/huggingface.py +61 -0
  33. bead/behavioral/__init__.py +116 -0
  34. bead/behavioral/analytics.py +646 -0
  35. bead/behavioral/extraction.py +343 -0
  36. bead/behavioral/merging.py +343 -0
  37. bead/cli/__init__.py +11 -0
  38. bead/cli/active_learning.py +513 -0
  39. bead/cli/active_learning_commands.py +779 -0
  40. bead/cli/completion.py +359 -0
  41. bead/cli/config.py +624 -0
  42. bead/cli/constraint_builders.py +286 -0
  43. bead/cli/deployment.py +859 -0
  44. bead/cli/deployment_trials.py +493 -0
  45. bead/cli/deployment_ui.py +332 -0
  46. bead/cli/display.py +378 -0
  47. bead/cli/items.py +960 -0
  48. bead/cli/items_factories.py +776 -0
  49. bead/cli/list_constraints.py +714 -0
  50. bead/cli/lists.py +490 -0
  51. bead/cli/main.py +430 -0
  52. bead/cli/models.py +877 -0
  53. bead/cli/resource_loaders.py +621 -0
  54. bead/cli/resources.py +1036 -0
  55. bead/cli/shell.py +356 -0
  56. bead/cli/simulate.py +840 -0
  57. bead/cli/templates.py +1158 -0
  58. bead/cli/training.py +1080 -0
  59. bead/cli/utils.py +614 -0
  60. bead/cli/workflow.py +1273 -0
  61. bead/config/__init__.py +68 -0
  62. bead/config/active_learning.py +1009 -0
  63. bead/config/config.py +192 -0
  64. bead/config/defaults.py +118 -0
  65. bead/config/deployment.py +217 -0
  66. bead/config/env.py +147 -0
  67. bead/config/item.py +45 -0
  68. bead/config/list.py +193 -0
  69. bead/config/loader.py +149 -0
  70. bead/config/logging.py +42 -0
  71. bead/config/model.py +49 -0
  72. bead/config/paths.py +46 -0
  73. bead/config/profiles.py +320 -0
  74. bead/config/resources.py +47 -0
  75. bead/config/serialization.py +210 -0
  76. bead/config/simulation.py +206 -0
  77. bead/config/template.py +238 -0
  78. bead/config/validation.py +267 -0
  79. bead/data/__init__.py +65 -0
  80. bead/data/base.py +87 -0
  81. bead/data/identifiers.py +97 -0
  82. bead/data/language_codes.py +61 -0
  83. bead/data/metadata.py +270 -0
  84. bead/data/range.py +123 -0
  85. bead/data/repository.py +358 -0
  86. bead/data/serialization.py +249 -0
  87. bead/data/timestamps.py +89 -0
  88. bead/data/validation.py +349 -0
  89. bead/data_collection/__init__.py +11 -0
  90. bead/data_collection/jatos.py +223 -0
  91. bead/data_collection/merger.py +154 -0
  92. bead/data_collection/prolific.py +198 -0
  93. bead/deployment/__init__.py +5 -0
  94. bead/deployment/distribution.py +402 -0
  95. bead/deployment/jatos/__init__.py +1 -0
  96. bead/deployment/jatos/api.py +200 -0
  97. bead/deployment/jatos/exporter.py +210 -0
  98. bead/deployment/jspsych/__init__.py +9 -0
  99. bead/deployment/jspsych/biome.json +44 -0
  100. bead/deployment/jspsych/config.py +411 -0
  101. bead/deployment/jspsych/generator.py +598 -0
  102. bead/deployment/jspsych/package.json +51 -0
  103. bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
  104. bead/deployment/jspsych/randomizer.py +299 -0
  105. bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
  106. bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
  107. bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
  108. bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
  109. bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
  110. bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
  111. bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
  112. bead/deployment/jspsych/src/plugins/rating.ts +248 -0
  113. bead/deployment/jspsych/src/slopit/index.ts +9 -0
  114. bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
  115. bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
  116. bead/deployment/jspsych/templates/experiment.css +1 -0
  117. bead/deployment/jspsych/templates/experiment.js.template +289 -0
  118. bead/deployment/jspsych/templates/index.html +51 -0
  119. bead/deployment/jspsych/templates/randomizer.js +241 -0
  120. bead/deployment/jspsych/templates/randomizer.js.template +313 -0
  121. bead/deployment/jspsych/trials.py +723 -0
  122. bead/deployment/jspsych/tsconfig.json +23 -0
  123. bead/deployment/jspsych/tsup.config.ts +30 -0
  124. bead/deployment/jspsych/ui/__init__.py +1 -0
  125. bead/deployment/jspsych/ui/components.py +383 -0
  126. bead/deployment/jspsych/ui/styles.py +411 -0
  127. bead/dsl/__init__.py +80 -0
  128. bead/dsl/ast.py +168 -0
  129. bead/dsl/context.py +178 -0
  130. bead/dsl/errors.py +71 -0
  131. bead/dsl/evaluator.py +570 -0
  132. bead/dsl/grammar.lark +81 -0
  133. bead/dsl/parser.py +231 -0
  134. bead/dsl/stdlib.py +929 -0
  135. bead/evaluation/__init__.py +13 -0
  136. bead/evaluation/convergence.py +485 -0
  137. bead/evaluation/interannotator.py +398 -0
  138. bead/items/__init__.py +40 -0
  139. bead/items/adapters/__init__.py +70 -0
  140. bead/items/adapters/anthropic.py +224 -0
  141. bead/items/adapters/api_utils.py +167 -0
  142. bead/items/adapters/base.py +216 -0
  143. bead/items/adapters/google.py +259 -0
  144. bead/items/adapters/huggingface.py +1074 -0
  145. bead/items/adapters/openai.py +323 -0
  146. bead/items/adapters/registry.py +202 -0
  147. bead/items/adapters/sentence_transformers.py +224 -0
  148. bead/items/adapters/togetherai.py +309 -0
  149. bead/items/binary.py +515 -0
  150. bead/items/cache.py +558 -0
  151. bead/items/categorical.py +593 -0
  152. bead/items/cloze.py +757 -0
  153. bead/items/constructor.py +784 -0
  154. bead/items/forced_choice.py +413 -0
  155. bead/items/free_text.py +681 -0
  156. bead/items/generation.py +432 -0
  157. bead/items/item.py +396 -0
  158. bead/items/item_template.py +787 -0
  159. bead/items/magnitude.py +573 -0
  160. bead/items/multi_select.py +621 -0
  161. bead/items/ordinal_scale.py +569 -0
  162. bead/items/scoring.py +448 -0
  163. bead/items/validation.py +723 -0
  164. bead/lists/__init__.py +30 -0
  165. bead/lists/balancer.py +263 -0
  166. bead/lists/constraints.py +1067 -0
  167. bead/lists/experiment_list.py +286 -0
  168. bead/lists/list_collection.py +378 -0
  169. bead/lists/partitioner.py +1141 -0
  170. bead/lists/stratification.py +254 -0
  171. bead/participants/__init__.py +73 -0
  172. bead/participants/collection.py +699 -0
  173. bead/participants/merging.py +312 -0
  174. bead/participants/metadata_spec.py +491 -0
  175. bead/participants/models.py +276 -0
  176. bead/resources/__init__.py +29 -0
  177. bead/resources/adapters/__init__.py +19 -0
  178. bead/resources/adapters/base.py +104 -0
  179. bead/resources/adapters/cache.py +128 -0
  180. bead/resources/adapters/glazing.py +508 -0
  181. bead/resources/adapters/registry.py +117 -0
  182. bead/resources/adapters/unimorph.py +796 -0
  183. bead/resources/classification.py +856 -0
  184. bead/resources/constraint_builders.py +329 -0
  185. bead/resources/constraints.py +165 -0
  186. bead/resources/lexical_item.py +223 -0
  187. bead/resources/lexicon.py +744 -0
  188. bead/resources/loaders.py +209 -0
  189. bead/resources/template.py +441 -0
  190. bead/resources/template_collection.py +707 -0
  191. bead/resources/template_generation.py +349 -0
  192. bead/simulation/__init__.py +29 -0
  193. bead/simulation/annotators/__init__.py +15 -0
  194. bead/simulation/annotators/base.py +175 -0
  195. bead/simulation/annotators/distance_based.py +135 -0
  196. bead/simulation/annotators/lm_based.py +114 -0
  197. bead/simulation/annotators/oracle.py +182 -0
  198. bead/simulation/annotators/random.py +181 -0
  199. bead/simulation/dsl_extension/__init__.py +3 -0
  200. bead/simulation/noise_models/__init__.py +13 -0
  201. bead/simulation/noise_models/base.py +42 -0
  202. bead/simulation/noise_models/random_noise.py +82 -0
  203. bead/simulation/noise_models/systematic.py +132 -0
  204. bead/simulation/noise_models/temperature.py +86 -0
  205. bead/simulation/runner.py +144 -0
  206. bead/simulation/strategies/__init__.py +23 -0
  207. bead/simulation/strategies/base.py +123 -0
  208. bead/simulation/strategies/binary.py +103 -0
  209. bead/simulation/strategies/categorical.py +123 -0
  210. bead/simulation/strategies/cloze.py +224 -0
  211. bead/simulation/strategies/forced_choice.py +127 -0
  212. bead/simulation/strategies/free_text.py +105 -0
  213. bead/simulation/strategies/magnitude.py +116 -0
  214. bead/simulation/strategies/multi_select.py +129 -0
  215. bead/simulation/strategies/ordinal_scale.py +131 -0
  216. bead/templates/__init__.py +27 -0
  217. bead/templates/adapters/__init__.py +17 -0
  218. bead/templates/adapters/base.py +128 -0
  219. bead/templates/adapters/cache.py +178 -0
  220. bead/templates/adapters/huggingface.py +312 -0
  221. bead/templates/combinatorics.py +103 -0
  222. bead/templates/filler.py +605 -0
  223. bead/templates/renderers.py +177 -0
  224. bead/templates/resolver.py +178 -0
  225. bead/templates/strategies.py +1806 -0
  226. bead/templates/streaming.py +195 -0
  227. bead-0.1.0.dist-info/METADATA +212 -0
  228. bead-0.1.0.dist-info/RECORD +231 -0
  229. bead-0.1.0.dist-info/WHEEL +4 -0
  230. bead-0.1.0.dist-info/entry_points.txt +2 -0
  231. bead-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,349 @@
1
+ """Abstract base class for mapping external frame inventories to Templates.
2
+
3
+ This module provides language-agnostic base classes for generating Template
4
+ objects from external linguistic frame inventories (e.g., VerbNet, FrameNet,
5
+ PropBank, valency lexicons).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from abc import ABC, abstractmethod
11
+ from typing import Any
12
+
13
+ from bead.resources.constraints import Constraint
14
+ from bead.resources.template import Slot, Template
15
+
16
+
17
class FrameToTemplateMapper(ABC):
    """Interface for turning external frame inventories into Templates.

    Concrete mappers provide the resource-specific pieces (assembling
    templates, extracting slots, deriving constraints).  This base class
    only contributes two small helpers: one for building template names
    and one for assembling a metadata dictionary.

    Examples
    --------
    A minimal VerbNet-style mapper:

    >>> class VerbNetMapper(FrameToTemplateMapper):
    ...     def generate_from_frame(self, verb_lemma, frame_data):
    ...         slots = self.map_frame_to_slots(frame_data)
    ...         constraints = self.generate_constraints(frame_data, slots)
    ...         return Template(
    ...             name=f"{verb_lemma}_{frame_data['id']}",
    ...             template_string=frame_data['template_string'],
    ...             slots=slots,
    ...             constraints=constraints
    ...         )
    ...
    ...     def map_frame_to_slots(self, frame_data):
    ...         # Extract slots from VerbNet syntax
    ...         return {}
    ...
    ...     def generate_constraints(self, frame_data, slots):
    ...         # Generate constraints from VerbNet restrictions
    ...         return []
    """

    @abstractmethod
    def generate_from_frame(self, *args: Any, **kwargs: Any) -> Template | list[Template]:
        """Build one or more Templates from a frame specification.

        Main entry point of a mapper; the accepted arguments are defined
        by each subclass.

        Parameters
        ----------
        *args : Any
            Positional arguments (frame data, identifiers, etc.).
        **kwargs : Any
            Keyword arguments (configuration options, etc.).

        Returns
        -------
        Template | list[Template]
            A single template, or several when the frame has more than
            one realization (e.g., different complementizer types or
            alternations).

        Examples
        --------
        >>> mapper.generate_from_frame(
        ...     verb_lemma="think",
        ...     verbnet_class="29.9",
        ...     frame_data={"primary": "NP V that S"}
        ... )  # doctest: +SKIP
        """
        ...

    @abstractmethod
    def map_frame_to_slots(self, frame_data: Any) -> dict[str, Slot]:
        """Translate the elements of a frame into Template slots.

        Parameters
        ----------
        frame_data : Any
            Frame specification from the external inventory.  Its type
            depends on the specific resource (dict, object, etc.).

        Returns
        -------
        dict[str, Slot]
            Slots keyed by slot name.

        Examples
        --------
        >>> slots = mapper.map_frame_to_slots({
        ...     "syntax": [
        ...         ("NP", "Agent"),
        ...         ("V", None),
        ...         ("NP", "Theme")
        ...     ]
        ... })  # doctest: +SKIP
        >>> "subject" in slots  # doctest: +SKIP
        True
        """
        ...

    @abstractmethod
    def generate_constraints(
        self, frame_data: Any, slots: dict[str, Slot]
    ) -> list[Constraint]:
        """Derive multi-slot constraints from a frame specification.

        Parameters
        ----------
        frame_data : Any
            Frame specification from the external inventory.
        slots : dict[str, Slot]
            Slots that have been created for this frame.

        Returns
        -------
        list[Constraint]
            Constraints relating the template's slots to one another.

        Examples
        --------
        >>> constraints = mapper.generate_constraints(
        ...     frame_data={"restrictions": [...]},
        ...     slots={"subject": ..., "verb": ...}
        ... )  # doctest: +SKIP
        """
        ...

    def create_template_name(self, *identifiers: str, separator: str = "_") -> str:
        """Join identifiers into a sanitized template name.

        Within every identifier, spaces, dots and hyphens are replaced by
        ``separator`` (in that order); the cleaned identifiers are then
        joined with ``separator``.

        Parameters
        ----------
        *identifiers : str
            Components to include in the name (e.g., verb, class, frame).
        separator : str
            Replacement for the sanitized characters and glue between
            components (default: "_").

        Returns
        -------
        str
            Sanitized template name; the empty string when no
            identifiers are given.

        Examples
        --------
        >>> mapper = ConcreteMapper()  # doctest: +SKIP
        >>> mapper.create_template_name(
        ...     "think", "29.9", "that-clause"
        ... )  # doctest: +SKIP
        'think_29_9_that_clause'
        """
        cleaned_parts: list[str] = []
        for part in identifiers:
            # Sequential replacement, one character at a time: the order
            # matters when ``separator`` itself contains one of them.
            for unsafe_char in (" ", ".", "-"):
                part = part.replace(unsafe_char, separator)
            cleaned_parts.append(part)
        return separator.join(cleaned_parts)

    def create_template_metadata(
        self, frame_data: dict[str, Any], **additional_metadata: Any
    ) -> dict[str, Any]:
        """Assemble a metadata dictionary for a template.

        The result is a new dictionary holding every entry of
        ``frame_data`` overlaid with ``additional_metadata``; on a key
        clash the additional value wins.  ``frame_data`` is not modified.
        Subclasses can override to add resource-specific metadata.

        Parameters
        ----------
        frame_data : dict[str, Any]
            Frame specification from the external inventory.
        **additional_metadata : Any
            Additional metadata to include.

        Returns
        -------
        dict[str, Any]
            Combined metadata dictionary.

        Examples
        --------
        >>> mapper = ConcreteMapper()  # doctest: +SKIP
        >>> metadata = mapper.create_template_metadata(
        ...     frame_data={"id": "29.9-1", "examples": [...]},
        ...     verb_lemma="think"
        ... )  # doctest: +SKIP
        """
        # Later unpacking overrides earlier keys, so extras take priority.
        return {**frame_data, **additional_metadata}
231
+
232
+
233
class MultiFrameMapper(FrameToTemplateMapper):
    """Mapper producing several template variants from one frame.

    Some frame specifications support multiple realizations (e.g.,
    different complementizer types, voice alternations).  Subclasses list
    the variants via ``get_frame_variants`` and build one template per
    variant via ``_generate_variant``; the default ``generate_from_frame``
    ties the two together.

    Examples
    --------
    >>> class ClausalMapper(MultiFrameMapper):
    ...     def get_frame_variants(self, frame_data):
    ...         # Return list of variant specifications
    ...         return [
    ...             {"comp": "that", "mood": "declarative"},
    ...             {"comp": "whether", "mood": "interrogative"},
    ...         ]
    ...
    ...     def _generate_variant(self, *args, **kwargs):
    ...         # Build a Template from kwargs["variant_data"]
    ...         ...
    ...
    ...     def map_frame_to_slots(self, frame_data):
    ...         return {}
    ...
    ...     def generate_constraints(self, frame_data, slots):
    ...         return []
    """

    @abstractmethod
    def get_frame_variants(self, frame_data: Any) -> list[Any]:
        """List every variant described by a frame specification.

        Parameters
        ----------
        frame_data : Any
            Frame specification from the external inventory.

        Returns
        -------
        list[Any]
            Variant specifications, each representing one possible
            realization of the frame.

        Examples
        --------
        >>> variants = mapper.get_frame_variants({
        ...     "complementizers": ["that", "whether", "if"]
        ... })  # doctest: +SKIP
        >>> len(variants)  # doctest: +SKIP
        3
        """
        ...

    def generate_from_frame(self, *args: Any, **kwargs: Any) -> list[Template]:
        """Generate one template per frame variant.

        Reads ``frame_data`` from the keyword arguments, asks
        ``get_frame_variants`` for the variants and calls
        ``_generate_variant`` once per variant.  Each call receives the
        original positional and keyword arguments plus a ``variant_data``
        keyword holding the variant (replacing any ``variant_data`` the
        caller passed in).  Subclasses can override for custom logic.

        Parameters
        ----------
        *args : Any
            Positional arguments passed to variant generation.
        **kwargs : Any
            Keyword arguments passed to variant generation; must contain
            a non-None ``frame_data``.

        Returns
        -------
        list[Template]
            Templates for all variants, in variant order.

        Raises
        ------
        ValueError
            If ``frame_data`` is missing from ``kwargs`` or is None.
        """
        if (frame_spec := kwargs.get("frame_data")) is None:
            raise ValueError("frame_data must be provided in kwargs")

        # A fresh kwargs mapping per variant keeps the caller's kwargs intact.
        return [
            self._generate_variant(*args, **{**kwargs, "variant_data": spec})
            for spec in self.get_frame_variants(frame_spec)
        ]

    @abstractmethod
    def _generate_variant(self, *args: Any, **kwargs: Any) -> Template:
        """Build the template for a single variant.

        Parameters
        ----------
        *args : Any
            Positional arguments.
        **kwargs : Any
            Keyword arguments, including variant_data.

        Returns
        -------
        Template
            Template for this variant.
        """
        ...
@@ -0,0 +1,29 @@
1
+ """Simulation framework for generating synthetic human judgments.
2
+
3
+ Provides annotators, noise models, and strategies for testing active
4
+ learning pipelines without real human data.
5
+ """
6
+
7
+ from bead.simulation.annotators.base import SimulatedAnnotator
8
+ from bead.simulation.annotators.lm_based import LMBasedAnnotator
9
+ from bead.simulation.noise_models.base import NoiseModel
10
+ from bead.simulation.noise_models.temperature import TemperatureNoiseModel
11
+ from bead.simulation.runner import SimulationRunner
12
+ from bead.simulation.strategies.base import SimulationStrategy
13
+ from bead.simulation.strategies.binary import BinaryStrategy
14
+ from bead.simulation.strategies.categorical import CategoricalStrategy
15
+ from bead.simulation.strategies.forced_choice import ForcedChoiceStrategy
16
+ from bead.simulation.strategies.ordinal_scale import OrdinalScaleStrategy
17
+
18
+ __all__ = [
19
+ "SimulatedAnnotator",
20
+ "LMBasedAnnotator",
21
+ "NoiseModel",
22
+ "TemperatureNoiseModel",
23
+ "SimulationRunner",
24
+ "SimulationStrategy",
25
+ "BinaryStrategy",
26
+ "CategoricalStrategy",
27
+ "ForcedChoiceStrategy",
28
+ "OrdinalScaleStrategy",
29
+ ]
@@ -0,0 +1,15 @@
1
+ """Simulated annotators for generating synthetic judgments."""
2
+
3
+ from bead.simulation.annotators.base import SimulatedAnnotator
4
+ from bead.simulation.annotators.distance_based import DistanceBasedAnnotator
5
+ from bead.simulation.annotators.lm_based import LMBasedAnnotator
6
+ from bead.simulation.annotators.oracle import OracleAnnotator
7
+ from bead.simulation.annotators.random import RandomAnnotator
8
+
9
+ __all__ = [
10
+ "SimulatedAnnotator",
11
+ "DistanceBasedAnnotator",
12
+ "LMBasedAnnotator",
13
+ "OracleAnnotator",
14
+ "RandomAnnotator",
15
+ ]
@@ -0,0 +1,175 @@
1
+ """Base class for simulated annotators."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import TYPE_CHECKING
7
+
8
+ import numpy as np
9
+
10
+ if TYPE_CHECKING:
11
+ from bead.config.simulation import SimulatedAnnotatorConfig
12
+ from bead.items.item import Item
13
+ from bead.items.item_template import ItemTemplate
14
+ from bead.simulation.noise_models.base import NoiseModel
15
+ from bead.simulation.strategies.base import SimulationStrategy
16
+
17
+
18
+ class SimulatedAnnotator(ABC):
19
+ """Abstract base for simulated annotators.
20
+
21
+ An annotator combines:
22
+ - Task-specific strategy (how to respond to each task type)
23
+ - Noise model (how to add human-like variability)
24
+ - Configuration (model output keys, random seed, etc.)
25
+
26
+ The annotator orchestrates the simulation process and provides
27
+ a unified interface for generating judgments.
28
+
29
+ Parameters
30
+ ----------
31
+ config
32
+ Configuration for annotator.
33
+ random_state
34
+ Random seed (overrides config if provided).
35
+ """
36
+
37
+ def __init__(
38
+ self, config: SimulatedAnnotatorConfig, random_state: int | None = None
39
+ ) -> None:
40
+ self.config = config
41
+ self.random_state = random_state or config.random_state
42
+ self.rng = np.random.RandomState(self.random_state)
43
+
44
+ # will be set by subclasses
45
+ self.strategies: dict[str, SimulationStrategy] = {}
46
+ self.noise_model: NoiseModel | None = None
47
+
48
+ @classmethod
49
+ def from_config(cls, config: SimulatedAnnotatorConfig) -> SimulatedAnnotator:
50
+ """Create annotator from configuration.
51
+
52
+ Parameters
53
+ ----------
54
+ config : SimulatedAnnotatorConfig
55
+ Configuration specifying annotator type and parameters.
56
+
57
+ Returns
58
+ -------
59
+ SimulatedAnnotator
60
+ Configured annotator instance.
61
+
62
+ Raises
63
+ ------
64
+ ValueError
65
+ If strategy is unknown.
66
+
67
+ Examples
68
+ --------
69
+ >>> from bead.config.simulation import SimulatedAnnotatorConfig
70
+ >>> config = SimulatedAnnotatorConfig(strategy="lm_score")
71
+ >>> annotator = SimulatedAnnotator.from_config(config)
72
+ """
73
+ # import here to avoid circular dependency
74
+ from bead.simulation.annotators.distance_based import ( # noqa: PLC0415
75
+ DistanceBasedAnnotator,
76
+ )
77
+ from bead.simulation.annotators.lm_based import ( # noqa: PLC0415
78
+ LMBasedAnnotator,
79
+ )
80
+ from bead.simulation.annotators.oracle import OracleAnnotator # noqa: PLC0415
81
+ from bead.simulation.annotators.random import RandomAnnotator # noqa: PLC0415
82
+
83
+ if config.strategy == "lm_score":
84
+ return LMBasedAnnotator(config)
85
+ elif config.strategy == "random":
86
+ return RandomAnnotator(config)
87
+ elif config.strategy == "oracle":
88
+ return OracleAnnotator(config)
89
+ elif config.strategy == "distance":
90
+ return DistanceBasedAnnotator(config)
91
+ else:
92
+ msg = f"Unknown strategy: {config.strategy}"
93
+ raise ValueError(msg)
94
+
95
+ @abstractmethod
96
+ def annotate(
97
+ self, item: Item, item_template: ItemTemplate
98
+ ) -> str | int | float | list[str]:
99
+ """Generate annotation for single item.
100
+
101
+ Parameters
102
+ ----------
103
+ item : Item
104
+ Item to annotate.
105
+ item_template : ItemTemplate
106
+ Template defining task structure.
107
+
108
+ Returns
109
+ -------
110
+ str | int | float | list[str]
111
+ Annotation (format depends on task type).
112
+ """
113
+
114
+ def annotate_batch(
115
+ self,
116
+ items: list[Item],
117
+ item_templates: list[ItemTemplate] | ItemTemplate,
118
+ ) -> dict[str, str | int | float | list[str]]:
119
+ """Generate annotations for batch of items.
120
+
121
+ Parameters
122
+ ----------
123
+ items : list[Item]
124
+ Items to annotate.
125
+ item_templates : list[ItemTemplate] | ItemTemplate
126
+ Templates (one per item or single template for all).
127
+
128
+ Returns
129
+ -------
130
+ dict[str, str | int | float | list[str]]
131
+ Mapping from item ID to annotation.
132
+
133
+ Examples
134
+ --------
135
+ >>> annotations = annotator.annotate_batch(items, template)
136
+ >>> annotations[str(items[0].id)]
137
+ 'option_a'
138
+ """
139
+ # handle single template
140
+ templates_list: list[ItemTemplate]
141
+ if not isinstance(item_templates, list):
142
+ templates_list = [item_templates] * len(items)
143
+ else:
144
+ templates_list = item_templates
145
+
146
+ # annotate each item
147
+ annotations: dict[str, str | int | float | list[str]] = {}
148
+ for item, template in zip(items, templates_list, strict=True):
149
+ annotation = self.annotate(item, template)
150
+ annotations[str(item.id)] = annotation
151
+
152
+ return annotations
153
+
154
+ def get_strategy(self, task_type: str) -> SimulationStrategy:
155
+ """Get strategy for task type.
156
+
157
+ Parameters
158
+ ----------
159
+ task_type : str
160
+ Task type (e.g., "forced_choice").
161
+
162
+ Returns
163
+ -------
164
+ SimulationStrategy
165
+ Strategy for this task type.
166
+
167
+ Raises
168
+ ------
169
+ ValueError
170
+ If task type not supported.
171
+ """
172
+ if task_type not in self.strategies:
173
+ msg = f"Task type '{task_type}' not supported by {self.__class__.__name__}"
174
+ raise ValueError(msg)
175
+ return self.strategies[task_type]
@@ -0,0 +1,135 @@
1
+ """Distance-based annotator using embeddings."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ from bead.simulation.annotators.base import SimulatedAnnotator
8
+ from bead.simulation.strategies.binary import BinaryStrategy
9
+ from bead.simulation.strategies.categorical import CategoricalStrategy
10
+ from bead.simulation.strategies.cloze import ClozeStrategy
11
+ from bead.simulation.strategies.forced_choice import ForcedChoiceStrategy
12
+ from bead.simulation.strategies.free_text import FreeTextStrategy
13
+ from bead.simulation.strategies.magnitude import MagnitudeStrategy
14
+ from bead.simulation.strategies.multi_select import MultiSelectStrategy
15
+ from bead.simulation.strategies.ordinal_scale import OrdinalScaleStrategy
16
+
17
+ if TYPE_CHECKING:
18
+ from bead.config.simulation import SimulatedAnnotatorConfig
19
+ from bead.items.item import Item
20
+ from bead.items.item_template import ItemTemplate
21
+
22
+
23
class DistanceBasedAnnotator(SimulatedAnnotator):
    """Annotator intended to decide from embedding distances.

    Registers one simulation strategy per supported task type and
    delegates every judgment to the matching strategy, passing along
    ``config.model_output_key``.  A temperature noise model is attached
    when the configuration asks for one; any other noise type means no
    noise.

    NOTE(review): the original description says forced choice picks the
    lowest-distance option, ordinal scales map distance to scale values
    and binary thresholds the distance.  That logic is not visible in
    this class - presumably it lives in the strategies; confirm.

    Parameters
    ----------
    config
        Configuration for annotator.

    Examples
    --------
    >>> from bead.config.simulation import SimulatedAnnotatorConfig, NoiseModelConfig
    >>> config = SimulatedAnnotatorConfig(
    ...     strategy="distance",
    ...     model_output_key="embedding",
    ...     noise_model=NoiseModelConfig(noise_type="none")
    ... )
    >>> annotator = DistanceBasedAnnotator(config)
    >>> # judgment = annotator.annotate(item, template)
    """

    def __init__(self, config: SimulatedAnnotatorConfig) -> None:
        super().__init__(config)

        # One strategy per task type; the strategy classes are shared
        # with the LM-based annotator (per the original comment).
        self.strategies = {
            "forced_choice": ForcedChoiceStrategy(),
            "binary": BinaryStrategy(),
            "ordinal_scale": OrdinalScaleStrategy(),
            "categorical": CategoricalStrategy(),
            "magnitude": MagnitudeStrategy(),
            "multi_select": MultiSelectStrategy(),
            "free_text": FreeTextStrategy(),
            "cloze": ClozeStrategy(),
        }

        noise_settings = config.noise_model
        if noise_settings.noise_type != "temperature":
            # "none" and every unrecognized noise type both mean: no noise.
            self.noise_model = None
            return

        # Imported lazily so the module is only loaded when it is needed.
        from bead.simulation.noise_models.temperature import (  # noqa: PLC0415
            TemperatureNoiseModel,
        )

        self.noise_model = TemperatureNoiseModel(
            temperature=noise_settings.temperature
        )

    def annotate(
        self, item: Item, item_template: ItemTemplate
    ) -> str | int | float | bool | list[str]:
        """Generate an annotation for one item.

        Looks up the strategy for ``item_template.task_type``, lets it
        validate the item, asks it for a response and, when a noise model
        is configured, passes that response through the noise model.

        Parameters
        ----------
        item : Item
            Item to annotate.
        item_template : ItemTemplate
            Template defining task.

        Returns
        -------
        str | int | float | bool | list[str]
            Annotation (format depends on task type).

        Raises
        ------
        ValueError
            If the template's task type has no registered strategy
            (raised by ``get_strategy``).

        Notes
        -----
        No embedding-to-score conversion happens in this method; it only
        forwards ``config.model_output_key`` to the strategy.
        NOTE(review): the original notes describe a score computed as
        cosine similarity * 10 - that step is not visible here; confirm
        where (or whether) it is applied.
        """
        task_strategy = self.get_strategy(item_template.task_type)
        task_strategy.validate_item(item, item_template)

        # Base response, drawn with this annotator's random generator.
        answer = task_strategy.simulate_response(
            item=item,
            item_template=item_template,
            model_output_key=self.config.model_output_key,
            rng=self.rng,
        )

        if self.noise_model is None:
            return answer

        noise_context = {
            "item": item,
            "template": item_template,
            "strategy": task_strategy,
        }
        return self.noise_model.apply(
            value=answer,
            context=noise_context,
            rng=self.rng,
        )