bead 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. bead/__init__.py +11 -0
  2. bead/__main__.py +11 -0
  3. bead/active_learning/__init__.py +15 -0
  4. bead/active_learning/config.py +231 -0
  5. bead/active_learning/loop.py +566 -0
  6. bead/active_learning/models/__init__.py +24 -0
  7. bead/active_learning/models/base.py +852 -0
  8. bead/active_learning/models/binary.py +910 -0
  9. bead/active_learning/models/categorical.py +943 -0
  10. bead/active_learning/models/cloze.py +862 -0
  11. bead/active_learning/models/forced_choice.py +956 -0
  12. bead/active_learning/models/free_text.py +773 -0
  13. bead/active_learning/models/lora.py +365 -0
  14. bead/active_learning/models/magnitude.py +835 -0
  15. bead/active_learning/models/multi_select.py +795 -0
  16. bead/active_learning/models/ordinal_scale.py +811 -0
  17. bead/active_learning/models/peft_adapter.py +155 -0
  18. bead/active_learning/models/random_effects.py +639 -0
  19. bead/active_learning/selection.py +354 -0
  20. bead/active_learning/strategies.py +391 -0
  21. bead/active_learning/trainers/__init__.py +26 -0
  22. bead/active_learning/trainers/base.py +210 -0
  23. bead/active_learning/trainers/data_collator.py +172 -0
  24. bead/active_learning/trainers/dataset_utils.py +261 -0
  25. bead/active_learning/trainers/huggingface.py +304 -0
  26. bead/active_learning/trainers/lightning.py +324 -0
  27. bead/active_learning/trainers/metrics.py +424 -0
  28. bead/active_learning/trainers/mixed_effects.py +551 -0
  29. bead/active_learning/trainers/model_wrapper.py +509 -0
  30. bead/active_learning/trainers/registry.py +104 -0
  31. bead/adapters/__init__.py +11 -0
  32. bead/adapters/huggingface.py +61 -0
  33. bead/behavioral/__init__.py +116 -0
  34. bead/behavioral/analytics.py +646 -0
  35. bead/behavioral/extraction.py +343 -0
  36. bead/behavioral/merging.py +343 -0
  37. bead/cli/__init__.py +11 -0
  38. bead/cli/active_learning.py +513 -0
  39. bead/cli/active_learning_commands.py +779 -0
  40. bead/cli/completion.py +359 -0
  41. bead/cli/config.py +624 -0
  42. bead/cli/constraint_builders.py +286 -0
  43. bead/cli/deployment.py +859 -0
  44. bead/cli/deployment_trials.py +493 -0
  45. bead/cli/deployment_ui.py +332 -0
  46. bead/cli/display.py +378 -0
  47. bead/cli/items.py +960 -0
  48. bead/cli/items_factories.py +776 -0
  49. bead/cli/list_constraints.py +714 -0
  50. bead/cli/lists.py +490 -0
  51. bead/cli/main.py +430 -0
  52. bead/cli/models.py +877 -0
  53. bead/cli/resource_loaders.py +621 -0
  54. bead/cli/resources.py +1036 -0
  55. bead/cli/shell.py +356 -0
  56. bead/cli/simulate.py +840 -0
  57. bead/cli/templates.py +1158 -0
  58. bead/cli/training.py +1080 -0
  59. bead/cli/utils.py +614 -0
  60. bead/cli/workflow.py +1273 -0
  61. bead/config/__init__.py +68 -0
  62. bead/config/active_learning.py +1009 -0
  63. bead/config/config.py +192 -0
  64. bead/config/defaults.py +118 -0
  65. bead/config/deployment.py +217 -0
  66. bead/config/env.py +147 -0
  67. bead/config/item.py +45 -0
  68. bead/config/list.py +193 -0
  69. bead/config/loader.py +149 -0
  70. bead/config/logging.py +42 -0
  71. bead/config/model.py +49 -0
  72. bead/config/paths.py +46 -0
  73. bead/config/profiles.py +320 -0
  74. bead/config/resources.py +47 -0
  75. bead/config/serialization.py +210 -0
  76. bead/config/simulation.py +206 -0
  77. bead/config/template.py +238 -0
  78. bead/config/validation.py +267 -0
  79. bead/data/__init__.py +65 -0
  80. bead/data/base.py +87 -0
  81. bead/data/identifiers.py +97 -0
  82. bead/data/language_codes.py +61 -0
  83. bead/data/metadata.py +270 -0
  84. bead/data/range.py +123 -0
  85. bead/data/repository.py +358 -0
  86. bead/data/serialization.py +249 -0
  87. bead/data/timestamps.py +89 -0
  88. bead/data/validation.py +349 -0
  89. bead/data_collection/__init__.py +11 -0
  90. bead/data_collection/jatos.py +223 -0
  91. bead/data_collection/merger.py +154 -0
  92. bead/data_collection/prolific.py +198 -0
  93. bead/deployment/__init__.py +5 -0
  94. bead/deployment/distribution.py +402 -0
  95. bead/deployment/jatos/__init__.py +1 -0
  96. bead/deployment/jatos/api.py +200 -0
  97. bead/deployment/jatos/exporter.py +210 -0
  98. bead/deployment/jspsych/__init__.py +9 -0
  99. bead/deployment/jspsych/biome.json +44 -0
  100. bead/deployment/jspsych/config.py +411 -0
  101. bead/deployment/jspsych/generator.py +598 -0
  102. bead/deployment/jspsych/package.json +51 -0
  103. bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
  104. bead/deployment/jspsych/randomizer.py +299 -0
  105. bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
  106. bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
  107. bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
  108. bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
  109. bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
  110. bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
  111. bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
  112. bead/deployment/jspsych/src/plugins/rating.ts +248 -0
  113. bead/deployment/jspsych/src/slopit/index.ts +9 -0
  114. bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
  115. bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
  116. bead/deployment/jspsych/templates/experiment.css +1 -0
  117. bead/deployment/jspsych/templates/experiment.js.template +289 -0
  118. bead/deployment/jspsych/templates/index.html +51 -0
  119. bead/deployment/jspsych/templates/randomizer.js +241 -0
  120. bead/deployment/jspsych/templates/randomizer.js.template +313 -0
  121. bead/deployment/jspsych/trials.py +723 -0
  122. bead/deployment/jspsych/tsconfig.json +23 -0
  123. bead/deployment/jspsych/tsup.config.ts +30 -0
  124. bead/deployment/jspsych/ui/__init__.py +1 -0
  125. bead/deployment/jspsych/ui/components.py +383 -0
  126. bead/deployment/jspsych/ui/styles.py +411 -0
  127. bead/dsl/__init__.py +80 -0
  128. bead/dsl/ast.py +168 -0
  129. bead/dsl/context.py +178 -0
  130. bead/dsl/errors.py +71 -0
  131. bead/dsl/evaluator.py +570 -0
  132. bead/dsl/grammar.lark +81 -0
  133. bead/dsl/parser.py +231 -0
  134. bead/dsl/stdlib.py +929 -0
  135. bead/evaluation/__init__.py +13 -0
  136. bead/evaluation/convergence.py +485 -0
  137. bead/evaluation/interannotator.py +398 -0
  138. bead/items/__init__.py +40 -0
  139. bead/items/adapters/__init__.py +70 -0
  140. bead/items/adapters/anthropic.py +224 -0
  141. bead/items/adapters/api_utils.py +167 -0
  142. bead/items/adapters/base.py +216 -0
  143. bead/items/adapters/google.py +259 -0
  144. bead/items/adapters/huggingface.py +1074 -0
  145. bead/items/adapters/openai.py +323 -0
  146. bead/items/adapters/registry.py +202 -0
  147. bead/items/adapters/sentence_transformers.py +224 -0
  148. bead/items/adapters/togetherai.py +309 -0
  149. bead/items/binary.py +515 -0
  150. bead/items/cache.py +558 -0
  151. bead/items/categorical.py +593 -0
  152. bead/items/cloze.py +757 -0
  153. bead/items/constructor.py +784 -0
  154. bead/items/forced_choice.py +413 -0
  155. bead/items/free_text.py +681 -0
  156. bead/items/generation.py +432 -0
  157. bead/items/item.py +396 -0
  158. bead/items/item_template.py +787 -0
  159. bead/items/magnitude.py +573 -0
  160. bead/items/multi_select.py +621 -0
  161. bead/items/ordinal_scale.py +569 -0
  162. bead/items/scoring.py +448 -0
  163. bead/items/validation.py +723 -0
  164. bead/lists/__init__.py +30 -0
  165. bead/lists/balancer.py +263 -0
  166. bead/lists/constraints.py +1067 -0
  167. bead/lists/experiment_list.py +286 -0
  168. bead/lists/list_collection.py +378 -0
  169. bead/lists/partitioner.py +1141 -0
  170. bead/lists/stratification.py +254 -0
  171. bead/participants/__init__.py +73 -0
  172. bead/participants/collection.py +699 -0
  173. bead/participants/merging.py +312 -0
  174. bead/participants/metadata_spec.py +491 -0
  175. bead/participants/models.py +276 -0
  176. bead/resources/__init__.py +29 -0
  177. bead/resources/adapters/__init__.py +19 -0
  178. bead/resources/adapters/base.py +104 -0
  179. bead/resources/adapters/cache.py +128 -0
  180. bead/resources/adapters/glazing.py +508 -0
  181. bead/resources/adapters/registry.py +117 -0
  182. bead/resources/adapters/unimorph.py +796 -0
  183. bead/resources/classification.py +856 -0
  184. bead/resources/constraint_builders.py +329 -0
  185. bead/resources/constraints.py +165 -0
  186. bead/resources/lexical_item.py +223 -0
  187. bead/resources/lexicon.py +744 -0
  188. bead/resources/loaders.py +209 -0
  189. bead/resources/template.py +441 -0
  190. bead/resources/template_collection.py +707 -0
  191. bead/resources/template_generation.py +349 -0
  192. bead/simulation/__init__.py +29 -0
  193. bead/simulation/annotators/__init__.py +15 -0
  194. bead/simulation/annotators/base.py +175 -0
  195. bead/simulation/annotators/distance_based.py +135 -0
  196. bead/simulation/annotators/lm_based.py +114 -0
  197. bead/simulation/annotators/oracle.py +182 -0
  198. bead/simulation/annotators/random.py +181 -0
  199. bead/simulation/dsl_extension/__init__.py +3 -0
  200. bead/simulation/noise_models/__init__.py +13 -0
  201. bead/simulation/noise_models/base.py +42 -0
  202. bead/simulation/noise_models/random_noise.py +82 -0
  203. bead/simulation/noise_models/systematic.py +132 -0
  204. bead/simulation/noise_models/temperature.py +86 -0
  205. bead/simulation/runner.py +144 -0
  206. bead/simulation/strategies/__init__.py +23 -0
  207. bead/simulation/strategies/base.py +123 -0
  208. bead/simulation/strategies/binary.py +103 -0
  209. bead/simulation/strategies/categorical.py +123 -0
  210. bead/simulation/strategies/cloze.py +224 -0
  211. bead/simulation/strategies/forced_choice.py +127 -0
  212. bead/simulation/strategies/free_text.py +105 -0
  213. bead/simulation/strategies/magnitude.py +116 -0
  214. bead/simulation/strategies/multi_select.py +129 -0
  215. bead/simulation/strategies/ordinal_scale.py +131 -0
  216. bead/templates/__init__.py +27 -0
  217. bead/templates/adapters/__init__.py +17 -0
  218. bead/templates/adapters/base.py +128 -0
  219. bead/templates/adapters/cache.py +178 -0
  220. bead/templates/adapters/huggingface.py +312 -0
  221. bead/templates/combinatorics.py +103 -0
  222. bead/templates/filler.py +605 -0
  223. bead/templates/renderers.py +177 -0
  224. bead/templates/resolver.py +178 -0
  225. bead/templates/strategies.py +1806 -0
  226. bead/templates/streaming.py +195 -0
  227. bead-0.1.0.dist-info/METADATA +212 -0
  228. bead-0.1.0.dist-info/RECORD +231 -0
  229. bead-0.1.0.dist-info/WHEEL +4 -0
  230. bead-0.1.0.dist-info/entry_points.txt +2 -0
  231. bead-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,114 @@
1
+ """LM score-based annotator."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ from bead.simulation.annotators.base import SimulatedAnnotator
8
+ from bead.simulation.noise_models.temperature import TemperatureNoiseModel
9
+ from bead.simulation.strategies.binary import BinaryStrategy
10
+ from bead.simulation.strategies.categorical import CategoricalStrategy
11
+ from bead.simulation.strategies.cloze import ClozeStrategy
12
+ from bead.simulation.strategies.forced_choice import ForcedChoiceStrategy
13
+ from bead.simulation.strategies.free_text import FreeTextStrategy
14
+ from bead.simulation.strategies.magnitude import MagnitudeStrategy
15
+ from bead.simulation.strategies.multi_select import MultiSelectStrategy
16
+ from bead.simulation.strategies.ordinal_scale import OrdinalScaleStrategy
17
+
18
+ if TYPE_CHECKING:
19
+ from bead.config.simulation import SimulatedAnnotatorConfig
20
+ from bead.items.item import Item
21
+ from bead.items.item_template import ItemTemplate
22
+
23
+
24
class LMBasedAnnotator(SimulatedAnnotator):
    """Simulated annotator whose answers are driven by language model scores.

    One response strategy is registered per task type. The strategy selected
    for an item is handed ``config.model_output_key`` (presumably the key of
    the LM score inside ``Item.model_outputs``) and produces the base
    response; a temperature noise model, when configured, then perturbs it.

    Supports all task types via pluggable strategies.

    Parameters
    ----------
    config
        Configuration for annotator.

    Examples
    --------
    >>> from bead.config.simulation import SimulatedAnnotatorConfig, NoiseModelConfig
    >>> config = SimulatedAnnotatorConfig(
    ...     strategy="lm_score",
    ...     model_output_key="lm_score",
    ...     noise_model=NoiseModelConfig(noise_type="temperature", temperature=1.5)
    ... )
    >>> annotator = LMBasedAnnotator(config)
    >>> # judgment = annotator.annotate(item, template)
    """

    def __init__(self, config: SimulatedAnnotatorConfig) -> None:
        super().__init__(config)

        # one strategy instance per supported task type
        strategy_types = {
            "forced_choice": ForcedChoiceStrategy,
            "binary": BinaryStrategy,
            "ordinal_scale": OrdinalScaleStrategy,
            "categorical": CategoricalStrategy,
            "magnitude": MagnitudeStrategy,
            "multi_select": MultiSelectStrategy,
            "free_text": FreeTextStrategy,
            "cloze": ClozeStrategy,
        }
        self.strategies = {
            task_type: strategy_cls()
            for task_type, strategy_cls in strategy_types.items()
        }

        # only "temperature" yields a noise model; "none" and every other
        # noise type mean that responses are returned without noise
        noise_config = config.noise_model
        self.noise_model = (
            TemperatureNoiseModel(temperature=noise_config.temperature)
            if noise_config.noise_type == "temperature"
            else None
        )

    def annotate(
        self, item: Item, item_template: ItemTemplate
    ) -> str | int | float | list[str]:
        """Generate annotation using LM scores.

        The strategy for ``item_template.task_type`` validates the item and
        simulates a response; the noise model, if any, is applied afterwards.
        ``get_strategy``, ``config`` and ``rng`` are expected to be provided
        by ``SimulatedAnnotator``.

        Parameters
        ----------
        item : Item
            Item to annotate.
        item_template : ItemTemplate
            Template defining task.

        Returns
        -------
        str | int | float | list[str]
            Annotation (format depends on task type).
        """
        task_strategy = self.get_strategy(item_template.task_type)

        # let the strategy reject items it cannot handle before simulating
        task_strategy.validate_item(item, item_template)

        base_response = task_strategy.simulate_response(
            item=item,
            item_template=item_template,
            model_output_key=self.config.model_output_key,
            rng=self.rng,
        )

        if self.noise_model is None:
            return base_response

        noise_context = {
            "item": item,
            "template": item_template,
            "strategy": task_strategy,
        }
        return self.noise_model.apply(
            value=base_response,
            context=noise_context,
            rng=self.rng,
        )
@@ -0,0 +1,182 @@
1
+ """Oracle (perfect performance) annotator."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ from bead.simulation.annotators.base import SimulatedAnnotator
8
+ from bead.simulation.annotators.random import RandomAnnotator
9
+
10
+ if TYPE_CHECKING:
11
+ from bead.config.simulation import SimulatedAnnotatorConfig
12
+ from bead.items.item import Item
13
+ from bead.items.item_template import ItemTemplate
14
+
15
+
16
class OracleAnnotator(SimulatedAnnotator):
    """Perfect performance annotator using ground truth.

    Returns the label stored in ``item.item_metadata['ground_truth']`` after
    validating it against the template's task type. When the item has no
    usable ground truth (missing or empty metadata, or a ``None`` value), the
    annotation is delegated to a ``RandomAnnotator`` built from the same
    config.

    Useful for establishing upper bound on performance.

    Parameters
    ----------
    config
        Configuration for annotator.

    Examples
    --------
    >>> from bead.config.simulation import SimulatedAnnotatorConfig
    >>> config = SimulatedAnnotatorConfig(strategy="oracle", random_state=42)
    >>> annotator = OracleAnnotator(config)
    >>> # judgment = annotator.annotate(item, template)
    """

    def __init__(self, config: SimulatedAnnotatorConfig) -> None:
        super().__init__(config)

        # fallback used whenever an item carries no ground truth
        self.random_annotator = RandomAnnotator(config)

    def annotate(
        self, item: Item, item_template: ItemTemplate
    ) -> str | int | float | bool | list[str] | dict[str, str]:
        """Generate oracle annotation using ground truth.

        Parameters
        ----------
        item : Item
            Item to annotate.
        item_template : ItemTemplate
            Template defining task.

        Returns
        -------
        str | int | float | bool | list[str] | dict[str, str]
            Ground truth annotation or random fallback.

        Raises
        ------
        ValueError
            If a ground truth is present but invalid for the task type.
        """
        metadata = getattr(item, "item_metadata", None)
        if metadata:
            ground_truth = metadata.get("ground_truth")
            if ground_truth is not None:
                return self._validate_ground_truth(ground_truth, item_template)

        # no ground truth available: answer randomly instead
        return self.random_annotator.annotate(item, item_template)

    def _validate_ground_truth(
        self,
        ground_truth: str | int | float | bool | list[str] | dict[str, str],
        template: ItemTemplate,
    ) -> str | int | float | bool | list[str] | dict[str, str]:
        """Validate ground truth against task spec.

        Parameters
        ----------
        ground_truth
            Ground truth value.
        template : ItemTemplate
            Template defining task constraints.

        Returns
        -------
        str | int | float | bool | list[str] | dict[str, str]
            Validated ground truth. Magnitudes are returned as ``float`` and
            cloze answers as a new dict whose values are converted to ``str``.

        Raises
        ------
        ValueError
            If ground truth is invalid for task type, or the task type is
            not supported.
        """
        task_type = template.task_type

        if task_type == "forced_choice":
            if not isinstance(ground_truth, str):
                msg = (
                    f"forced_choice ground truth must be str, got {type(ground_truth)}"
                )
                raise ValueError(msg)
            options = template.task_spec.options or []
            if ground_truth not in options:
                msg = f"Ground truth '{ground_truth}' not in options {options}"
                raise ValueError(msg)
            return ground_truth

        elif task_type == "binary":
            if not isinstance(ground_truth, bool):
                msg = f"binary ground truth must be bool, got {type(ground_truth)}"
                raise ValueError(msg)
            return ground_truth

        elif task_type == "ordinal_scale":
            # bool is a subclass of int, so it must be rejected explicitly;
            # otherwise True/False would pass as the ratings 1/0
            if isinstance(ground_truth, bool) or not isinstance(ground_truth, int):
                msg = (
                    f"ordinal_scale ground truth must be int, got {type(ground_truth)}"
                )
                raise ValueError(msg)
            scale_bounds = template.task_spec.scale_bounds
            if scale_bounds is not None:
                min_val, max_val = scale_bounds
            else:
                # same default scale as RandomAnnotator when bounds are unset
                min_val, max_val = 1, 7
            if not (min_val <= ground_truth <= max_val):
                msg = f"Ground truth {ground_truth} not in range [{min_val}, {max_val}]"
                raise ValueError(msg)
            return ground_truth

        elif task_type == "categorical":
            if not isinstance(ground_truth, str):
                msg = f"categorical ground truth must be str, got {type(ground_truth)}"
                raise ValueError(msg)
            options = template.task_spec.options or []
            if ground_truth not in options:
                msg = f"Ground truth '{ground_truth}' not in options {options}"
                raise ValueError(msg)
            return ground_truth

        elif task_type == "magnitude":
            # as for ordinal_scale: a bool must not be coerced to 1.0 / 0.0
            if isinstance(ground_truth, bool) or not isinstance(
                ground_truth, int | float
            ):
                msg = (
                    f"magnitude ground truth must be numeric, got {type(ground_truth)}"
                )
                raise ValueError(msg)
            return float(ground_truth)

        elif task_type == "multi_select":
            if not isinstance(ground_truth, list):
                msg = (
                    f"multi_select ground truth must be list, got {type(ground_truth)}"
                )
                raise ValueError(msg)
            options = template.task_spec.options or []
            for item_val in ground_truth:
                if item_val not in options:
                    msg = f"Ground truth item '{item_val}' not in options {options}"
                    raise ValueError(msg)
            return ground_truth

        elif task_type == "free_text":
            if not isinstance(ground_truth, str):
                msg = f"free_text ground truth must be str, got {type(ground_truth)}"
                raise ValueError(msg)
            return ground_truth

        elif task_type == "cloze":
            if not isinstance(ground_truth, dict):
                msg = f"cloze ground truth must be dict, got {type(ground_truth)}"
                raise ValueError(msg)
            # NOTE(review): slots are read from the template here, while
            # RandomAnnotator reads them from the item (item.unfilled_slots);
            # confirm that ItemTemplate really exposes unfilled_slots.
            expected_slots = [slot.slot_name for slot in template.unfilled_slots]
            for slot_name in expected_slots:
                if slot_name not in ground_truth:
                    msg = (
                        f"Ground truth missing slot '{slot_name}' "
                        f"(expected slots: {expected_slots})"
                    )
                    raise ValueError(msg)
            # return dict of slot_name -> value
            return {k: str(v) for k, v in ground_truth.items()}

        else:
            raise ValueError(f"Unsupported task type: {task_type}")
@@ -0,0 +1,181 @@
1
+ """Random baseline annotator."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ from bead.simulation.annotators.base import SimulatedAnnotator
8
+
9
+ if TYPE_CHECKING:
10
+ from bead.config.simulation import SimulatedAnnotatorConfig
11
+ from bead.items.item import Item
12
+ from bead.items.item_template import ItemTemplate
13
+
14
+
15
class RandomAnnotator(SimulatedAnnotator):
    """Uninformed baseline annotator that answers at random.

    Responses honor the constraints of the task spec (available options,
    scale bounds) but ignore the content of the item itself; only cloze
    tasks look at the item, to learn which slots have to be filled.

    Useful for establishing baseline performance.

    Parameters
    ----------
    config
        Configuration for annotator.

    Examples
    --------
    >>> from bead.config.simulation import SimulatedAnnotatorConfig
    >>> config = SimulatedAnnotatorConfig(strategy="random", random_state=42)
    >>> annotator = RandomAnnotator(config)
    >>> # judgment = annotator.annotate(item, template)
    """

    # canned answers drawn from for free_text tasks
    _FREE_TEXT_RESPONSES = (
        "No response",
        "Unclear",
        "Cannot determine",
        "Not applicable",
        "Unknown",
    )

    # common words drawn from to fill cloze slots
    _CLOZE_WORD_BANK = (
        "the",
        "a",
        "is",
        "was",
        "has",
        "can",
        "will",
        "thing",
        "person",
        "place",
        "time",
        "way",
        "good",
        "new",
        "old",
        "big",
        "small",
        "very",
        "well",
        "just",
        "now",
        "here",
        "in",
        "on",
        "at",
        "to",
        "for",
    )

    def __init__(self, config: SimulatedAnnotatorConfig) -> None:
        # nothing beyond the base setup: no strategies, no noise model
        super().__init__(config)

    def annotate(
        self, item: Item, item_template: ItemTemplate
    ) -> str | int | float | bool | list[str]:
        """Generate random annotation.

        Parameters
        ----------
        item : Item
            Item to annotate (only consulted for cloze tasks).
        item_template : ItemTemplate
            Template defining task constraints.

        Returns
        -------
        str | int | float | bool | list[str]
            Random annotation (format depends on task type; cloze tasks
            yield a dict mapping slot names to words).

        Raises
        ------
        ValueError
            If task type is not supported.
        """
        kind = item_template.task_type

        if kind == "forced_choice":
            return self._random_forced_choice(item_template)
        if kind == "binary":
            return self._random_binary()
        if kind == "ordinal_scale":
            return self._random_ordinal(item_template)
        if kind == "categorical":
            return self._random_categorical(item_template)
        if kind == "magnitude":
            return self._random_magnitude()
        if kind == "multi_select":
            return self._random_multi_select(item_template)
        if kind == "free_text":
            return self._random_free_text()
        if kind == "cloze":
            return self._random_cloze(item)

        raise ValueError(f"Unsupported task type: {kind}")

    def _random_forced_choice(self, template: ItemTemplate) -> str:
        """Pick one of the template's options at random."""
        choices = template.task_spec.options
        if not choices:
            raise ValueError("forced_choice requires options")
        return str(self.rng.choice(choices))

    def _random_binary(self) -> bool:
        """Pick ``True`` or ``False`` at random."""
        return bool(self.rng.choice([True, False]))

    def _random_ordinal(self, template: ItemTemplate) -> int:
        """Pick a random point on the ordinal scale (1-7 if unbounded)."""
        bounds = template.task_spec.scale_bounds
        low, high = (1, 7) if bounds is None else bounds
        # high + 1 keeps the top of the scale reachable, assuming randint
        # excludes its upper argument (as numpy's RandomState.randint does)
        return int(self.rng.randint(low, high + 1))

    def _random_categorical(self, template: ItemTemplate) -> str:
        """Pick one of the template's categories at random."""
        choices = template.task_spec.options
        if not choices:
            raise ValueError("categorical requires options")
        return str(self.rng.choice(choices))

    def _random_magnitude(self) -> float:
        """Draw a positive magnitude from a log-normal distribution."""
        return float(self.rng.lognormal(mean=0, sigma=1))

    def _random_multi_select(self, template: ItemTemplate) -> list[str]:
        """Keep each option when its random draw is below 0.5.

        The result may be empty.
        """
        options = template.task_spec.options or []
        if not options:
            raise ValueError("multi_select requires options")
        return [option for option in options if self.rng.random() < 0.5]

    def _random_free_text(self) -> str:
        """Pick one of a handful of canned free-text answers."""
        return str(self.rng.choice(self._FREE_TEXT_RESPONSES))

    def _random_cloze(self, item: Item) -> dict[str, str]:
        """Fill every unfilled slot of the item with a random common word."""
        return {
            slot.slot_name: str(self.rng.choice(self._CLOZE_WORD_BANK))
            for slot in item.unfilled_slots
        }
@@ -0,0 +1,3 @@
1
+ """DSL extensions for simulation-specific operations."""
2
+
3
+ __all__ = []
@@ -0,0 +1,13 @@
1
+ """Noise models for simulating human variability."""
2
+
3
+ from bead.simulation.noise_models.base import NoiseModel
4
+ from bead.simulation.noise_models.random_noise import RandomNoiseModel
5
+ from bead.simulation.noise_models.systematic import SystematicNoiseModel
6
+ from bead.simulation.noise_models.temperature import TemperatureNoiseModel
7
+
8
+ __all__ = [
9
+ "NoiseModel",
10
+ "RandomNoiseModel",
11
+ "SystematicNoiseModel",
12
+ "TemperatureNoiseModel",
13
+ ]
@@ -0,0 +1,42 @@
1
+ """Base class for noise models."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abc import ABC, abstractmethod
6
+
7
+ import numpy as np
8
+
9
+
10
class NoiseModel(ABC):
    """Interface shared by all noise models.

    A noise model makes simulated responses vary the way human responses
    do. Concrete models can, for example:

    - scale probabilities by a temperature,
    - add systematic biases (length, frequency, position),
    - inject random noise.

    Subclasses must implement :meth:`apply`; the class cannot be
    instantiated until they do.
    """

    @abstractmethod
    def apply(
        self,
        value: str | int | float | list[str],
        context: dict[str, str | int | float | bool | list[str]],
        rng: np.random.RandomState,
    ) -> str | int | float | list[str]:
        """Return ``value`` with noise applied.

        Parameters
        ----------
        value : str | int | float | list[str]
            The noise-free value (probability, score, choice, etc.).
        context : dict[str, str | int | float | bool | list[str]]
            Extra information a model may consult (item, template,
            strategy, etc.).
        rng : np.random.RandomState
            Source of randomness.

        Returns
        -------
        str | int | float | list[str]
            The perturbed value.
        """
@@ -0,0 +1,82 @@
1
+ """Random noise injection model."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import numpy as np
6
+
7
+ from bead.simulation.noise_models.base import NoiseModel
8
+
9
+
10
class RandomNoiseModel(NoiseModel):
    """Noise model that perturbs numeric responses with random noise.

    Numeric values (``int`` or ``float``, but not ``bool``) receive either
    gaussian or uniform noise. Every other value (strings, booleans,
    lists) is passed through unchanged.

    Parameters
    ----------
    noise_type
        Type of noise ("gaussian" or "uniform"). Default: "gaussian".
        Any other value leaves numeric responses without noise.
    strength
        Noise strength (stddev for gaussian, half-width of the symmetric
        range for uniform). Default: 1.0. A strength of 0 disables noise.

    Examples
    --------
    >>> noise_model = RandomNoiseModel(noise_type="gaussian", strength=0.5)
    >>> # Adds gaussian noise with stddev=0.5 to numeric responses
    """

    def __init__(self, noise_type: str = "gaussian", strength: float = 1.0) -> None:
        self.noise_type = noise_type
        self.strength = strength

    def apply(
        self,
        value: str | int | float | bool | list[str],
        context: dict[str, str | int | float | bool | list[str]],
        rng: np.random.RandomState,
    ) -> str | int | float | bool | list[str]:
        """Apply random noise.

        Parameters
        ----------
        value
            Original value.
        context : dict
            Context with item, template, strategy (not used by this model).
        rng : np.random.RandomState
            Random number generator.

        Returns
        -------
        str | int | float | bool | list[str]
            Noisy value for numeric input; the input itself otherwise.
        """
        # bool is a subclass of int and must not be treated as a number
        is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
        if self.strength == 0.0 or not is_number:
            return value
        return self._add_numeric_noise(value, rng)

    def _add_numeric_noise(
        self, value: int | float, rng: np.random.RandomState
    ) -> int | float:
        """Add the configured noise to ``value``, keeping its int/float type."""
        if self.noise_type == "gaussian":
            offset = rng.normal(0, self.strength)
        elif self.noise_type == "uniform":
            offset = rng.uniform(-self.strength, self.strength)
        else:
            # unknown noise type: leave the value untouched
            offset = None

        perturbed = value if offset is None else value + offset

        # integers stay integers (rounded), everything else becomes float
        if isinstance(value, int):
            return int(round(perturbed))
        return float(perturbed)