bead 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. bead/__init__.py +11 -0
  2. bead/__main__.py +11 -0
  3. bead/active_learning/__init__.py +15 -0
  4. bead/active_learning/config.py +231 -0
  5. bead/active_learning/loop.py +566 -0
  6. bead/active_learning/models/__init__.py +24 -0
  7. bead/active_learning/models/base.py +852 -0
  8. bead/active_learning/models/binary.py +910 -0
  9. bead/active_learning/models/categorical.py +943 -0
  10. bead/active_learning/models/cloze.py +862 -0
  11. bead/active_learning/models/forced_choice.py +956 -0
  12. bead/active_learning/models/free_text.py +773 -0
  13. bead/active_learning/models/lora.py +365 -0
  14. bead/active_learning/models/magnitude.py +835 -0
  15. bead/active_learning/models/multi_select.py +795 -0
  16. bead/active_learning/models/ordinal_scale.py +811 -0
  17. bead/active_learning/models/peft_adapter.py +155 -0
  18. bead/active_learning/models/random_effects.py +639 -0
  19. bead/active_learning/selection.py +354 -0
  20. bead/active_learning/strategies.py +391 -0
  21. bead/active_learning/trainers/__init__.py +26 -0
  22. bead/active_learning/trainers/base.py +210 -0
  23. bead/active_learning/trainers/data_collator.py +172 -0
  24. bead/active_learning/trainers/dataset_utils.py +261 -0
  25. bead/active_learning/trainers/huggingface.py +304 -0
  26. bead/active_learning/trainers/lightning.py +324 -0
  27. bead/active_learning/trainers/metrics.py +424 -0
  28. bead/active_learning/trainers/mixed_effects.py +551 -0
  29. bead/active_learning/trainers/model_wrapper.py +509 -0
  30. bead/active_learning/trainers/registry.py +104 -0
  31. bead/adapters/__init__.py +11 -0
  32. bead/adapters/huggingface.py +61 -0
  33. bead/behavioral/__init__.py +116 -0
  34. bead/behavioral/analytics.py +646 -0
  35. bead/behavioral/extraction.py +343 -0
  36. bead/behavioral/merging.py +343 -0
  37. bead/cli/__init__.py +11 -0
  38. bead/cli/active_learning.py +513 -0
  39. bead/cli/active_learning_commands.py +779 -0
  40. bead/cli/completion.py +359 -0
  41. bead/cli/config.py +624 -0
  42. bead/cli/constraint_builders.py +286 -0
  43. bead/cli/deployment.py +859 -0
  44. bead/cli/deployment_trials.py +493 -0
  45. bead/cli/deployment_ui.py +332 -0
  46. bead/cli/display.py +378 -0
  47. bead/cli/items.py +960 -0
  48. bead/cli/items_factories.py +776 -0
  49. bead/cli/list_constraints.py +714 -0
  50. bead/cli/lists.py +490 -0
  51. bead/cli/main.py +430 -0
  52. bead/cli/models.py +877 -0
  53. bead/cli/resource_loaders.py +621 -0
  54. bead/cli/resources.py +1036 -0
  55. bead/cli/shell.py +356 -0
  56. bead/cli/simulate.py +840 -0
  57. bead/cli/templates.py +1158 -0
  58. bead/cli/training.py +1080 -0
  59. bead/cli/utils.py +614 -0
  60. bead/cli/workflow.py +1273 -0
  61. bead/config/__init__.py +68 -0
  62. bead/config/active_learning.py +1009 -0
  63. bead/config/config.py +192 -0
  64. bead/config/defaults.py +118 -0
  65. bead/config/deployment.py +217 -0
  66. bead/config/env.py +147 -0
  67. bead/config/item.py +45 -0
  68. bead/config/list.py +193 -0
  69. bead/config/loader.py +149 -0
  70. bead/config/logging.py +42 -0
  71. bead/config/model.py +49 -0
  72. bead/config/paths.py +46 -0
  73. bead/config/profiles.py +320 -0
  74. bead/config/resources.py +47 -0
  75. bead/config/serialization.py +210 -0
  76. bead/config/simulation.py +206 -0
  77. bead/config/template.py +238 -0
  78. bead/config/validation.py +267 -0
  79. bead/data/__init__.py +65 -0
  80. bead/data/base.py +87 -0
  81. bead/data/identifiers.py +97 -0
  82. bead/data/language_codes.py +61 -0
  83. bead/data/metadata.py +270 -0
  84. bead/data/range.py +123 -0
  85. bead/data/repository.py +358 -0
  86. bead/data/serialization.py +249 -0
  87. bead/data/timestamps.py +89 -0
  88. bead/data/validation.py +349 -0
  89. bead/data_collection/__init__.py +11 -0
  90. bead/data_collection/jatos.py +223 -0
  91. bead/data_collection/merger.py +154 -0
  92. bead/data_collection/prolific.py +198 -0
  93. bead/deployment/__init__.py +5 -0
  94. bead/deployment/distribution.py +402 -0
  95. bead/deployment/jatos/__init__.py +1 -0
  96. bead/deployment/jatos/api.py +200 -0
  97. bead/deployment/jatos/exporter.py +210 -0
  98. bead/deployment/jspsych/__init__.py +9 -0
  99. bead/deployment/jspsych/biome.json +44 -0
  100. bead/deployment/jspsych/config.py +411 -0
  101. bead/deployment/jspsych/generator.py +598 -0
  102. bead/deployment/jspsych/package.json +51 -0
  103. bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
  104. bead/deployment/jspsych/randomizer.py +299 -0
  105. bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
  106. bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
  107. bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
  108. bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
  109. bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
  110. bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
  111. bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
  112. bead/deployment/jspsych/src/plugins/rating.ts +248 -0
  113. bead/deployment/jspsych/src/slopit/index.ts +9 -0
  114. bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
  115. bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
  116. bead/deployment/jspsych/templates/experiment.css +1 -0
  117. bead/deployment/jspsych/templates/experiment.js.template +289 -0
  118. bead/deployment/jspsych/templates/index.html +51 -0
  119. bead/deployment/jspsych/templates/randomizer.js +241 -0
  120. bead/deployment/jspsych/templates/randomizer.js.template +313 -0
  121. bead/deployment/jspsych/trials.py +723 -0
  122. bead/deployment/jspsych/tsconfig.json +23 -0
  123. bead/deployment/jspsych/tsup.config.ts +30 -0
  124. bead/deployment/jspsych/ui/__init__.py +1 -0
  125. bead/deployment/jspsych/ui/components.py +383 -0
  126. bead/deployment/jspsych/ui/styles.py +411 -0
  127. bead/dsl/__init__.py +80 -0
  128. bead/dsl/ast.py +168 -0
  129. bead/dsl/context.py +178 -0
  130. bead/dsl/errors.py +71 -0
  131. bead/dsl/evaluator.py +570 -0
  132. bead/dsl/grammar.lark +81 -0
  133. bead/dsl/parser.py +231 -0
  134. bead/dsl/stdlib.py +929 -0
  135. bead/evaluation/__init__.py +13 -0
  136. bead/evaluation/convergence.py +485 -0
  137. bead/evaluation/interannotator.py +398 -0
  138. bead/items/__init__.py +40 -0
  139. bead/items/adapters/__init__.py +70 -0
  140. bead/items/adapters/anthropic.py +224 -0
  141. bead/items/adapters/api_utils.py +167 -0
  142. bead/items/adapters/base.py +216 -0
  143. bead/items/adapters/google.py +259 -0
  144. bead/items/adapters/huggingface.py +1074 -0
  145. bead/items/adapters/openai.py +323 -0
  146. bead/items/adapters/registry.py +202 -0
  147. bead/items/adapters/sentence_transformers.py +224 -0
  148. bead/items/adapters/togetherai.py +309 -0
  149. bead/items/binary.py +515 -0
  150. bead/items/cache.py +558 -0
  151. bead/items/categorical.py +593 -0
  152. bead/items/cloze.py +757 -0
  153. bead/items/constructor.py +784 -0
  154. bead/items/forced_choice.py +413 -0
  155. bead/items/free_text.py +681 -0
  156. bead/items/generation.py +432 -0
  157. bead/items/item.py +396 -0
  158. bead/items/item_template.py +787 -0
  159. bead/items/magnitude.py +573 -0
  160. bead/items/multi_select.py +621 -0
  161. bead/items/ordinal_scale.py +569 -0
  162. bead/items/scoring.py +448 -0
  163. bead/items/validation.py +723 -0
  164. bead/lists/__init__.py +30 -0
  165. bead/lists/balancer.py +263 -0
  166. bead/lists/constraints.py +1067 -0
  167. bead/lists/experiment_list.py +286 -0
  168. bead/lists/list_collection.py +378 -0
  169. bead/lists/partitioner.py +1141 -0
  170. bead/lists/stratification.py +254 -0
  171. bead/participants/__init__.py +73 -0
  172. bead/participants/collection.py +699 -0
  173. bead/participants/merging.py +312 -0
  174. bead/participants/metadata_spec.py +491 -0
  175. bead/participants/models.py +276 -0
  176. bead/resources/__init__.py +29 -0
  177. bead/resources/adapters/__init__.py +19 -0
  178. bead/resources/adapters/base.py +104 -0
  179. bead/resources/adapters/cache.py +128 -0
  180. bead/resources/adapters/glazing.py +508 -0
  181. bead/resources/adapters/registry.py +117 -0
  182. bead/resources/adapters/unimorph.py +796 -0
  183. bead/resources/classification.py +856 -0
  184. bead/resources/constraint_builders.py +329 -0
  185. bead/resources/constraints.py +165 -0
  186. bead/resources/lexical_item.py +223 -0
  187. bead/resources/lexicon.py +744 -0
  188. bead/resources/loaders.py +209 -0
  189. bead/resources/template.py +441 -0
  190. bead/resources/template_collection.py +707 -0
  191. bead/resources/template_generation.py +349 -0
  192. bead/simulation/__init__.py +29 -0
  193. bead/simulation/annotators/__init__.py +15 -0
  194. bead/simulation/annotators/base.py +175 -0
  195. bead/simulation/annotators/distance_based.py +135 -0
  196. bead/simulation/annotators/lm_based.py +114 -0
  197. bead/simulation/annotators/oracle.py +182 -0
  198. bead/simulation/annotators/random.py +181 -0
  199. bead/simulation/dsl_extension/__init__.py +3 -0
  200. bead/simulation/noise_models/__init__.py +13 -0
  201. bead/simulation/noise_models/base.py +42 -0
  202. bead/simulation/noise_models/random_noise.py +82 -0
  203. bead/simulation/noise_models/systematic.py +132 -0
  204. bead/simulation/noise_models/temperature.py +86 -0
  205. bead/simulation/runner.py +144 -0
  206. bead/simulation/strategies/__init__.py +23 -0
  207. bead/simulation/strategies/base.py +123 -0
  208. bead/simulation/strategies/binary.py +103 -0
  209. bead/simulation/strategies/categorical.py +123 -0
  210. bead/simulation/strategies/cloze.py +224 -0
  211. bead/simulation/strategies/forced_choice.py +127 -0
  212. bead/simulation/strategies/free_text.py +105 -0
  213. bead/simulation/strategies/magnitude.py +116 -0
  214. bead/simulation/strategies/multi_select.py +129 -0
  215. bead/simulation/strategies/ordinal_scale.py +131 -0
  216. bead/templates/__init__.py +27 -0
  217. bead/templates/adapters/__init__.py +17 -0
  218. bead/templates/adapters/base.py +128 -0
  219. bead/templates/adapters/cache.py +178 -0
  220. bead/templates/adapters/huggingface.py +312 -0
  221. bead/templates/combinatorics.py +103 -0
  222. bead/templates/filler.py +605 -0
  223. bead/templates/renderers.py +177 -0
  224. bead/templates/resolver.py +178 -0
  225. bead/templates/strategies.py +1806 -0
  226. bead/templates/streaming.py +195 -0
  227. bead-0.1.0.dist-info/METADATA +212 -0
  228. bead-0.1.0.dist-info/RECORD +231 -0
  229. bead-0.1.0.dist-info/WHEEL +4 -0
  230. bead-0.1.0.dist-info/entry_points.txt +2 -0
  231. bead-0.1.0.dist-info/licenses/LICENSE +21 -0
bead/cli/simulate.py ADDED
@@ -0,0 +1,840 @@
1
+ """Simulation commands for bead CLI.
2
+
3
+ This module provides commands for running multi-annotator simulations with
4
+ configurable annotator strategies and noise models.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ from collections import Counter
11
+ from pathlib import Path
12
+
13
+ import click
14
+ import numpy as np
15
+ import yaml
16
+ from rich.console import Console
17
+ from rich.progress import Progress, SpinnerColumn, TextColumn
18
+ from rich.table import Table
19
+
20
+ from bead.cli.utils import JsonValue, print_error, print_info, print_success
21
+ from bead.config.simulation import (
22
+ NoiseModelConfig,
23
+ SimulatedAnnotatorConfig,
24
+ SimulationRunnerConfig,
25
+ )
26
+ from bead.data.serialization import read_jsonlines
27
+ from bead.evaluation.interannotator import InterAnnotatorMetrics
28
+ from bead.items.item import Item
29
+ from bead.items.item_template import ItemTemplate
30
+ from bead.simulation.runner import SimulationRunner
31
+
32
# Module-level Rich console shared by the simulate subcommands below; it
# renders their section rules, summary tables, and progress spinners.
console = Console()
33
+
34
+
35
# NOTE: click renders this docstring as the group's --help text. The "\b"
# markers tell click not to re-wrap the paragraph that follows, so the command
# table and the shell examples keep their line breaks. The string is raw
# (r"""), so "\b" and "\\" reach click verbatim.
@click.group()
def simulate() -> None:
    r"""Run multi-annotator simulation experiments.

    Commands for running simulations with various annotator types
    (oracle, random, LM-based, distance-based) and noise models.

    \b
    AVAILABLE COMMANDS:
        run                 Run simulation with configured annotators
        configure           Create simulation configuration file
        analyze             Analyze simulation results
        list-annotators     List available annotator types
        list-noise-models   List available noise models

    \b
    Examples:
        # Run simulation with LM-based annotator
        $ bead simulate run \\
            --items items.jsonl \\
            --templates templates.jsonl \\
            --annotator lm_score \\
            --n-annotators 5 \\
            --output results.jsonl

        # Create configuration file
        $ bead simulate configure \\
            --strategy lm_score \\
            --noise-type temperature \\
            --temperature 1.5 \\
            --output simulation_config.yaml
    """
67
+
68
+
69
@click.command()
@click.option(
    "--items",
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
    required=True,
    help="Path to items file (JSONL)",
)
@click.option(
    "--templates",
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
    help="Path to templates file (JSONL, optional if shared template)",
)
@click.option(
    "--annotator",
    type=click.Choice(["lm_score", "distance", "random", "oracle"]),
    default="lm_score",
    help="Annotator strategy (default: lm_score)",
)
@click.option(
    "--n-annotators",
    type=int,
    default=5,
    help="Number of simulated annotators (default: 5)",
)
@click.option(
    "--noise-type",
    type=click.Choice(["temperature", "systematic", "random", "none"]),
    default="temperature",
    help="Type of noise model (default: temperature)",
)
@click.option(
    "--temperature",
    type=float,
    default=1.0,
    help="Temperature for scaling (default: 1.0)",
)
@click.option(
    "--bias-strength",
    type=float,
    default=0.0,
    help="Systematic bias strength 0.0-1.0 (default: 0.0)",
)
@click.option(
    "--bias-type",
    type=str,
    help="Type of systematic bias (length, frequency, position)",
)
@click.option(
    "--random-seed",
    type=int,
    help="Random seed for reproducibility",
)
@click.option(
    "--model-output-key",
    type=str,
    default="lm_score",
    help="Key to extract from model outputs (default: lm_score)",
)
@click.option(
    "--output",
    type=click.Path(path_type=Path),
    required=True,
    help="Output path for simulation results (JSONL)",
)
@click.option(
    "--config",
    # dir_okay=False: a directory would otherwise pass validation and then
    # fail inside open() with an OSError that no handler below catches.
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
    help="Path to simulation configuration file (JSON/YAML, overrides CLI options)",
)
@click.pass_context
def run(
    ctx: click.Context,
    items: Path,
    templates: Path | None,
    annotator: str,
    n_annotators: int,
    noise_type: str,
    temperature: float,
    bias_strength: float,
    bias_type: str | None,
    random_seed: int | None,
    model_output_key: str,
    output: Path,
    config: Path | None,
) -> None:
    r"""Run multi-annotator simulation.

    Simulates annotations from multiple annotators using specified strategy
    and noise model. Results are saved in JSONL format with one annotation
    per rater per item.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    items : Path
        Path to items file.
    templates : Path | None
        Path to templates file (optional).
    annotator : str
        Annotator strategy name.
    n_annotators : int
        Number of annotators to simulate.
    noise_type : str
        Type of noise model.
    temperature : float
        Temperature for scaling.
    bias_strength : float
        Systematic bias strength.
    bias_type : str | None
        Type of systematic bias.
    random_seed : int | None
        Random seed.
    model_output_key : str
        Key for model outputs.
    output : Path
        Output path for results.
    config : Path | None
        Configuration file path (YAML or JSON). When given, the simulation
        settings are read from this file instead of the strategy, noise and
        annotator-count options.

    Examples
    --------
    $ bead simulate run \\
        --items items.jsonl \\
        --templates templates.jsonl \\
        --annotator lm_score \\
        --n-annotators 10 \\
        --noise-type temperature \\
        --temperature 1.5 \\
        --output simulation_results.jsonl

    $ bead simulate run \\
        --items items.jsonl \\
        --annotator oracle \\
        --n-annotators 5 \\
        --noise-type none \\
        --output oracle_baseline.jsonl

    $ bead simulate run \\
        --items items.jsonl \\
        --config simulation_config.yaml \\
        --output results.jsonl
    """
    try:
        console.rule("[bold]Simulation Runner[/bold]")

        # Load configuration if provided
        if config:
            print_info(f"Loading configuration from {config}")
            with open(config, encoding="utf-8") as f:
                # Compare the suffix case-insensitively so ".YAML"/".YML"
                # files are not mistakenly handed to the JSON parser.
                if config.suffix.lower() in {".yaml", ".yml"}:
                    try:
                        config_dict = yaml.safe_load(f)
                    except yaml.YAMLError as e:
                        # Re-raise as ValueError so the handler below reports
                        # it instead of the user seeing a raw traceback.
                        raise ValueError(
                            f"Invalid YAML in configuration: {e}"
                        ) from e
                else:
                    config_dict = json.load(f)

            # An empty file parses to None and a top-level list/scalar is not
            # a settings mapping; "**config_dict" would raise an uncaught
            # TypeError for either, so reject them explicitly.
            if not isinstance(config_dict, dict):
                raise ValueError(
                    f"Configuration file {config} must contain a mapping of "
                    f"settings, got {type(config_dict).__name__}"
                )

            sim_config = SimulationRunnerConfig(**config_dict)
            print_success("Configuration loaded")
        else:
            # Build configuration from CLI options
            noise_model = NoiseModelConfig(
                noise_type=noise_type,  # type: ignore[arg-type]
                temperature=temperature,
                bias_strength=bias_strength,
                bias_type=bias_type,
            )

            annotator_config = SimulatedAnnotatorConfig(
                strategy=annotator,  # type: ignore[arg-type]
                noise_model=noise_model,
                random_state=random_seed,
                model_output_key=model_output_key,
            )

            sim_config = SimulationRunnerConfig(
                annotator_configs=[annotator_config],
                n_annotators=n_annotators,
            )

        # Load items
        print_info(f"Loading items from {items}")
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
        ) as progress:
            progress.add_task("Loading items...", total=None)
            items_list = read_jsonlines(items, Item)

        print_success(f"Loaded {len(items_list)} items")

        # Load templates (optional)
        templates_list: list[ItemTemplate] | ItemTemplate
        if templates:
            print_info(f"Loading templates from {templates}")
            with Progress(
                SpinnerColumn(),
                TextColumn("[progress.description]{task.description}"),
                console=console,
            ) as progress:
                progress.add_task("Loading templates...", total=None)
                loaded_templates = read_jsonlines(templates, ItemTemplate)

            print_success(f"Loaded {len(loaded_templates)} templates")

            # Use single template if only one, otherwise list
            if len(loaded_templates) == 1:
                templates_list = loaded_templates[0]
            else:
                templates_list = loaded_templates
        else:
            # Create minimal template for items without explicit templates
            print_info("No templates provided, using minimal template")
            templates_list = ItemTemplate(
                name="default_template",
                judgment_type="acceptability",
                task_type="forced_choice",
                task_spec={"prompt": "Default simulation prompt"},
                presentation_spec={"mode": "static"},
            )

        # Create simulation runner
        print_info(f"Creating simulation with {sim_config.n_annotators} annotators")
        runner = SimulationRunner(config=sim_config)

        # Run simulation
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
        ) as progress:
            progress.add_task("Running simulation...", total=None)
            results = runner.run(items=items_list, templates=templates_list)

        # Display summary
        table = Table(title="Simulation Summary")
        table.add_column("Metric", style="cyan")
        table.add_column("Value", style="green", justify="right")

        table.add_row("Items", str(len(items_list)))
        table.add_row("Annotators", str(sim_config.n_annotators))
        # With --config the CLI strategy/noise values were not used, so do
        # not report them as if they were.
        table.add_row("Strategy", annotator if not config else "from config")
        table.add_row("Noise Type", noise_type if not config else "from config")
        table.add_row(
            "Total Annotations", str(len(items_list) * sim_config.n_annotators)
        )

        console.print(table)

        # Save results
        print_info(f"Saving results to {output}")

        # Convert results to JSONL format (one record per item per annotator)
        output.parent.mkdir(parents=True, exist_ok=True)
        with open(output, "w", encoding="utf-8") as f:
            for i, item_id in enumerate(results["item_ids"]):
                for annotator_idx in range(sim_config.n_annotators):
                    annotation = results[f"annotator_{annotator_idx}"][i]
                    record = {
                        "item_id": item_id,
                        "annotator_id": f"annotator_{annotator_idx}",
                        "annotation": annotation,
                    }
                    f.write(json.dumps(record) + "\n")

        print_success(f"Simulation complete! Results saved to {output}")

    except FileNotFoundError as e:
        print_error(f"File not found: {e}", exit_code=0)
        ctx.exit(1)
    except KeyError as e:
        print_error(f"Missing required field: {e}", exit_code=0)
        ctx.exit(1)
    except json.JSONDecodeError as e:
        # Must precede ValueError: JSONDecodeError is a ValueError subclass.
        print_error(f"Invalid JSON in configuration: {e}", exit_code=0)
        ctx.exit(1)
    except ValueError as e:
        print_error(f"Validation error: {e}", exit_code=0)
        ctx.exit(1)
347
+
348
+
349
@click.command()
@click.option(
    "--strategy",
    type=click.Choice(["lm_score", "distance", "random", "oracle"]),
    default="lm_score",
    help="Annotator strategy (default: lm_score)",
)
@click.option(
    "--n-annotators",
    type=int,
    default=5,
    help="Number of annotators (default: 5)",
)
@click.option(
    "--noise-type",
    type=click.Choice(["temperature", "systematic", "random", "none"]),
    default="temperature",
    help="Noise model type (default: temperature)",
)
@click.option(
    "--temperature",
    type=float,
    default=1.0,
    help="Temperature for scaling (default: 1.0)",
)
@click.option(
    "--bias-strength",
    type=float,
    default=0.0,
    help="Systematic bias strength (default: 0.0)",
)
@click.option(
    "--bias-type",
    type=str,
    help="Type of systematic bias (length, frequency, position)",
)
@click.option(
    "--random-seed",
    type=int,
    help="Random seed for reproducibility",
)
@click.option(
    "--model-output-key",
    type=str,
    default="lm_score",
    help="Key for model outputs (default: lm_score)",
)
@click.option(
    "--output",
    type=click.Path(path_type=Path),
    required=True,
    help="Output path for configuration file (YAML/JSON)",
)
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["yaml", "json"]),
    default="yaml",
    help="Output format (default: yaml)",
)
@click.pass_context
def configure(
    ctx: click.Context,
    strategy: str,
    n_annotators: int,
    noise_type: str,
    temperature: float,
    bias_strength: float,
    bias_type: str | None,
    random_seed: int | None,
    model_output_key: str,
    output: Path,
    output_format: str,
) -> None:
    r"""Create simulation configuration file.

    Generates a configuration file that can be used with the 'run' command
    via the --config option. Configuration includes annotator strategy,
    noise model parameters, and simulation settings.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    strategy : str
        Annotator strategy.
    n_annotators : int
        Number of annotators.
    noise_type : str
        Type of noise model.
    temperature : float
        Temperature parameter.
    bias_strength : float
        Bias strength.
    bias_type : str | None
        Type of bias.
    random_seed : int | None
        Random seed.
    model_output_key : str
        Key for model outputs.
    output : Path
        Output path for config.
    output_format : str
        Output format (yaml or json), set with the --format option. The
        format is not inferred from the suffix of ``output``.

    Examples
    --------
    $ bead simulate configure \\
        --strategy lm_score \\
        --n-annotators 10 \\
        --noise-type temperature \\
        --temperature 2.0 \\
        --random-seed 42 \\
        --output simulation_config.yaml

    $ bead simulate configure \\
        --strategy lm_score \\
        --noise-type systematic \\
        --bias-strength 0.3 \\
        --bias-type length \\
        --output config.json \\
        --format json
    """
    try:
        console.rule("[bold]Simulation Configuration[/bold]")

        # Build configuration
        noise_model = NoiseModelConfig(
            noise_type=noise_type,  # type: ignore[arg-type]
            temperature=temperature,
            bias_strength=bias_strength,
            bias_type=bias_type,
        )

        annotator_config = SimulatedAnnotatorConfig(
            strategy=strategy,  # type: ignore[arg-type]
            noise_model=noise_model,
            random_state=random_seed,
            model_output_key=model_output_key,
        )

        sim_config = SimulationRunnerConfig(
            annotator_configs=[annotator_config],
            n_annotators=n_annotators,
        )

        # Display configuration
        table = Table(title="Configuration Summary")
        table.add_column("Setting", style="cyan")
        table.add_column("Value", style="green")

        table.add_row("Strategy", strategy)
        table.add_row("Number of Annotators", str(n_annotators))
        table.add_row("Noise Type", noise_type)
        table.add_row("Temperature", f"{temperature:.2f}")
        table.add_row("Bias Strength", f"{bias_strength:.2f}")
        if bias_type:
            table.add_row("Bias Type", bias_type)
        if random_seed is not None:
            table.add_row("Random Seed", str(random_seed))

        console.print(table)

        # Save configuration
        output.parent.mkdir(parents=True, exist_ok=True)

        # mode="json" asks the model for JSON-compatible primitives only, so
        # both yaml.safe_dump and json.dump can serialize the result.
        # NOTE(review): assumes SimulationRunnerConfig is a pydantic v2 model
        # (it already exposes model_dump) - confirm.
        config_dict = sim_config.model_dump(mode="json")

        with open(output, "w", encoding="utf-8") as f:
            if output_format == "yaml":
                yaml.safe_dump(config_dict, f, default_flow_style=False, indent=2)
            else:
                json.dump(config_dict, f, indent=2)

        print_success(f"Configuration saved to {output}")

    except Exception as e:
        # Top-level CLI boundary: report any failure and exit non-zero.
        print_error(f"Failed to create configuration: {e}", exit_code=0)
        ctx.exit(1)
528
+
529
+
530
def _summarize_annotations(
    annotations: list[int | float | str | list[str]],
) -> dict[str, JsonValue]:
    """Compute summary statistics for one annotator's annotations.

    Parameters
    ----------
    annotations : list[int | float | str | list[str]]
        All annotations produced by a single annotator.

    Returns
    -------
    dict[str, JsonValue]
        ``count``/``mean``/``std``/``min``/``max`` when the first annotation
        is numeric; otherwise ``count``/``unique_values``/``most_common``
        (the three most frequent values, stringified).
    """
    # The first annotation decides the branch; non-numeric stragglers are
    # dropped from the numeric aggregates but still included in "count".
    if annotations and isinstance(annotations[0], int | float):
        numeric_annotations = [a for a in annotations if isinstance(a, int | float)]
        return {
            "count": len(annotations),
            "mean": float(np.mean(numeric_annotations)),
            "std": float(np.std(numeric_annotations)),
            "min": float(np.min(numeric_annotations)),
            "max": float(np.max(numeric_annotations)),
        }

    # List annotations are unhashable and would make Counter raise TypeError,
    # so count them as tuples and turn them back into lists for display.
    counter = Counter(tuple(a) if isinstance(a, list) else a for a in annotations)
    most_common_list = [
        {
            "value": str(list(val) if isinstance(val, tuple) else val),
            "count": count,
        }
        for val, count in counter.most_common(3)
    ]
    return {
        "count": len(annotations),
        "unique_values": len(counter),
        "most_common": most_common_list,
    }


@click.command()
@click.option(
    "--results",
    type=click.Path(dir_okay=False, path_type=Path),
    required=True,
    help="Path to simulation results (JSONL)",
)
@click.option(
    "--output",
    type=click.Path(path_type=Path),
    help="Output path for analysis report (JSON)",
)
@click.pass_context
def analyze(
    ctx: click.Context,
    results: Path,
    output: Path | None,
) -> None:
    r"""Analyze simulation results.

    Computes statistics and agreement metrics from simulation results,
    including per-annotator statistics and inter-annotator agreement.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    results : Path
        Path to simulation results.
    output : Path | None
        Optional output path for report.

    Examples
    --------
    $ bead simulate analyze \\
        --results simulation_results.jsonl \\
        --output analysis_report.json

    $ bead simulate analyze \\
        --results results.jsonl
    """
    try:
        console.rule("[bold]Simulation Analysis[/bold]")

        # Load results
        print_info(f"Loading simulation results from {results}")
        with open(results, encoding="utf-8") as f:
            records = [json.loads(line) for line in f if line.strip()]

        # An empty file would otherwise surface later as an opaque
        # "division by zero" from the per-item average.
        if not records:
            raise ValueError(f"no annotation records found in {results}")

        print_success(f"Loaded {len(records)} annotation records")

        # Organize by item and annotator
        # Annotations can be int, float, str, or list[str] depending on task type
        items_dict: dict[str, dict[str, int | float | str | list[str]]] = {}
        annotators_dict: dict[str, list[int | float | str | list[str]]] = {}

        for record in records:
            item_id = record["item_id"]
            annotator_id = record["annotator_id"]
            annotation = record["annotation"]

            items_dict.setdefault(item_id, {})[annotator_id] = annotation
            annotators_dict.setdefault(annotator_id, []).append(annotation)

        n_items = len(items_dict)
        n_annotators = len(annotators_dict)

        print_info(f"Found {n_items} items and {n_annotators} annotators")

        # Per-annotator statistics (shape depends on annotation type)
        annotator_stats: dict[str, dict[str, JsonValue]] = {
            annotator_id: _summarize_annotations(annotations)
            for annotator_id, annotations in annotators_dict.items()
        }

        # Display summary
        table = Table(title="Analysis Summary")
        table.add_column("Metric", style="cyan")
        table.add_column("Value", style="green", justify="right")

        table.add_row("Total Items", str(n_items))
        table.add_row("Total Annotators", str(n_annotators))
        table.add_row("Total Annotations", str(len(records)))
        table.add_row("Annotations per Item", f"{len(records) / n_items:.1f}")

        console.print(table)

        # Compute inter-annotator agreement if applicable
        if n_annotators >= 2:
            print_info("Computing inter-annotator agreement...")

            # One list per annotator, aligned on the sorted item ids; an item
            # the annotator did not rate is represented by None.
            item_order = sorted(items_dict)
            rater_data: dict[str, list[int | float | str | list[str] | None]] = {
                annotator_id: [
                    items_dict[item_id].get(annotator_id) for item_id in item_order
                ]
                for annotator_id in sorted(annotators_dict)
            }

            try:
                # Type: ignore needed because InterAnnotatorMetrics expects
                # specific Label type but annotations vary by task type
                alpha = InterAnnotatorMetrics.krippendorff_alpha(
                    rater_data,
                    metric="nominal",  # type: ignore[arg-type]
                )

                agreement_table = Table(title="Inter-Annotator Agreement")
                agreement_table.add_column("Metric", style="cyan")
                agreement_table.add_column("Value", style="green", justify="right")

                agreement_table.add_row("Krippendorff's Alpha", f"{alpha:.4f}")

                console.print(agreement_table)

                # Stored alongside the per-annotator entries to keep the
                # existing report layout.
                annotator_stats["inter_annotator_agreement"] = {
                    "krippendorff_alpha": float(alpha)
                }
            except Exception as e:
                # Agreement is best-effort: report the problem and still
                # produce the rest of the analysis.
                print_info(f"Could not compute agreement: {e}")

        # Save analysis report
        if output:
            analysis_report: dict[str, JsonValue] = {
                "n_items": n_items,
                "n_annotators": n_annotators,
                "total_annotations": len(records),
                "annotator_statistics": annotator_stats,
            }

            output.parent.mkdir(parents=True, exist_ok=True)
            with open(output, "w", encoding="utf-8") as f:
                json.dump(analysis_report, f, indent=2)

            print_success(f"Analysis report saved to {output}")

        print_success("Analysis complete!")

    except FileNotFoundError as e:
        print_error(f"File not found: {e}", exit_code=0)
        ctx.exit(1)
    except json.JSONDecodeError as e:
        print_error(f"Invalid JSON: {e}", exit_code=0)
        ctx.exit(1)
    except Exception as e:
        # Top-level CLI boundary: report any other failure and exit non-zero.
        print_error(f"Analysis failed: {e}", exit_code=0)
        ctx.exit(1)
708
+
709
+
710
@click.command()
def list_annotators() -> None:
    """List available annotator types with descriptions.

    Displays all available annotator strategies, their descriptions,
    and typical use cases.

    Examples
    --------
    $ bead simulate list-annotators
    """
    # NOTE: the docstring above doubles as the command's ``--help`` text,
    # so it is user-facing output rather than internal documentation.
    console.rule("[bold]Available Annotator Types[/bold]")

    # (header, style, width) for each column of the overview table.
    column_specs = (
        ("Strategy", "cyan", 15),
        ("Description", "green", 50),
        ("Use Case", "yellow", 30),
    )
    # (strategy, description, use case) for each annotator, in display order.
    strategy_rows = (
        (
            "lm_score",
            "Uses language model scores from item.model_outputs",
            "Simulate LM-based judgments",
        ),
        (
            "distance",
            "Uses distance metrics (embeddings, edit distance)",
            "Similarity-based judgments",
        ),
        (
            "random",
            "Random selection from valid options",
            "Baseline / control condition",
        ),
        (
            "oracle",
            "Uses ground truth labels (requires labels file)",
            "Gold standard simulation",
        ),
    )

    table = Table(show_header=True)
    for header, colour, width in column_specs:
        table.add_column(header, style=colour, width=width)
    for row in strategy_rows:
        table.add_row(*row)
    console.print(table)

    # Sample invocations, printed one per line beneath the table.
    usage_examples = (
        " $ bead simulate run --items items.jsonl --annotator lm_score "
        "--n-annotators 5",
        " $ bead simulate run --items items.jsonl --annotator oracle "
        "--ground-truth labels.jsonl",
    )
    console.print("\n[bold]Example Usage:[/bold]")
    for example in usage_examples:
        console.print(example)
766
+
767
+
768
@click.command()
def list_noise_models() -> None:
    """List available noise models with descriptions.

    Displays all available noise model types, their parameters,
    and effects on simulation results.

    Examples
    --------
    $ bead simulate list-noise-models
    """
    # NOTE: the docstring above doubles as the command's ``--help`` text,
    # so it is user-facing output rather than internal documentation.
    console.rule("[bold]Available Noise Models[/bold]")

    # (header, style, width) for each column of the overview table.
    column_specs = (
        ("Noise Type", "cyan", 15),
        ("Description", "green", 40),
        ("Key Parameters", "yellow", 25),
    )
    # (noise type, description, key parameters) for each model, in display order.
    model_rows = (
        (
            "temperature",
            "Scales decision probabilities (higher = more random)",
            "temperature (0.1-10.0)",
        ),
        (
            "systematic",
            "Applies systematic biases (length, frequency, position)",
            "bias_strength, bias_type",
        ),
        (
            "random",
            "Adds Gaussian noise to scores",
            "random_noise_stddev",
        ),
        (
            "none",
            "No noise (deterministic)",
            "N/A",
        ),
    )

    table = Table(show_header=True)
    for header, colour, width in column_specs:
        table.add_column(header, style=colour, width=width)
    for row in model_rows:
        table.add_row(*row)
    console.print(table)

    # One bullet per tunable parameter, printed under its own heading.
    parameter_notes = (
        " • temperature: Controls randomness (1.0 = unchanged, >1.0 = more random)",
        " • bias_strength: Strength of systematic bias (0.0-1.0)",
        " • bias_type: Type of bias (length/frequency/position)",
        " • random_noise_stddev: Standard deviation for random noise",
    )
    console.print("\n[bold]Parameter Details:[/bold]")
    for note in parameter_notes:
        console.print(note)

    # Sample invocations, printed one per line after the parameter notes.
    usage_examples = (
        " $ bead simulate run --items items.jsonl --noise-type temperature "
        "--temperature 2.0",
        " $ bead simulate run --items items.jsonl --noise-type systematic "
        "--bias-strength 0.3 --bias-type length",
    )
    console.print("\n[bold]Example Usage:[/bold]")
    for example in usage_examples:
        console.print(example)
833
+
834
+
835
# Register commands
# Each subcommand defined in this module is attached to the ``simulate``
# group, in the same order as before.
for _subcommand in (run, configure, analyze, list_annotators, list_noise_models):
    simulate.add_command(_subcommand)
del _subcommand  # keep the loop variable out of the module namespace