bead 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. bead/__init__.py +11 -0
  2. bead/__main__.py +11 -0
  3. bead/active_learning/__init__.py +15 -0
  4. bead/active_learning/config.py +231 -0
  5. bead/active_learning/loop.py +566 -0
  6. bead/active_learning/models/__init__.py +24 -0
  7. bead/active_learning/models/base.py +852 -0
  8. bead/active_learning/models/binary.py +910 -0
  9. bead/active_learning/models/categorical.py +943 -0
  10. bead/active_learning/models/cloze.py +862 -0
  11. bead/active_learning/models/forced_choice.py +956 -0
  12. bead/active_learning/models/free_text.py +773 -0
  13. bead/active_learning/models/lora.py +365 -0
  14. bead/active_learning/models/magnitude.py +835 -0
  15. bead/active_learning/models/multi_select.py +795 -0
  16. bead/active_learning/models/ordinal_scale.py +811 -0
  17. bead/active_learning/models/peft_adapter.py +155 -0
  18. bead/active_learning/models/random_effects.py +639 -0
  19. bead/active_learning/selection.py +354 -0
  20. bead/active_learning/strategies.py +391 -0
  21. bead/active_learning/trainers/__init__.py +26 -0
  22. bead/active_learning/trainers/base.py +210 -0
  23. bead/active_learning/trainers/data_collator.py +172 -0
  24. bead/active_learning/trainers/dataset_utils.py +261 -0
  25. bead/active_learning/trainers/huggingface.py +304 -0
  26. bead/active_learning/trainers/lightning.py +324 -0
  27. bead/active_learning/trainers/metrics.py +424 -0
  28. bead/active_learning/trainers/mixed_effects.py +551 -0
  29. bead/active_learning/trainers/model_wrapper.py +509 -0
  30. bead/active_learning/trainers/registry.py +104 -0
  31. bead/adapters/__init__.py +11 -0
  32. bead/adapters/huggingface.py +61 -0
  33. bead/behavioral/__init__.py +116 -0
  34. bead/behavioral/analytics.py +646 -0
  35. bead/behavioral/extraction.py +343 -0
  36. bead/behavioral/merging.py +343 -0
  37. bead/cli/__init__.py +11 -0
  38. bead/cli/active_learning.py +513 -0
  39. bead/cli/active_learning_commands.py +779 -0
  40. bead/cli/completion.py +359 -0
  41. bead/cli/config.py +624 -0
  42. bead/cli/constraint_builders.py +286 -0
  43. bead/cli/deployment.py +859 -0
  44. bead/cli/deployment_trials.py +493 -0
  45. bead/cli/deployment_ui.py +332 -0
  46. bead/cli/display.py +378 -0
  47. bead/cli/items.py +960 -0
  48. bead/cli/items_factories.py +776 -0
  49. bead/cli/list_constraints.py +714 -0
  50. bead/cli/lists.py +490 -0
  51. bead/cli/main.py +430 -0
  52. bead/cli/models.py +877 -0
  53. bead/cli/resource_loaders.py +621 -0
  54. bead/cli/resources.py +1036 -0
  55. bead/cli/shell.py +356 -0
  56. bead/cli/simulate.py +840 -0
  57. bead/cli/templates.py +1158 -0
  58. bead/cli/training.py +1080 -0
  59. bead/cli/utils.py +614 -0
  60. bead/cli/workflow.py +1273 -0
  61. bead/config/__init__.py +68 -0
  62. bead/config/active_learning.py +1009 -0
  63. bead/config/config.py +192 -0
  64. bead/config/defaults.py +118 -0
  65. bead/config/deployment.py +217 -0
  66. bead/config/env.py +147 -0
  67. bead/config/item.py +45 -0
  68. bead/config/list.py +193 -0
  69. bead/config/loader.py +149 -0
  70. bead/config/logging.py +42 -0
  71. bead/config/model.py +49 -0
  72. bead/config/paths.py +46 -0
  73. bead/config/profiles.py +320 -0
  74. bead/config/resources.py +47 -0
  75. bead/config/serialization.py +210 -0
  76. bead/config/simulation.py +206 -0
  77. bead/config/template.py +238 -0
  78. bead/config/validation.py +267 -0
  79. bead/data/__init__.py +65 -0
  80. bead/data/base.py +87 -0
  81. bead/data/identifiers.py +97 -0
  82. bead/data/language_codes.py +61 -0
  83. bead/data/metadata.py +270 -0
  84. bead/data/range.py +123 -0
  85. bead/data/repository.py +358 -0
  86. bead/data/serialization.py +249 -0
  87. bead/data/timestamps.py +89 -0
  88. bead/data/validation.py +349 -0
  89. bead/data_collection/__init__.py +11 -0
  90. bead/data_collection/jatos.py +223 -0
  91. bead/data_collection/merger.py +154 -0
  92. bead/data_collection/prolific.py +198 -0
  93. bead/deployment/__init__.py +5 -0
  94. bead/deployment/distribution.py +402 -0
  95. bead/deployment/jatos/__init__.py +1 -0
  96. bead/deployment/jatos/api.py +200 -0
  97. bead/deployment/jatos/exporter.py +210 -0
  98. bead/deployment/jspsych/__init__.py +9 -0
  99. bead/deployment/jspsych/biome.json +44 -0
  100. bead/deployment/jspsych/config.py +411 -0
  101. bead/deployment/jspsych/generator.py +598 -0
  102. bead/deployment/jspsych/package.json +51 -0
  103. bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
  104. bead/deployment/jspsych/randomizer.py +299 -0
  105. bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
  106. bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
  107. bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
  108. bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
  109. bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
  110. bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
  111. bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
  112. bead/deployment/jspsych/src/plugins/rating.ts +248 -0
  113. bead/deployment/jspsych/src/slopit/index.ts +9 -0
  114. bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
  115. bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
  116. bead/deployment/jspsych/templates/experiment.css +1 -0
  117. bead/deployment/jspsych/templates/experiment.js.template +289 -0
  118. bead/deployment/jspsych/templates/index.html +51 -0
  119. bead/deployment/jspsych/templates/randomizer.js +241 -0
  120. bead/deployment/jspsych/templates/randomizer.js.template +313 -0
  121. bead/deployment/jspsych/trials.py +723 -0
  122. bead/deployment/jspsych/tsconfig.json +23 -0
  123. bead/deployment/jspsych/tsup.config.ts +30 -0
  124. bead/deployment/jspsych/ui/__init__.py +1 -0
  125. bead/deployment/jspsych/ui/components.py +383 -0
  126. bead/deployment/jspsych/ui/styles.py +411 -0
  127. bead/dsl/__init__.py +80 -0
  128. bead/dsl/ast.py +168 -0
  129. bead/dsl/context.py +178 -0
  130. bead/dsl/errors.py +71 -0
  131. bead/dsl/evaluator.py +570 -0
  132. bead/dsl/grammar.lark +81 -0
  133. bead/dsl/parser.py +231 -0
  134. bead/dsl/stdlib.py +929 -0
  135. bead/evaluation/__init__.py +13 -0
  136. bead/evaluation/convergence.py +485 -0
  137. bead/evaluation/interannotator.py +398 -0
  138. bead/items/__init__.py +40 -0
  139. bead/items/adapters/__init__.py +70 -0
  140. bead/items/adapters/anthropic.py +224 -0
  141. bead/items/adapters/api_utils.py +167 -0
  142. bead/items/adapters/base.py +216 -0
  143. bead/items/adapters/google.py +259 -0
  144. bead/items/adapters/huggingface.py +1074 -0
  145. bead/items/adapters/openai.py +323 -0
  146. bead/items/adapters/registry.py +202 -0
  147. bead/items/adapters/sentence_transformers.py +224 -0
  148. bead/items/adapters/togetherai.py +309 -0
  149. bead/items/binary.py +515 -0
  150. bead/items/cache.py +558 -0
  151. bead/items/categorical.py +593 -0
  152. bead/items/cloze.py +757 -0
  153. bead/items/constructor.py +784 -0
  154. bead/items/forced_choice.py +413 -0
  155. bead/items/free_text.py +681 -0
  156. bead/items/generation.py +432 -0
  157. bead/items/item.py +396 -0
  158. bead/items/item_template.py +787 -0
  159. bead/items/magnitude.py +573 -0
  160. bead/items/multi_select.py +621 -0
  161. bead/items/ordinal_scale.py +569 -0
  162. bead/items/scoring.py +448 -0
  163. bead/items/validation.py +723 -0
  164. bead/lists/__init__.py +30 -0
  165. bead/lists/balancer.py +263 -0
  166. bead/lists/constraints.py +1067 -0
  167. bead/lists/experiment_list.py +286 -0
  168. bead/lists/list_collection.py +378 -0
  169. bead/lists/partitioner.py +1141 -0
  170. bead/lists/stratification.py +254 -0
  171. bead/participants/__init__.py +73 -0
  172. bead/participants/collection.py +699 -0
  173. bead/participants/merging.py +312 -0
  174. bead/participants/metadata_spec.py +491 -0
  175. bead/participants/models.py +276 -0
  176. bead/resources/__init__.py +29 -0
  177. bead/resources/adapters/__init__.py +19 -0
  178. bead/resources/adapters/base.py +104 -0
  179. bead/resources/adapters/cache.py +128 -0
  180. bead/resources/adapters/glazing.py +508 -0
  181. bead/resources/adapters/registry.py +117 -0
  182. bead/resources/adapters/unimorph.py +796 -0
  183. bead/resources/classification.py +856 -0
  184. bead/resources/constraint_builders.py +329 -0
  185. bead/resources/constraints.py +165 -0
  186. bead/resources/lexical_item.py +223 -0
  187. bead/resources/lexicon.py +744 -0
  188. bead/resources/loaders.py +209 -0
  189. bead/resources/template.py +441 -0
  190. bead/resources/template_collection.py +707 -0
  191. bead/resources/template_generation.py +349 -0
  192. bead/simulation/__init__.py +29 -0
  193. bead/simulation/annotators/__init__.py +15 -0
  194. bead/simulation/annotators/base.py +175 -0
  195. bead/simulation/annotators/distance_based.py +135 -0
  196. bead/simulation/annotators/lm_based.py +114 -0
  197. bead/simulation/annotators/oracle.py +182 -0
  198. bead/simulation/annotators/random.py +181 -0
  199. bead/simulation/dsl_extension/__init__.py +3 -0
  200. bead/simulation/noise_models/__init__.py +13 -0
  201. bead/simulation/noise_models/base.py +42 -0
  202. bead/simulation/noise_models/random_noise.py +82 -0
  203. bead/simulation/noise_models/systematic.py +132 -0
  204. bead/simulation/noise_models/temperature.py +86 -0
  205. bead/simulation/runner.py +144 -0
  206. bead/simulation/strategies/__init__.py +23 -0
  207. bead/simulation/strategies/base.py +123 -0
  208. bead/simulation/strategies/binary.py +103 -0
  209. bead/simulation/strategies/categorical.py +123 -0
  210. bead/simulation/strategies/cloze.py +224 -0
  211. bead/simulation/strategies/forced_choice.py +127 -0
  212. bead/simulation/strategies/free_text.py +105 -0
  213. bead/simulation/strategies/magnitude.py +116 -0
  214. bead/simulation/strategies/multi_select.py +129 -0
  215. bead/simulation/strategies/ordinal_scale.py +131 -0
  216. bead/templates/__init__.py +27 -0
  217. bead/templates/adapters/__init__.py +17 -0
  218. bead/templates/adapters/base.py +128 -0
  219. bead/templates/adapters/cache.py +178 -0
  220. bead/templates/adapters/huggingface.py +312 -0
  221. bead/templates/combinatorics.py +103 -0
  222. bead/templates/filler.py +605 -0
  223. bead/templates/renderers.py +177 -0
  224. bead/templates/resolver.py +178 -0
  225. bead/templates/strategies.py +1806 -0
  226. bead/templates/streaming.py +195 -0
  227. bead-0.1.0.dist-info/METADATA +212 -0
  228. bead-0.1.0.dist-info/RECORD +231 -0
  229. bead-0.1.0.dist-info/WHEEL +4 -0
  230. bead-0.1.0.dist-info/entry_points.txt +2 -0
  231. bead-0.1.0.dist-info/licenses/LICENSE +21 -0
bead/cli/items.py ADDED
@@ -0,0 +1,960 @@
1
+ """Item construction commands for bead CLI.
2
+
3
+ This module provides commands for constructing experimental items from filled
4
+ templates (Stage 3 of the bead pipeline).
5
+
6
+ Commands support:
7
+ - Full item construction with ItemTemplate specifications
8
+ - Model adapter integration (HuggingFace, OpenAI, Anthropic, Google, TogetherAI)
9
+ - Model output caching for efficiency
10
+ - Constraint-based filtering (DSL, extensional, intensional, relational)
11
+ - Batch processing with progress tracking
12
+ - Parallel execution for large-scale construction
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import json
18
+ from pathlib import Path
19
+ from typing import cast
20
+ from uuid import UUID
21
+
22
+ import click
23
+ from pydantic import ValidationError
24
+ from rich.console import Console
25
+ from rich.progress import Progress, SpinnerColumn, TextColumn
26
+ from rich.table import Table
27
+
28
+ from bead.cli.utils import print_error, print_info, print_success
29
+ from bead.items.adapters.registry import default_registry
30
+ from bead.items.cache import ModelOutputCache
31
+ from bead.items.constructor import ItemConstructor
32
+ from bead.items.item import Item
33
+ from bead.items.item_template import ItemTemplate, TaskType
34
+ from bead.items.validation import (
35
+ get_task_type_requirements,
36
+ infer_task_type_from_item,
37
+ validate_item_for_task_type,
38
+ )
39
+ from bead.resources.constraints import Constraint
40
+ from bead.templates.filler import FilledTemplate
41
+
42
+ console = Console()
43
+
44
+
45
+ # Helper functions for item construction
46
+
47
+
48
def _load_item_templates(template_file: Path) -> list[ItemTemplate]:
    """Load ItemTemplate objects from JSONL file.

    Blank lines are skipped. Every other line must hold a single JSON object
    whose members are passed to ``ItemTemplate`` as keyword arguments.

    Parameters
    ----------
    template_file : Path
        Path to ItemTemplate JSONL file.

    Returns
    -------
    list[ItemTemplate]
        List of loaded ItemTemplate objects, in file order.

    Raises
    ------
    FileNotFoundError
        If template file doesn't exist.
    ValueError
        If a line is not valid JSON, is not a JSON object, or is rejected by
        ItemTemplate validation. The message starts with the 1-based line
        number; JSON and validation failures are chained as the cause.
    """
    templates: list[ItemTemplate] = []

    with open(template_file, encoding="utf-8") as f:
        for line_num, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line:
                continue

            try:
                template_data = json.loads(line)
            except json.JSONDecodeError as e:
                raise ValueError(f"Line {line_num}: Invalid JSON - {e}") from e

            # ``**template_data`` needs a mapping; without this check a JSON
            # array or scalar surfaces as a TypeError with no line number.
            if not isinstance(template_data, dict):
                raise ValueError(
                    f"Line {line_num}: Invalid ItemTemplate - expected a JSON "
                    f"object, got {type(template_data).__name__}"
                )

            try:
                templates.append(ItemTemplate(**template_data))
            except ValidationError as e:
                raise ValueError(f"Line {line_num}: Invalid ItemTemplate - {e}") from e

    return templates
86
+
87
+
88
def _load_filled_templates(filled_file: Path) -> dict[UUID, FilledTemplate]:
    """Load FilledTemplate objects from JSONL file.

    Blank lines are skipped. Every other line must hold a single JSON object
    whose members are passed to ``FilledTemplate`` as keyword arguments. When
    two lines yield the same ``id``, the later one replaces the earlier one.

    Parameters
    ----------
    filled_file : Path
        Path to FilledTemplate JSONL file.

    Returns
    -------
    dict[UUID, FilledTemplate]
        Map of FilledTemplate IDs to objects.

    Raises
    ------
    FileNotFoundError
        If filled templates file doesn't exist.
    ValueError
        If a line is not valid JSON, is not a JSON object, or is rejected by
        FilledTemplate validation. The message starts with the 1-based line
        number; JSON and validation failures are chained as the cause.
    """
    filled_templates: dict[UUID, FilledTemplate] = {}

    with open(filled_file, encoding="utf-8") as f:
        for line_num, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line:
                continue

            try:
                filled_data = json.loads(line)
            except json.JSONDecodeError as e:
                raise ValueError(f"Line {line_num}: Invalid JSON - {e}") from e

            # ``**filled_data`` needs a mapping; without this check a JSON
            # array or scalar surfaces as a TypeError with no line number.
            if not isinstance(filled_data, dict):
                raise ValueError(
                    f"Line {line_num}: Invalid FilledTemplate - expected a JSON "
                    f"object, got {type(filled_data).__name__}"
                )

            try:
                filled = FilledTemplate(**filled_data)
            except ValidationError as e:
                raise ValueError(
                    f"Line {line_num}: Invalid FilledTemplate - {e}"
                ) from e

            filled_templates[filled.id] = filled

    return filled_templates
128
+
129
+
130
def _load_constraints(constraints_file: Path) -> dict[UUID, Constraint]:
    """Load Constraint objects from JSONL file.

    Blank lines are skipped. Every other line must hold a single JSON object
    whose members are passed to ``Constraint`` as keyword arguments. When two
    lines yield the same ``id``, the later one replaces the earlier one.

    Parameters
    ----------
    constraints_file : Path
        Path to Constraint JSONL file.

    Returns
    -------
    dict[UUID, Constraint]
        Map of Constraint IDs to objects.

    Raises
    ------
    FileNotFoundError
        If constraints file doesn't exist.
    ValueError
        If a line is not valid JSON, is not a JSON object, or is rejected by
        Constraint validation. The message starts with the 1-based line
        number; JSON and validation failures are chained as the cause.
    """
    constraints: dict[UUID, Constraint] = {}

    with open(constraints_file, encoding="utf-8") as f:
        for line_num, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line:
                continue

            try:
                constraint_data = json.loads(line)
            except json.JSONDecodeError as e:
                raise ValueError(f"Line {line_num}: Invalid JSON - {e}") from e

            # ``**constraint_data`` needs a mapping; without this check a JSON
            # array or scalar surfaces as a TypeError with no line number.
            if not isinstance(constraint_data, dict):
                raise ValueError(
                    f"Line {line_num}: Invalid Constraint - expected a JSON "
                    f"object, got {type(constraint_data).__name__}"
                )

            try:
                constraint = Constraint(**constraint_data)  # type: ignore[misc]
            except ValidationError as e:
                raise ValueError(f"Line {line_num}: Invalid Constraint - {e}") from e

            constraints[constraint.id] = constraint  # type: ignore[misc]

    return constraints
168
+
169
+
170
def _setup_cache(
    cache_dir: Path | None,
    no_cache: bool,
) -> ModelOutputCache:
    """Build the model output cache selected by the CLI flags.

    ``no_cache`` is checked first, so it wins even when ``cache_dir`` is
    also given.

    Parameters
    ----------
    cache_dir : Path | None
        Directory for the filesystem cache. When falsy, no directory is
        passed and ``ModelOutputCache`` is left to choose its own.
    no_cache : bool
        When true, return a disabled in-memory cache.

    Returns
    -------
    ModelOutputCache
        Configured cache instance.
    """
    if no_cache:
        return ModelOutputCache(backend="memory", enabled=False)

    if not cache_dir:
        # No explicit directory: rely on the cache's own default location.
        return ModelOutputCache(backend="filesystem")

    return ModelOutputCache(cache_dir=cache_dir, backend="filesystem")
196
+
197
+
198
def _display_construction_stats(
    items: list[Item],
    templates: list[ItemTemplate],
) -> None:
    """Print a summary table for a finished construction run.

    The table always shows item and template counts. It adds the
    items-per-template average when templates were given, model-output totals
    when any item carries model outputs, and constraint-satisfaction figures
    when any item carries constraint checks. The constraint section is driven
    by all items, not only the first one.

    Parameters
    ----------
    items : list[Item]
        Constructed items.
    templates : list[ItemTemplate]
        ItemTemplates used for construction.
    """
    table = Table(title="Item Construction Statistics")
    table.add_column("Metric", style="cyan")
    table.add_column("Value", style="green", justify="right")

    # Total items
    table.add_row("Total Items Created", str(len(items)))
    table.add_row("ItemTemplates Processed", str(len(templates)))
    table.add_row("", "")  # Separator

    # Items per template
    if templates:
        items_per_template = len(items) / len(templates)
        table.add_row("Avg Items per Template", f"{items_per_template:.1f}")

    # Model outputs
    total_model_outputs = sum(len(item.model_outputs) for item in items)
    if total_model_outputs > 0:
        table.add_row("Total Model Outputs", str(total_model_outputs))
        avg_outputs_per_item = total_model_outputs / len(items) if items else 0
        table.add_row("Avg Outputs per Item", f"{avg_outputs_per_item:.1f}")

    # Constraint satisfaction: gather every check from every item, so the
    # section is shown even when the first item has no checks recorded.
    check_results = [
        bool(satisfied)
        for item in items
        for satisfied in (item.constraint_satisfaction or {}).values()
    ]
    if check_results:
        satisfied_count = sum(check_results)
        total_constraints = len(check_results)
        table.add_row("", "")  # Separator
        table.add_row("Constraints Satisfied", str(satisfied_count))
        table.add_row("Total Constraint Checks", str(total_constraints))
        satisfaction_rate = (satisfied_count / total_constraints) * 100
        table.add_row("Satisfaction Rate", f"{satisfaction_rate:.1f}%")

    console.print(table)
249
+
250
+
251
# NOTE: click renders this docstring as the group's --help text. The ``\b``
# marker keeps click from re-wrapping the example block that follows it.
@click.group()
def items() -> None:
    r"""Item construction commands (Stage 3).

    Commands for constructing and managing experimental items.

    \b
    Examples:
        $ bead items construct --item-template template.jsonl \
            --filled-templates filled.jsonl --output items.jsonl
        $ bead items list --directory items/
        $ bead items validate items.jsonl
        $ bead items show-stats items.jsonl
    """
265
+
266
+
267
@click.command()
@click.option(
    "--item-template",
    type=click.Path(exists=True, path_type=Path),
    required=True,
    help="Path to ItemTemplate JSONL file",
)
@click.option(
    "--filled-templates",
    type=click.Path(exists=True, path_type=Path),
    required=True,
    help="Path to filled templates JSONL file",
)
@click.option(
    "--output",
    type=click.Path(path_type=Path),
    required=True,
    help="Path to output items JSONL file",
)
@click.option(
    "--constraints",
    type=click.Path(exists=True, path_type=Path),
    help="Path to constraints JSONL file (optional)",
)
@click.option(
    "--cache-dir",
    type=click.Path(path_type=Path),
    help="Cache directory for model outputs",
)
@click.option(
    "--no-cache",
    is_flag=True,
    help="Disable model output caching",
)
@click.option(
    "--dry-run",
    is_flag=True,
    help="Preview construction without executing",
)
@click.pass_context
def construct(
    ctx: click.Context,
    item_template: Path,
    filled_templates: Path,
    output: Path,
    constraints: Path | None,
    cache_dir: Path | None,
    no_cache: bool,
    dry_run: bool,
) -> None:
    r"""Construct experimental items from filled templates.

    Constructs items by combining filled templates according to ItemTemplate
    specifications. Supports model-based constraints, caching, and batch processing.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    item_template : Path
        Path to ItemTemplate JSONL file.
    filled_templates : Path
        Path to filled templates JSONL file.
    output : Path
        Path to output items JSONL file.
    constraints : Path | None
        Path to constraints JSONL file (optional).
    cache_dir : Path | None
        Cache directory for model outputs.
    no_cache : bool
        Whether to disable caching.
    dry_run : bool
        Whether to preview without executing.

    Examples
    --------
    # Basic construction
    $ bead items construct \
        --item-template templates.jsonl \
        --filled-templates filled.jsonl \
        --output items.jsonl

    # With constraints
    $ bead items construct \
        --item-template templates.jsonl \
        --filled-templates filled.jsonl \
        --constraints constraints.jsonl \
        --output items.jsonl

    # With custom cache
    $ bead items construct \
        --item-template templates.jsonl \
        --filled-templates filled.jsonl \
        --output items.jsonl \
        --cache-dir .cache/models

    # Dry run
    $ bead items construct \
        --item-template templates.jsonl \
        --filled-templates filled.jsonl \
        --output items.jsonl \
        --dry-run
    """
    try:
        # Load ItemTemplates
        print_info(f"Loading ItemTemplates from {item_template}")
        templates = _load_item_templates(item_template)
        print_info(f"Loaded {len(templates)} ItemTemplate(s)")

        # Load filled templates
        print_info(f"Loading filled templates from {filled_templates}")
        filled_map = _load_filled_templates(filled_templates)
        print_info(f"Loaded {len(filled_map)} filled template(s)")

        # Load constraints if provided
        constraints_map: dict[UUID, Constraint] = {}
        if constraints:
            print_info(f"Loading constraints from {constraints}")
            constraints_map = _load_constraints(constraints)
            print_info(f"Loaded {len(constraints_map)} constraint(s)")

        # Validate constraint references. This runs whether or not a
        # constraints file was supplied, so a template that references
        # constraints without one is rejected up front.
        for template in templates:
            for constraint_id in template.constraints:
                if constraint_id not in constraints_map:
                    print_error(
                        f"ItemTemplate '{template.name}' references unknown "
                        f"constraint {constraint_id}"
                    )
                    ctx.exit(1)

        # Dry run mode
        if dry_run:
            print_info("[DRY RUN] Construction preview:")
            console.print(f"  ItemTemplates: {len(templates)}")
            console.print(f"  Filled Templates: {len(filled_map)}")
            console.print(f"  Constraints: {len(constraints_map)}")
            console.print(f"  Output: {output}")
            print_info("[DRY RUN] No items will be constructed")
            return

        # Set up cache
        print_info("Setting up model output cache")
        cache = _setup_cache(cache_dir, no_cache)

        # Set up constructor
        constructor = ItemConstructor(
            model_registry=default_registry,
            cache=cache,
        )

        # Construct items with progress
        all_items: list[Item] = []

        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
        ) as progress:
            task = progress.add_task(
                f"Constructing items from {len(templates)} template(s)...",
                total=len(templates),
            )

            for template in templates:
                # A failing template is reported and skipped so the remaining
                # templates still get constructed.
                try:
                    constructed = list(
                        constructor.construct_items(
                            template, filled_map, constraints_map
                        )
                    )
                except Exception as e:
                    print_error(
                        "Failed to construct items for template "
                        f"'{template.name}': {e}"
                    )
                    continue

                all_items.extend(constructed)
                progress.advance(task)

        # Save items
        output.parent.mkdir(parents=True, exist_ok=True)
        with open(output, "w", encoding="utf-8") as f:
            for item in all_items:
                f.write(item.model_dump_json() + "\n")

        print_success(f"Created {len(all_items)} item(s): {output}")

        # Display statistics
        if all_items:
            _display_construction_stats(all_items, templates)

    except click.exceptions.Exit:
        # ctx.exit() works by raising Exit, which subclasses RuntimeError.
        # Re-raise it so the generic handler below does not report the exit
        # itself as a construction failure.
        raise
    except FileNotFoundError as e:
        print_error(f"File not found: {e}")
        ctx.exit(1)
    except ValidationError as e:
        print_error(f"Validation error: {e}")
        ctx.exit(1)
    except ValueError as e:
        print_error(str(e))
        ctx.exit(1)
    except Exception as e:
        print_error(f"Failed to construct items: {e}")
        ctx.exit(1)
471
+
472
+
473
@click.command(name="list")
@click.option(
    "--directory",
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    default=Path.cwd(),
    help="Directory to search for item files",
)
@click.option(
    "--pattern",
    default="*.jsonl",
    help="File pattern to match (default: *.jsonl)",
)
@click.pass_context
def list_items(
    ctx: click.Context,
    directory: Path,
    pattern: str,
) -> None:
    """List item files in a directory.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    directory : Path
        Directory to search.
    pattern : str
        File pattern to match.

    Examples
    --------
    $ bead items list
    $ bead items list --directory items/
    $ bead items list --pattern "experiment_*.jsonl"
    """
    try:
        matches = sorted(directory.glob(pattern))

        if not matches:
            print_info(f"No files found in {directory} matching {pattern}")
            return

        table = Table(title=f"Items in {directory}")
        table.add_column("File", style="cyan")
        table.add_column("Count", justify="right", style="yellow")
        table.add_column("Sample", style="white")

        for candidate in matches:
            # Best effort: a file that cannot be read or parsed is left out of
            # the table instead of failing the whole listing.
            try:
                with candidate.open(encoding="utf-8") as handle:
                    records = [text for text in map(str.strip, handle) if text]

                if not records:
                    continue

                # Preview comes from the first record's first rendered element.
                rendered = json.loads(records[0]).get("rendered_elements", {})

                sample = "N/A"
                if rendered:
                    sample = str(rendered[next(iter(rendered))])
                    if len(sample) > 40:
                        # 37 characters plus the ellipsis gives 40 in total.
                        sample = sample[:37] + "..."

                table.add_row(str(candidate.name), str(len(records)), sample)
            except Exception:
                continue

        console.print(table)

    except Exception as e:
        print_error(f"Failed to list items: {e}")
        ctx.exit(1)
555
+
556
+
557
@click.command()
@click.argument("items_file", type=click.Path(exists=True, path_type=Path))
@click.pass_context
def validate(ctx: click.Context, items_file: Path) -> None:
    """Validate an items file.

    Checks that all items are properly formatted.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    items_file : Path
        Path to items file.

    Examples
    --------
    $ bead items validate items.jsonl
    """
    try:
        print_info(f"Validating items: {items_file}")

        count = 0
        errors: list[str] = []

        with open(items_file, encoding="utf-8") as f:
            for line_num, raw_line in enumerate(f, start=1):
                line = raw_line.strip()
                if not line:
                    continue

                try:
                    item_data = json.loads(line)
                except json.JSONDecodeError as e:
                    errors.append(f"Line {line_num}: Invalid JSON - {e}")
                    continue

                # ``**item_data`` needs a mapping. Record a per-line error for
                # arrays/scalars instead of aborting the run with a TypeError.
                if not isinstance(item_data, dict):
                    errors.append(
                        f"Line {line_num}: Validation error - expected a JSON "
                        f"object, got {type(item_data).__name__}"
                    )
                    continue

                try:
                    Item(**item_data)
                except ValidationError as e:
                    errors.append(f"Line {line_num}: Validation error - {e}")
                    continue

                count += 1

        if errors:
            print_error(f"Validation failed with {len(errors)} errors:")
            # Only the first ten errors are printed; the rest are summarized.
            for error in errors[:10]:
                console.print(f"  [red]✗[/red] {error}")
            if len(errors) > 10:
                console.print(f"  ... and {len(errors) - 10} more errors")
            ctx.exit(1)
        else:
            print_success(f"Items file is valid: {count} items")

    except click.exceptions.Exit:
        # ctx.exit() works by raising Exit, which subclasses RuntimeError.
        # Re-raise it so the handler below does not print a second, misleading
        # "Failed to validate items" message after the real report.
        raise
    except Exception as e:
        print_error(f"Failed to validate items: {e}")
        ctx.exit(1)
610
+
611
+
612
@click.command()
@click.argument("items_file", type=click.Path(exists=True, path_type=Path))
@click.pass_context
def show_stats(ctx: click.Context, items_file: Path) -> None:
    """Show statistics about items.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    items_file : Path
        Path to items file.

    Examples
    --------
    $ bead items show-stats items.jsonl
    """
    try:
        print_info(f"Analyzing items: {items_file}")

        total_count = 0
        templates_seen: set[str] = set()
        model_output_counts: dict[str, int] = {}

        with open(items_file, encoding="utf-8") as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line:
                    continue

                # Lines that fail to parse or validate are skipped on purpose:
                # the statistics cover only the valid items in the file.
                try:
                    item_data = json.loads(line)
                    item = Item(**item_data)

                    total_count += 1
                    templates_seen.add(str(item.item_template_id))

                    # Count model outputs
                    for output in item.model_outputs:
                        model_name = output.model_name
                        model_output_counts[model_name] = (
                            model_output_counts.get(model_name, 0) + 1
                        )

                except Exception:
                    continue

        if total_count == 0:
            print_error("No valid items found")
            ctx.exit(1)

        # Display statistics
        table = Table(title="Item Statistics")
        table.add_column("Metric", style="cyan")
        table.add_column("Value", style="green", justify="right")

        table.add_row("Total Items", str(total_count))
        table.add_row("Unique Templates", str(len(templates_seen)))
        table.add_row("", "")  # Separator

        if model_output_counts:
            for model_name, count in sorted(model_output_counts.items()):
                table.add_row(f"Model Outputs: {model_name}", str(count))

        console.print(table)

    except click.exceptions.Exit:
        # ctx.exit() works by raising Exit, which subclasses RuntimeError.
        # Re-raise it so the handler below does not follow "No valid items
        # found" with a misleading "Failed to show statistics" message.
        raise
    except Exception as e:
        print_error(f"Failed to show statistics: {e}")
        ctx.exit(1)
681
+
682
+
683
+ # Import task-type factory commands
684
+ from bead.cli.items_factories import ( # noqa: E402
685
+ create_binary_from_texts,
686
+ create_categorical,
687
+ create_forced_choice,
688
+ create_forced_choice_from_texts,
689
+ create_free_text_from_texts,
690
+ create_likert_7,
691
+ create_magnitude_from_texts,
692
+ create_multi_select_from_texts,
693
+ create_nli,
694
+ create_ordinal_scale_from_texts,
695
+ create_simple_cloze,
696
+ )
697
+
698
# Register core commands under the names their click decorators gave them
for _core_command in (construct, list_items, validate, show_stats):
    items.add_command(_core_command)

# Register task-type factory commands. A ``None`` name keeps the name the
# command object already carries; a string overrides it.
for _factory_command, _cli_name in (
    (create_forced_choice, None),
    (create_forced_choice_from_texts, "create-forced-choice-from-texts"),
    (create_likert_7, "create-likert-7"),
    (create_ordinal_scale_from_texts, "create-ordinal-scale-from-texts"),
    (create_nli, None),
    (create_categorical, None),
    (create_binary_from_texts, "create-binary-from-texts"),
    (create_multi_select_from_texts, "create-multi-select-from-texts"),
    (create_magnitude_from_texts, "create-magnitude-from-texts"),
    (create_free_text_from_texts, "create-free-text-from-texts"),
    (create_simple_cloze, "create-simple-cloze"),
):
    items.add_command(_factory_command, name=_cli_name)

# Keep the loop variables out of the module namespace.
del _core_command, _factory_command, _cli_name
720
+
721
+
722
+ # ==================== Validation Commands ====================
723
+
724
+
725
@items.command()
@click.argument("items_file", type=click.Path(exists=True, path_type=Path))
@click.option(
    "--task-type",
    type=click.Choice(
        [
            "forced_choice",
            "ordinal_scale",
            "categorical",
            "binary",
            "multi_select",
            "magnitude",
            "free_text",
            "cloze",
        ],
        case_sensitive=False,
    ),
    required=True,
    help="Task type to validate against",
)
@click.option(
    "--strict",
    is_flag=True,
    help="Strict validation mode",
)
@click.pass_context
def validate_for_task_type(
    ctx: click.Context,
    items_file: Path,
    task_type: str,
    strict: bool,
) -> None:
    r"""Validate items for specific task type.

    Every non-empty line of ITEMS_FILE is parsed as a JSON item and checked
    against the task type. With --strict, up to ten validation errors are
    listed and the command exits with status 1 if any item is invalid.

    Examples
    --------
    $ bead items validate-for-task-type items.jsonl --task-type forced_choice

    $ bead items validate-for-task-type items.jsonl \
        --task-type ordinal_scale --strict
    """
    try:
        print_info(f"Validating items for task type: {task_type}")

        # Cast string to TaskType literal (validated by Click Choice)
        task_type_lit: TaskType = cast(TaskType, task_type)
        valid_count: int = 0
        invalid_count: int = 0
        errors: list[str] = []

        with open(items_file) as f:
            # Line numbers are 1-based and count blank lines too, so they
            # match what an editor shows for the file.
            for line_num, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    continue

                try:
                    item_data = json.loads(line)
                    item = Item(**item_data)

                    if validate_item_for_task_type(item, task_type_lit):
                        valid_count += 1
                    else:
                        invalid_count += 1
                        errors.append(f"Line {line_num}: Invalid for {task_type}")

                except Exception as e:
                    # A line that cannot be parsed, built into an Item, or
                    # checked counts as invalid instead of aborting the run.
                    invalid_count += 1
                    errors.append(f"Line {line_num}: {e}")

        # Display results
        table = Table(title="Validation Results")
        table.add_column("Metric", style="cyan")
        table.add_column("Count", justify="right", style="green")

        table.add_row("Valid items", str(valid_count))
        table.add_row(
            "Invalid items",
            str(invalid_count),
            style="red" if invalid_count else "green",
        )
        table.add_row("Total", str(valid_count + invalid_count))

        console.print(table)

        # Show errors if any (only in strict mode, capped at ten entries)
        if errors and strict:
            print_error("Validation errors:")
            for error in errors[:10]:
                console.print(f"  [red]✗[/red] {error}")
            if len(errors) > 10:
                console.print(f"  ... and {len(errors) - 10} more errors")

        if invalid_count > 0 and strict:
            ctx.exit(1)
        else:
            print_success(f"Validation complete: {valid_count} valid items")

    except click.exceptions.Exit:
        # ctx.exit() works by raising Exit, which derives from RuntimeError.
        # Without this clause the generic handler below would catch the
        # deliberate strict-mode exit and print a bogus
        # "Failed to validate items: 1" message before exiting again.
        raise
    except Exception as e:
        print_error(f"Failed to validate items: {e}")
        ctx.exit(1)
826
+
827
+
828
@items.command()
@click.argument("items_file", type=click.Path(exists=True, path_type=Path))
@click.option(
    "--output",
    "-o",
    type=click.Path(path_type=Path),
    help="Output file for inferred types (JSONL)",
)
@click.pass_context
def infer_task_type(
    ctx: click.Context,
    items_file: Path,
    output: Path | None,
) -> None:
    """Infer task type for each item.

    Examples
    --------
    $ bead items infer-task-type items.jsonl

    $ bead items infer-task-type items.jsonl --output types.jsonl
    """
    try:
        print_info("Inferring task types...")

        inferred_rows: list[dict[str, str]] = []
        tally: dict[str, int] = {}

        with open(items_file) as source:
            for raw_line in source:
                payload = raw_line.strip()
                if not payload:
                    # Blank lines carry no item.
                    continue

                # A line that is not valid JSON / not a valid Item is not
                # handled here; it propagates to the outer handler and ends
                # the command.
                item: Item = Item(**json.loads(payload))

                # Items whose type cannot be inferred (ValueError) are kept
                # in the results under the "unknown" label.
                try:
                    label: str = infer_task_type_from_item(item)
                except ValueError:
                    label = "unknown"

                tally[label] = tally.get(label, 0) + 1
                inferred_rows.append({"item_id": str(item.id), "task_type": label})

        # Display results: one row per task type, in alphabetical order.
        table = Table(title="Task Type Distribution")
        table.add_column("Task Type", style="cyan")
        table.add_column("Count", justify="right", style="green")

        for label in sorted(tally):
            table.add_row(label, str(tally[label]))

        console.print(table)

        # Save if output specified: one JSON object per line, creating the
        # parent directory when it does not exist yet.
        if output:
            output.parent.mkdir(parents=True, exist_ok=True)
            with open(output, "w") as sink:
                sink.writelines(json.dumps(row) + "\n" for row in inferred_rows)
            print_success(f"Saved task type inference results: {output}")

    except Exception as e:
        print_error(f"Failed to infer task types: {e}")
        ctx.exit(1)
903
+
904
+
905
+ @items.command()
906
+ @click.option(
907
+ "--task-type",
908
+ type=click.Choice(
909
+ [
910
+ "forced_choice",
911
+ "ordinal_scale",
912
+ "categorical",
913
+ "binary",
914
+ "multi_select",
915
+ "magnitude",
916
+ "free_text",
917
+ "cloze",
918
+ ],
919
+ case_sensitive=False,
920
+ ),
921
+ required=True,
922
+ help="Task type",
923
+ )
924
+ def get_task_requirements(task_type: str) -> None:
925
+ """Get requirements for a task type.
926
+
927
+ Examples
928
+ --------
929
+ $ bead items get-task-requirements --task-type forced_choice
930
+
931
+ $ bead items get-task-requirements --task-type ordinal_scale
932
+ """
933
+ try:
934
+ # Cast string to TaskType literal (validated by Click Choice)
935
+ task_type_lit: TaskType = cast(TaskType, task_type)
936
+ requirements: dict[str, list[str] | str] = get_task_type_requirements(
937
+ task_type_lit
938
+ )
939
+
940
+ print_info(f"Requirements for task type: {task_type}")
941
+ console.print()
942
+
943
+ table = Table(show_header=False)
944
+ table.add_column("Key", style="cyan", no_wrap=True)
945
+ table.add_column("Value", style="white")
946
+
947
+ key: str
948
+ value: list[str] | str
949
+ for key, value in requirements.items():
950
+ if isinstance(value, list):
951
+ # Requirements lists contain strings
952
+ value_str: str = ", ".join(value)
953
+ else:
954
+ value_str = str(value)
955
+ table.add_row(key, value_str)
956
+
957
+ console.print(table)
958
+
959
+ except Exception as e:
960
+ print_error(f"Failed to get task requirements: {e}")