bead 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
bead/cli/items.py
ADDED
|
@@ -0,0 +1,960 @@
|
|
|
1
|
+
"""Item construction commands for bead CLI.
|
|
2
|
+
|
|
3
|
+
This module provides commands for constructing experimental items from filled
|
|
4
|
+
templates (Stage 3 of the bead pipeline).
|
|
5
|
+
|
|
6
|
+
Commands support:
|
|
7
|
+
- Full item construction with ItemTemplate specifications
|
|
8
|
+
- Model adapter integration (HuggingFace, OpenAI, Anthropic, Google, TogetherAI)
|
|
9
|
+
- Model output caching for efficiency
|
|
10
|
+
- Constraint-based filtering (DSL, extensional, intensional, relational)
|
|
11
|
+
- Batch processing with progress tracking
|
|
12
|
+
- Parallel execution for large-scale construction
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import json
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import cast
|
|
20
|
+
from uuid import UUID
|
|
21
|
+
|
|
22
|
+
import click
|
|
23
|
+
from pydantic import ValidationError
|
|
24
|
+
from rich.console import Console
|
|
25
|
+
from rich.progress import Progress, SpinnerColumn, TextColumn
|
|
26
|
+
from rich.table import Table
|
|
27
|
+
|
|
28
|
+
from bead.cli.utils import print_error, print_info, print_success
|
|
29
|
+
from bead.items.adapters.registry import default_registry
|
|
30
|
+
from bead.items.cache import ModelOutputCache
|
|
31
|
+
from bead.items.constructor import ItemConstructor
|
|
32
|
+
from bead.items.item import Item
|
|
33
|
+
from bead.items.item_template import ItemTemplate, TaskType
|
|
34
|
+
from bead.items.validation import (
|
|
35
|
+
get_task_type_requirements,
|
|
36
|
+
infer_task_type_from_item,
|
|
37
|
+
validate_item_for_task_type,
|
|
38
|
+
)
|
|
39
|
+
from bead.resources.constraints import Constraint
|
|
40
|
+
from bead.templates.filler import FilledTemplate
|
|
41
|
+
|
|
42
|
+
# Module-level Rich console shared by the commands below for table and
# progress output.
console = Console()
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# Helper functions for item construction
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _load_item_templates(template_file: Path) -> list[ItemTemplate]:
    """Load ItemTemplate objects from JSONL file.

    Blank lines are skipped. Every other line must hold one JSON object whose
    keys are passed to ``ItemTemplate`` as keyword arguments.

    Parameters
    ----------
    template_file : Path
        Path to ItemTemplate JSONL file.

    Returns
    -------
    list[ItemTemplate]
        List of loaded ItemTemplate objects, in file order.

    Raises
    ------
    FileNotFoundError
        If template file doesn't exist.
    ValueError
        If a line is not valid JSON, is not a JSON object, or fails
        ItemTemplate validation. The message starts with the 1-based line
        number of the offending line.
    """
    templates: list[ItemTemplate] = []

    with open(template_file, encoding="utf-8") as f:
        for line_num, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line:
                continue

            try:
                template_data = json.loads(line)
            except json.JSONDecodeError as e:
                raise ValueError(f"Line {line_num}: Invalid JSON - {e}") from e

            # ``ItemTemplate(**data)`` needs a mapping; a bare list/number/string
            # would otherwise surface as a TypeError without a line number.
            if not isinstance(template_data, dict):
                raise ValueError(
                    f"Line {line_num}: Invalid ItemTemplate - expected a JSON "
                    f"object, got {type(template_data).__name__}"
                )

            try:
                templates.append(ItemTemplate(**template_data))
            except ValidationError as e:
                raise ValueError(
                    f"Line {line_num}: Invalid ItemTemplate - {e}"
                ) from e

    return templates
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _load_filled_templates(filled_file: Path) -> dict[UUID, FilledTemplate]:
    """Load FilledTemplate objects from JSONL file.

    Blank lines are skipped. Every other line must hold one JSON object whose
    keys are passed to ``FilledTemplate`` as keyword arguments. If two lines
    yield the same ``id``, the later one replaces the earlier one in the map.

    Parameters
    ----------
    filled_file : Path
        Path to FilledTemplate JSONL file.

    Returns
    -------
    dict[UUID, FilledTemplate]
        Map of FilledTemplate IDs to objects.

    Raises
    ------
    FileNotFoundError
        If filled templates file doesn't exist.
    ValueError
        If a line is not valid JSON, is not a JSON object, or fails
        FilledTemplate validation. The message starts with the 1-based line
        number of the offending line.
    """
    filled_templates: dict[UUID, FilledTemplate] = {}

    with open(filled_file, encoding="utf-8") as f:
        for line_num, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line:
                continue

            try:
                filled_data = json.loads(line)
            except json.JSONDecodeError as e:
                raise ValueError(f"Line {line_num}: Invalid JSON - {e}") from e

            # ``FilledTemplate(**data)`` needs a mapping; reject other JSON
            # values here so the error carries the line number.
            if not isinstance(filled_data, dict):
                raise ValueError(
                    f"Line {line_num}: Invalid FilledTemplate - expected a JSON "
                    f"object, got {type(filled_data).__name__}"
                )

            try:
                filled = FilledTemplate(**filled_data)
            except ValidationError as e:
                raise ValueError(
                    f"Line {line_num}: Invalid FilledTemplate - {e}"
                ) from e

            filled_templates[filled.id] = filled

    return filled_templates
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _load_constraints(constraints_file: Path) -> dict[UUID, Constraint]:
    """Load Constraint objects from JSONL file.

    Blank lines are skipped. Every other line must hold one JSON object whose
    keys are passed to ``Constraint`` as keyword arguments. If two lines yield
    the same ``id``, the later one replaces the earlier one in the map.

    Parameters
    ----------
    constraints_file : Path
        Path to Constraint JSONL file.

    Returns
    -------
    dict[UUID, Constraint]
        Map of Constraint IDs to objects.

    Raises
    ------
    FileNotFoundError
        If constraints file doesn't exist.
    ValueError
        If a line is not valid JSON, is not a JSON object, or fails
        Constraint validation. The message starts with the 1-based line
        number of the offending line.
    """
    constraints: dict[UUID, Constraint] = {}

    with open(constraints_file, encoding="utf-8") as f:
        for line_num, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line:
                continue

            try:
                constraint_data = json.loads(line)
            except json.JSONDecodeError as e:
                raise ValueError(f"Line {line_num}: Invalid JSON - {e}") from e

            # ``Constraint(**data)`` needs a mapping; reject other JSON values
            # here so the error carries the line number.
            if not isinstance(constraint_data, dict):
                raise ValueError(
                    f"Line {line_num}: Invalid Constraint - expected a JSON "
                    f"object, got {type(constraint_data).__name__}"
                )

            try:
                constraint = Constraint(**constraint_data)  # type: ignore[misc]
            except ValidationError as e:
                raise ValueError(f"Line {line_num}: Invalid Constraint - {e}") from e

            constraints[constraint.id] = constraint  # type: ignore[misc]

    return constraints
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _setup_cache(
    cache_dir: Path | None,
    no_cache: bool,
) -> ModelOutputCache:
    """Build the model output cache requested on the command line.

    Parameters
    ----------
    cache_dir : Path | None
        Directory for the filesystem cache; when not given, the cache is
        created without an explicit directory.
    no_cache : bool
        When true, a disabled in-memory cache is returned and ``cache_dir``
        is ignored.

    Returns
    -------
    ModelOutputCache
        Configured cache instance.
    """
    # Disabling the cache takes precedence over any directory setting.
    if no_cache:
        return ModelOutputCache(backend="memory", enabled=False)

    # No directory given: let ModelOutputCache pick its own location.
    if not cache_dir:
        return ModelOutputCache(backend="filesystem")

    return ModelOutputCache(cache_dir=cache_dir, backend="filesystem")
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def _display_construction_stats(
    items: list[Item],
    templates: list[ItemTemplate],
) -> None:
    """Display construction statistics.

    Prints a Rich table with item and template counts, followed by optional
    sections for model outputs and constraint satisfaction. Each optional
    section is shown only when at least one item contributes data to it.

    Parameters
    ----------
    items : list[Item]
        Constructed items.
    templates : list[ItemTemplate]
        ItemTemplates used for construction.
    """
    table = Table(title="Item Construction Statistics")
    table.add_column("Metric", style="cyan")
    table.add_column("Value", style="green", justify="right")

    # Total items
    table.add_row("Total Items Created", str(len(items)))
    table.add_row("ItemTemplates Processed", str(len(templates)))
    table.add_row("", "")  # Separator

    # Items per template
    if templates:
        items_per_template = len(items) / len(templates)
        table.add_row("Avg Items per Template", f"{items_per_template:.1f}")

    # Model outputs
    total_model_outputs = sum(len(item.model_outputs) for item in items)
    if total_model_outputs > 0:
        # A positive total implies ``items`` is non-empty, so dividing is safe.
        avg_outputs_per_item = total_model_outputs / len(items)
        table.add_row("Total Model Outputs", str(total_model_outputs))
        table.add_row("Avg Outputs per Item", f"{avg_outputs_per_item:.1f}")

    # Constraint satisfaction: aggregate over every item rather than gating on
    # the first one, so the section still appears when only later items carry
    # constraint results. ``or {}`` tolerates an unset mapping.
    checks = [item.constraint_satisfaction or {} for item in items]
    total_constraints = sum(len(check) for check in checks)
    if total_constraints > 0:
        satisfied_count = sum(
            1 for check in checks for satisfied in check.values() if satisfied
        )
        satisfaction_rate = (satisfied_count / total_constraints) * 100
        table.add_row("", "")  # Separator
        table.add_row("Constraints Satisfied", str(satisfied_count))
        table.add_row("Total Constraint Checks", str(total_constraints))
        table.add_row("Satisfaction Rate", f"{satisfaction_rate:.1f}%")

    console.print(table)
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
@click.group()
def items() -> None:
    r"""Item construction commands (Stage 3).

    Commands for constructing and managing experimental items.

    \b
    Examples:
        $ bead items construct --item-template template.jsonl \
            --filled-templates filled.jsonl --output items.jsonl
        $ bead items list --directory items/
        $ bead items validate items.jsonl
        $ bead items show-stats items.jsonl
    """
    # The docstring doubles as the group's --help text. The ``list`` example
    # uses --directory because that command accepts no positional argument.
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
@click.command()
@click.option(
    "--item-template",
    type=click.Path(exists=True, path_type=Path),
    required=True,
    help="Path to ItemTemplate JSONL file",
)
@click.option(
    "--filled-templates",
    type=click.Path(exists=True, path_type=Path),
    required=True,
    help="Path to filled templates JSONL file",
)
@click.option(
    "--output",
    type=click.Path(path_type=Path),
    required=True,
    help="Path to output items JSONL file",
)
@click.option(
    "--constraints",
    type=click.Path(exists=True, path_type=Path),
    help="Path to constraints JSONL file (optional)",
)
@click.option(
    "--cache-dir",
    type=click.Path(path_type=Path),
    help="Cache directory for model outputs",
)
@click.option(
    "--no-cache",
    is_flag=True,
    help="Disable model output caching",
)
@click.option(
    "--dry-run",
    is_flag=True,
    help="Preview construction without executing",
)
@click.pass_context
def construct(
    ctx: click.Context,
    item_template: Path,
    filled_templates: Path,
    output: Path,
    constraints: Path | None,
    cache_dir: Path | None,
    no_cache: bool,
    dry_run: bool,
) -> None:
    r"""Construct experimental items from filled templates.

    Constructs items by combining filled templates according to ItemTemplate
    specifications. Supports model-based constraints, caching, and batch processing.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    item_template : Path
        Path to ItemTemplate JSONL file.
    filled_templates : Path
        Path to filled templates JSONL file.
    output : Path
        Path to output items JSONL file.
    constraints : Path | None
        Path to constraints JSONL file (optional).
    cache_dir : Path | None
        Cache directory for model outputs.
    no_cache : bool
        Whether to disable caching.
    dry_run : bool
        Whether to preview without executing.

    Examples
    --------
    # Basic construction
    $ bead items construct \
        --item-template templates.jsonl \
        --filled-templates filled.jsonl \
        --output items.jsonl

    # With constraints
    $ bead items construct \
        --item-template templates.jsonl \
        --filled-templates filled.jsonl \
        --constraints constraints.jsonl \
        --output items.jsonl

    # With custom cache
    $ bead items construct \
        --item-template templates.jsonl \
        --filled-templates filled.jsonl \
        --output items.jsonl \
        --cache-dir .cache/models

    # Dry run
    $ bead items construct \
        --item-template templates.jsonl \
        --filled-templates filled.jsonl \
        --output items.jsonl \
        --dry-run
    """
    try:
        # Load ItemTemplates
        print_info(f"Loading ItemTemplates from {item_template}")
        templates = _load_item_templates(item_template)
        print_info(f"Loaded {len(templates)} ItemTemplate(s)")

        # Load filled templates
        print_info(f"Loading filled templates from {filled_templates}")
        filled_map = _load_filled_templates(filled_templates)
        print_info(f"Loaded {len(filled_map)} filled template(s)")

        # Load constraints if provided
        constraints_map: dict[UUID, Constraint] = {}
        if constraints:
            print_info(f"Loading constraints from {constraints}")
            constraints_map = _load_constraints(constraints)
            print_info(f"Loaded {len(constraints_map)} constraint(s)")

        # Validate constraint references before doing any expensive work
        for template in templates:
            for constraint_id in template.constraints:
                if constraint_id not in constraints_map:
                    print_error(
                        f"ItemTemplate '{template.name}' references unknown "
                        f"constraint {constraint_id}"
                    )
                    ctx.exit(1)

        # Dry run mode: report what was loaded and stop before constructing
        if dry_run:
            print_info("[DRY RUN] Construction preview:")
            console.print(f"  ItemTemplates: {len(templates)}")
            console.print(f"  Filled Templates: {len(filled_map)}")
            console.print(f"  Constraints: {len(constraints_map)}")
            console.print(f"  Output: {output}")
            print_info("[DRY RUN] No items will be constructed")
            return

        # Set up cache
        print_info("Setting up model output cache")
        cache = _setup_cache(cache_dir, no_cache)

        # Set up constructor
        constructor = ItemConstructor(
            model_registry=default_registry,
            cache=cache,
        )

        # Construct items with progress
        all_items: list[Item] = []

        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
        ) as progress:
            task = progress.add_task(
                f"Constructing items from {len(templates)} template(s)...",
                total=len(templates),
            )

            for template in templates:
                try:
                    # Construct items for this template
                    items = list(
                        constructor.construct_items(
                            template, filled_map, constraints_map
                        )
                    )
                    all_items.extend(items)
                    progress.advance(task)
                except Exception as e:
                    # Best effort: report the failing template and keep going
                    # so the remaining templates are still processed.
                    print_error(
                        f"Failed to construct items for template '{template.name}': {e}"
                    )
                    continue

        # Save items
        output.parent.mkdir(parents=True, exist_ok=True)
        with open(output, "w", encoding="utf-8") as f:
            for item in all_items:
                f.write(item.model_dump_json() + "\n")

        print_success(f"Created {len(all_items)} item(s): {output}")

        # Display statistics
        if all_items:
            _display_construction_stats(all_items, templates)

    except click.exceptions.Exit:
        # ctx.exit() signals termination by raising Exit, a RuntimeError
        # subclass. Let it through so the generic handler below does not
        # report it as a second, misleading "Failed to construct items: 1".
        raise
    except FileNotFoundError as e:
        print_error(f"File not found: {e}")
        ctx.exit(1)
    except ValidationError as e:
        print_error(f"Validation error: {e}")
        ctx.exit(1)
    except ValueError as e:
        print_error(str(e))
        ctx.exit(1)
    except Exception as e:
        print_error(f"Failed to construct items: {e}")
        ctx.exit(1)
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
@click.command(name="list")
@click.option(
    "--directory",
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    default=Path.cwd(),
    help="Directory to search for item files",
)
@click.option(
    "--pattern",
    default="*.jsonl",
    help="File pattern to match (default: *.jsonl)",
)
@click.pass_context
def list_items(
    ctx: click.Context,
    directory: Path,
    pattern: str,
) -> None:
    """List item files in a directory.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    directory : Path
        Directory to search.
    pattern : str
        File pattern to match.

    Examples
    --------
    $ bead items list
    $ bead items list --directory items/
    $ bead items list --pattern "experiment_*.jsonl"
    """
    try:
        files = list(directory.glob(pattern))

        if not files:
            print_info(f"No files found in {directory} matching {pattern}")
            return

        table = Table(title=f"Items in {directory}")
        table.add_column("File", style="cyan")
        table.add_column("Count", justify="right", style="yellow")
        table.add_column("Sample", style="white")

        # Names of files that matched the pattern but could not be previewed.
        skipped: list[str] = []

        for file_path in sorted(files):
            try:
                with open(file_path, encoding="utf-8") as f:
                    lines = [line.strip() for line in f if line.strip()]

                # Files with no non-blank lines contribute no row.
                if not lines:
                    continue

                count = len(lines)

                # Parse first item for preview
                first_data = json.loads(lines[0])
                rendered = first_data.get("rendered_elements", {})

                # Get first rendered element as sample, truncated to 40 chars
                sample = "N/A"
                if rendered:
                    first_key = next(iter(rendered))
                    sample = str(rendered[first_key])
                    if len(sample) > 40:
                        sample = sample[:37] + "..."

                table.add_row(
                    str(file_path.name),
                    str(count),
                    sample,
                )
            except Exception:
                # Best effort: one bad file must not abort the listing, but
                # remember it so the omission is reported rather than silent.
                skipped.append(file_path.name)
                continue

        console.print(table)

        if skipped:
            print_info(
                f"Skipped {len(skipped)} file(s) that could not be read as "
                f"item JSONL: {', '.join(skipped)}"
            )

    except Exception as e:
        print_error(f"Failed to list items: {e}")
        ctx.exit(1)
|
|
555
|
+
|
|
556
|
+
|
|
557
|
+
@click.command()
@click.argument("items_file", type=click.Path(exists=True, path_type=Path))
@click.pass_context
def validate(ctx: click.Context, items_file: Path) -> None:
    """Validate an items file.

    Checks that all items are properly formatted.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    items_file : Path
        Path to items file.

    Examples
    --------
    $ bead items validate items.jsonl
    """
    try:
        print_info(f"Validating items: {items_file}")

        count = 0
        errors: list[str] = []

        with open(items_file, encoding="utf-8") as f:
            for line_num, raw_line in enumerate(f, start=1):
                line = raw_line.strip()
                if not line:
                    continue

                try:
                    item_data = json.loads(line)
                except json.JSONDecodeError as e:
                    errors.append(f"Line {line_num}: Invalid JSON - {e}")
                    continue

                # ``Item(**data)`` needs a mapping. Record other JSON values
                # as a per-line error instead of letting a TypeError abort
                # validation of the remaining lines.
                if not isinstance(item_data, dict):
                    errors.append(
                        f"Line {line_num}: Validation error - expected a JSON "
                        f"object, got {type(item_data).__name__}"
                    )
                    continue

                try:
                    Item(**item_data)
                    count += 1
                except ValidationError as e:
                    errors.append(f"Line {line_num}: Validation error - {e}")

        if errors:
            print_error(f"Validation failed with {len(errors)} errors:")
            # Show at most the first ten errors to keep the output readable.
            for error in errors[:10]:
                console.print(f"  [red]✗[/red] {error}")
            if len(errors) > 10:
                console.print(f"  ... and {len(errors) - 10} more errors")
            ctx.exit(1)
        else:
            print_success(f"Items file is valid: {count} items")

    except click.exceptions.Exit:
        # ctx.exit() raises Exit, a RuntimeError subclass. Re-raise it so the
        # handler below does not print a bogus "Failed to validate items: 1".
        raise
    except Exception as e:
        print_error(f"Failed to validate items: {e}")
        ctx.exit(1)
|
|
610
|
+
|
|
611
|
+
|
|
612
|
+
@click.command()
@click.argument("items_file", type=click.Path(exists=True, path_type=Path))
@click.pass_context
def show_stats(ctx: click.Context, items_file: Path) -> None:
    """Show statistics about items.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    items_file : Path
        Path to items file.

    Examples
    --------
    $ bead items show-stats items.jsonl
    """
    # NOTE: Click reuses the docstring above as the command's --help text,
    # so implementation notes are kept in comments.
    try:
        print_info(f"Analyzing items: {items_file}")

        total_count = 0
        templates_seen: set[str] = set()
        model_output_counts: dict[str, int] = {}

        with open(items_file, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Blank lines are not records.
                    continue

                try:
                    item_data = json.loads(line)
                    item = Item(**item_data)

                    total_count += 1
                    templates_seen.add(str(item.item_template_id))

                    # Count model outputs per model name.
                    for output in item.model_outputs:
                        model_name = output.model_name
                        model_output_counts[model_name] = (
                            model_output_counts.get(model_name, 0) + 1
                        )

                except Exception:
                    # Best-effort statistics: lines that cannot be parsed or
                    # validated are skipped rather than aborting the run.
                    continue

        if total_count == 0:
            print_error("No valid items found")
            ctx.exit(1)

        # Display statistics
        table = Table(title="Item Statistics")
        table.add_column("Metric", style="cyan")
        table.add_column("Value", style="green", justify="right")

        table.add_row("Total Items", str(total_count))
        table.add_row("Unique Templates", str(len(templates_seen)))
        table.add_row("", "")  # Separator

        if model_output_counts:
            for model_name, count in sorted(model_output_counts.items()):
                table.add_row(f"Model Outputs: {model_name}", str(count))

        console.print(table)

    except click.exceptions.Exit:
        # ctx.exit() terminates by raising Exit, which derives from
        # RuntimeError. Without this clause the generic handler below would
        # catch it and print a bogus "Failed to show statistics: 1" message.
        raise
    except Exception as e:
        print_error(f"Failed to show statistics: {e}")
        ctx.exit(1)
|
|
681
|
+
|
|
682
|
+
|
|
683
|
+
# Import task-type factory commands
|
|
684
|
+
from bead.cli.items_factories import ( # noqa: E402
|
|
685
|
+
create_binary_from_texts,
|
|
686
|
+
create_categorical,
|
|
687
|
+
create_forced_choice,
|
|
688
|
+
create_forced_choice_from_texts,
|
|
689
|
+
create_free_text_from_texts,
|
|
690
|
+
create_likert_7,
|
|
691
|
+
create_magnitude_from_texts,
|
|
692
|
+
create_multi_select_from_texts,
|
|
693
|
+
create_nli,
|
|
694
|
+
create_ordinal_scale_from_texts,
|
|
695
|
+
create_simple_cloze,
|
|
696
|
+
)
|
|
697
|
+
|
|
698
|
+
# Register core commands. No explicit name is given, so each one is added
# under the name already stored on the command object.
for _command in (construct, list_items, validate, show_stats):
    items.add_command(_command)

# Register task-type factory commands. A name of None keeps the command's own
# name; the others are registered under an explicit CLI name.
for _command, _cli_name in (
    (create_forced_choice, None),
    (create_forced_choice_from_texts, "create-forced-choice-from-texts"),
    (create_likert_7, "create-likert-7"),
    (create_ordinal_scale_from_texts, "create-ordinal-scale-from-texts"),
    (create_nli, None),
    (create_categorical, None),
    (create_binary_from_texts, "create-binary-from-texts"),
    (create_multi_select_from_texts, "create-multi-select-from-texts"),
    (create_magnitude_from_texts, "create-magnitude-from-texts"),
    (create_free_text_from_texts, "create-free-text-from-texts"),
    (create_simple_cloze, "create-simple-cloze"),
):
    items.add_command(_command, name=_cli_name)

# Drop the loop temporaries so the module namespace is unchanged.
del _command, _cli_name
|
|
720
|
+
|
|
721
|
+
|
|
722
|
+
# ==================== Validation Commands ====================
|
|
723
|
+
|
|
724
|
+
|
|
725
|
+
@items.command()
@click.argument("items_file", type=click.Path(exists=True, path_type=Path))
@click.option(
    "--task-type",
    type=click.Choice(
        [
            "forced_choice",
            "ordinal_scale",
            "categorical",
            "binary",
            "multi_select",
            "magnitude",
            "free_text",
            "cloze",
        ],
        case_sensitive=False,
    ),
    required=True,
    help="Task type to validate against",
)
@click.option(
    "--strict",
    is_flag=True,
    help="Strict validation mode",
)
@click.pass_context
def validate_for_task_type(
    ctx: click.Context,
    items_file: Path,
    task_type: str,
    strict: bool,
) -> None:
    r"""Validate items for specific task type.

    Examples
    --------
    $ bead items validate-for-task-type items.jsonl --task-type forced_choice

    $ bead items validate-for-task-type items.jsonl \\
        --task-type ordinal_scale --strict
    """
    # NOTE: Click reuses the docstring above as the command's --help text,
    # so implementation notes are kept in comments.
    #
    # Reads ``items_file`` as JSONL, builds an ``Item`` from every non-blank
    # line and checks it with ``validate_item_for_task_type``. A summary table
    # is always printed; with ``--strict`` the first ten per-line errors are
    # listed and the command exits with status 1 if any item was invalid.
    try:
        print_info(f"Validating items for task type: {task_type}")

        # Cast string to TaskType literal (validated by Click Choice)
        task_type_lit: TaskType = cast(TaskType, task_type)
        valid_count: int = 0
        invalid_count: int = 0
        errors: list[str] = []

        # Explicit UTF-8 keeps this command consistent with the other item
        # readers in this module instead of relying on the locale default.
        with open(items_file, encoding="utf-8") as f:
            for line_num, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    continue

                try:
                    item_data = json.loads(line)
                    item = Item(**item_data)

                    if validate_item_for_task_type(item, task_type_lit):
                        valid_count += 1
                    else:
                        invalid_count += 1
                        errors.append(f"Line {line_num}: Invalid for {task_type}")

                except Exception as e:
                    # Any per-line failure (bad JSON, model validation, ...)
                    # counts as an invalid item and is reported by line.
                    invalid_count += 1
                    errors.append(f"Line {line_num}: {e}")

        # Display results
        table = Table(title="Validation Results")
        table.add_column("Metric", style="cyan")
        table.add_column("Count", justify="right", style="green")

        table.add_row("Valid items", str(valid_count))
        table.add_row(
            "Invalid items",
            str(invalid_count),
            style="red" if invalid_count else "green",
        )
        table.add_row("Total", str(valid_count + invalid_count))

        console.print(table)

        # Show errors if any (only the first ten, and only in strict mode)
        if errors and strict:
            print_error("Validation errors:")
            for error in errors[:10]:
                console.print(f"  [red]✗[/red] {error}")
            if len(errors) > 10:
                console.print(f"  ... and {len(errors) - 10} more errors")

        if invalid_count > 0 and strict:
            ctx.exit(1)
        else:
            print_success(f"Validation complete: {valid_count} valid items")

    except click.exceptions.Exit:
        # ctx.exit() terminates by raising Exit, which derives from
        # RuntimeError. Without this clause the generic handler below would
        # catch the strict-mode exit and print a bogus
        # "Failed to validate items: 1" message.
        raise
    except Exception as e:
        print_error(f"Failed to validate items: {e}")
        ctx.exit(1)
|
|
826
|
+
|
|
827
|
+
|
|
828
|
+
@items.command()
@click.argument("items_file", type=click.Path(exists=True, path_type=Path))
@click.option(
    "--output",
    "-o",
    type=click.Path(path_type=Path),
    help="Output file for inferred types (JSONL)",
)
@click.pass_context
def infer_task_type(
    ctx: click.Context,
    items_file: Path,
    output: Path | None,
) -> None:
    """Infer task type for each item.

    Examples
    --------
    $ bead items infer-task-type items.jsonl

    $ bead items infer-task-type items.jsonl --output types.jsonl
    """
    # NOTE: Click reuses the docstring above as the command's --help text,
    # so implementation notes are kept in comments.
    #
    # Reads ``items_file`` as JSONL, infers a task type for every item with
    # ``infer_task_type_from_item`` and prints a count per task type. When
    # ``output`` is given, one ``{"item_id", "task_type"}`` record per item is
    # also written there as JSONL.
    try:
        print_info("Inferring task types...")

        results: list[dict[str, str]] = []
        type_counts: dict[str, int] = {}

        # Explicit UTF-8 keeps this command consistent with the other item
        # readers in this module instead of relying on the locale default.
        with open(items_file, encoding="utf-8") as f:
            line: str
            for line in f:
                line = line.strip()
                if not line:
                    continue

                # A malformed line is not handled per item: the error
                # propagates to the outer handler and aborts the command.
                item: Item = Item(**json.loads(line))

                try:
                    # task_type is already a string (Literal type), not enum
                    task_type_val: str = infer_task_type_from_item(item)
                except ValueError:
                    # Items whose type cannot be inferred are still reported,
                    # under the "unknown" bucket.
                    task_type_val = "unknown"

                type_counts[task_type_val] = type_counts.get(task_type_val, 0) + 1
                results.append(
                    {
                        "item_id": str(item.id),
                        "task_type": task_type_val,
                    }
                )

        # Display results
        table = Table(title="Task Type Distribution")
        table.add_column("Task Type", style="cyan")
        table.add_column("Count", justify="right", style="green")

        for task_type, count in sorted(type_counts.items()):
            table.add_row(task_type, str(count))

        console.print(table)

        # Save if output specified
        if output:
            output.parent.mkdir(parents=True, exist_ok=True)
            with open(output, "w", encoding="utf-8") as f:
                for result in results:
                    f.write(json.dumps(result) + "\n")
            print_success(f"Saved task type inference results: {output}")

    except Exception as e:
        print_error(f"Failed to infer task types: {e}")
        ctx.exit(1)
|
|
903
|
+
|
|
904
|
+
|
|
905
|
+
@items.command()
|
|
906
|
+
@click.option(
|
|
907
|
+
"--task-type",
|
|
908
|
+
type=click.Choice(
|
|
909
|
+
[
|
|
910
|
+
"forced_choice",
|
|
911
|
+
"ordinal_scale",
|
|
912
|
+
"categorical",
|
|
913
|
+
"binary",
|
|
914
|
+
"multi_select",
|
|
915
|
+
"magnitude",
|
|
916
|
+
"free_text",
|
|
917
|
+
"cloze",
|
|
918
|
+
],
|
|
919
|
+
case_sensitive=False,
|
|
920
|
+
),
|
|
921
|
+
required=True,
|
|
922
|
+
help="Task type",
|
|
923
|
+
)
|
|
924
|
+
def get_task_requirements(task_type: str) -> None:
|
|
925
|
+
"""Get requirements for a task type.
|
|
926
|
+
|
|
927
|
+
Examples
|
|
928
|
+
--------
|
|
929
|
+
$ bead items get-task-requirements --task-type forced_choice
|
|
930
|
+
|
|
931
|
+
$ bead items get-task-requirements --task-type ordinal_scale
|
|
932
|
+
"""
|
|
933
|
+
try:
|
|
934
|
+
# Cast string to TaskType literal (validated by Click Choice)
|
|
935
|
+
task_type_lit: TaskType = cast(TaskType, task_type)
|
|
936
|
+
requirements: dict[str, list[str] | str] = get_task_type_requirements(
|
|
937
|
+
task_type_lit
|
|
938
|
+
)
|
|
939
|
+
|
|
940
|
+
print_info(f"Requirements for task type: {task_type}")
|
|
941
|
+
console.print()
|
|
942
|
+
|
|
943
|
+
table = Table(show_header=False)
|
|
944
|
+
table.add_column("Key", style="cyan", no_wrap=True)
|
|
945
|
+
table.add_column("Value", style="white")
|
|
946
|
+
|
|
947
|
+
key: str
|
|
948
|
+
value: list[str] | str
|
|
949
|
+
for key, value in requirements.items():
|
|
950
|
+
if isinstance(value, list):
|
|
951
|
+
# Requirements lists contain strings
|
|
952
|
+
value_str: str = ", ".join(value)
|
|
953
|
+
else:
|
|
954
|
+
value_str = str(value)
|
|
955
|
+
table.add_row(key, value_str)
|
|
956
|
+
|
|
957
|
+
console.print(table)
|
|
958
|
+
|
|
959
|
+
except Exception as e:
|
|
960
|
+
print_error(f"Failed to get task requirements: {e}")
|