bead 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
bead/cli/workflow.py
ADDED
|
@@ -0,0 +1,1273 @@
|
|
|
1
|
+
"""Workflow orchestration commands for the bead CLI.

This module provides commands for managing end-to-end pipeline workflows,
including running complete pipelines, resuming interrupted workflows, and
rolling back to previous stages.
"""

from __future__ import annotations

import json
import shutil
import subprocess
import sys
from datetime import datetime
from pathlib import Path

import click
import yaml
from rich.console import Console
from rich.table import Table

from bead.cli.utils import print_error, print_info, print_success
from bead.data.base import JsonValue

# Shared Rich console used by every command in this module for styled output.
console = Console()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# ============================================================================
|
|
29
|
+
# State Management Utilities
|
|
30
|
+
# ============================================================================
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def get_state_file(project_dir: Path) -> Path:
    """Return the path of the workflow state file, creating ``.bead/`` if absent.

    Parameters
    ----------
    project_dir : Path
        Project directory path.

    Returns
    -------
    Path
        Path to .bead/workflow_state.json
    """
    hidden_dir = project_dir.joinpath(".bead")
    # Ensure the state directory exists before handing back a path inside it.
    hidden_dir.mkdir(exist_ok=True)
    return hidden_dir.joinpath("workflow_state.json")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def load_state(project_dir: Path) -> dict[str, JsonValue]:
    """Load workflow state from file.

    Parameters
    ----------
    project_dir : Path
        Project directory path.

    Returns
    -------
    dict[str, JsonValue]
        Workflow state dictionary; a fresh default state when no file exists.
    """
    path = get_state_file(project_dir)
    if path.exists():
        with path.open() as handle:
            return json.load(handle)
    # No state recorded yet: start from an empty stage map.
    return {"stages": {}, "last_run": None}
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def save_state(project_dir: Path, state: dict[str, JsonValue]) -> None:
    """Save workflow state to file.

    Parameters
    ----------
    project_dir : Path
        Project directory path.
    state : dict[str, JsonValue]
        Workflow state dictionary.
    """
    destination = get_state_file(project_dir)
    # default=str lets non-JSON values (e.g. datetime) serialize as strings.
    with destination.open("w") as handle:
        json.dump(state, handle, indent=2, default=str)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def update_stage_state(
    project_dir: Path, stage: str, status: str, error: str | None = None
) -> None:
    """Update state for a specific stage.

    Records the stage's status with a timestamp and refreshes the workflow's
    ``last_run`` marker using the same timestamp, so the two fields can never
    disagree.

    Parameters
    ----------
    project_dir : Path
        Project directory path.
    stage : str
        Stage name.
    status : str
        Stage status ('pending', 'running', 'completed', 'failed').
    error : str | None
        Error message if status is 'failed'.
    """
    state = load_state(project_dir)

    # Guard against a missing or corrupt "stages" entry in the state file.
    # (Explicit check instead of `assert`, which is stripped under -O.)
    stages = state.get("stages")
    if not isinstance(stages, dict):
        stages = {}
        state["stages"] = stages

    # Take a single clock reading: the original called datetime.now() twice,
    # so the stage timestamp and last_run could differ slightly.
    now = datetime.now().isoformat()
    stages[stage] = {
        "status": status,
        "timestamp": now,
        "error": error,
    }
    state["last_run"] = now
    save_state(project_dir, state)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def detect_stage_completion(project_dir: Path, stage: str) -> bool:
    """Detect if a stage has been completed by checking filesystem.

    Parameters
    ----------
    project_dir : Path
        Project directory path.
    stage : str
        Stage name.

    Returns
    -------
    bool
        True if stage appears completed.
    """
    # Stages whose output is a directory containing at least one *.jsonl file.
    jsonl_outputs = {
        "resources": "lexicons",
        "templates": "templates",
        "items": "items",
        "lists": "lists",
    }
    # Stages whose output is a directory containing any entry at all.
    dir_outputs = {
        "deployment": "experiments",
        "training": "models",
    }

    if stage in jsonl_outputs:
        target = project_dir / jsonl_outputs[stage]
        return target.exists() and any(target.glob("*.jsonl"))
    if stage in dir_outputs:
        target = project_dir / dir_outputs[stage]
        return target.exists() and any(target.iterdir())
    # Unknown stage names are never considered complete.
    return False
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
# ============================================================================
|
|
165
|
+
# Workflow Templates
|
|
166
|
+
# ============================================================================
|
|
167
|
+
|
|
168
|
+
# Default directory layout shared by every built-in workflow template.
# Each template gets its own copy (via unpacking) so mutating one template's
# paths cannot leak into another.
_DEFAULT_PATHS: dict[str, str] = {
    "lexicons_dir": "lexicons",
    "templates_dir": "templates",
    "items_dir": "items",
    "lists_dir": "lists",
    "experiments_dir": "experiments",
}

# Built-in workflow templates: each entry maps a template slug to a display
# name, a description, and a ready-to-use project configuration.
WORKFLOW_TEMPLATES = {
    "acceptability-study": {
        "name": "Acceptability Judgment Study",
        "description": "Collect acceptability judgments on linguistic stimuli",
        "config": {
            "project": {
                "name": "acceptability_study",
                "language_code": "eng",
                "description": "Acceptability judgment experiment",
            },
            "paths": {**_DEFAULT_PATHS},
            "templates": {"filling_strategy": "exhaustive"},
            "items": {"validation_enabled": True},
            "lists": {"n_lists": 10},
            "deployment": {"platform": "jatos", "jspsych_version": "8.0.0"},
        },
    },
    "forced-choice": {
        "name": "Forced Choice Study",
        "description": "2AFC or 3AFC comparison judgments",
        "config": {
            "project": {
                "name": "forced_choice_study",
                "language_code": "eng",
                "description": "Forced choice experiment",
            },
            "paths": {**_DEFAULT_PATHS},
            "templates": {"filling_strategy": "stratified"},
            "items": {"validation_enabled": True},
            "lists": {"n_lists": 20},
            "deployment": {"platform": "jatos", "jspsych_version": "8.0.0"},
        },
    },
    "ordinal-scale": {
        "name": "Ordinal Scale Study",
        "description": "Likert scale or slider ratings",
        "config": {
            "project": {
                "name": "ordinal_scale_study",
                "language_code": "eng",
                "description": "Ordinal scale experiment",
            },
            "paths": {**_DEFAULT_PATHS},
            "templates": {"filling_strategy": "random", "max_combinations": 500},
            "items": {"validation_enabled": True},
            "lists": {"n_lists": 15},
            "deployment": {"platform": "jatos", "jspsych_version": "8.0.0"},
        },
    },
    "active-learning": {
        "name": "Active Learning Study",
        "description": "Human-in-the-loop training with convergence detection",
        "config": {
            "project": {
                "name": "active_learning_study",
                "language_code": "eng",
                "description": "Active learning experiment",
            },
            # Active learning additionally needs somewhere to store models.
            "paths": {**_DEFAULT_PATHS, "models_dir": "models"},
            "templates": {"filling_strategy": "stratified"},
            "items": {"validation_enabled": True},
            "lists": {"n_lists": 10},
            "deployment": {"platform": "jatos", "jspsych_version": "8.0.0"},
            "training": {
                "framework": "huggingface",
                "epochs": 10,
                "convergence": {"metric": "krippendorff_alpha", "threshold": 0.80},
            },
        },
    },
}
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
# ============================================================================
|
|
267
|
+
# Stage Execution Utilities
|
|
268
|
+
# ============================================================================
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def _execute_stage(
    stage: str,
    config: dict[str, JsonValue],
    project_dir: Path,
    verbose: bool,
) -> None:
    """Execute a specific pipeline stage.

    Parameters
    ----------
    stage : str
        Stage name ('resources', 'templates', 'items', 'lists',
        'deployment', 'training').
    config : dict[str, JsonValue]
        Configuration dictionary from YAML.
    project_dir : Path
        Project directory path.
    verbose : bool
        Whether to show detailed command output.

    Raises
    ------
    RuntimeError
        If stage execution fails.
    """
    # Map each stage name to its dedicated executor.
    dispatch = {
        "resources": _execute_resources_stage,
        "templates": _execute_templates_stage,
        "items": _execute_items_stage,
        "lists": _execute_lists_stage,
        "deployment": _execute_deployment_stage,
        "training": _execute_training_stage,
    }
    handler = dispatch.get(stage)
    if handler is None:
        raise ValueError(f"Unknown stage: {stage}")
    handler(config, project_dir, verbose)
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def _execute_resources_stage(
|
|
313
|
+
config: dict[str, JsonValue], project_dir: Path, verbose: bool
|
|
314
|
+
) -> None:
|
|
315
|
+
"""Execute resources stage (lexicon and template creation).
|
|
316
|
+
|
|
317
|
+
This stage typically involves manual creation of lexicons and templates
|
|
318
|
+
or importing from external sources. For now, we validate that the
|
|
319
|
+
required directories exist.
|
|
320
|
+
|
|
321
|
+
Parameters
|
|
322
|
+
----------
|
|
323
|
+
config : dict[str, JsonValue]
|
|
324
|
+
Configuration dictionary.
|
|
325
|
+
project_dir : Path
|
|
326
|
+
Project directory.
|
|
327
|
+
verbose : bool
|
|
328
|
+
Verbose output flag.
|
|
329
|
+
|
|
330
|
+
Raises
|
|
331
|
+
------
|
|
332
|
+
RuntimeError
|
|
333
|
+
If resources directory doesn't exist or is empty.
|
|
334
|
+
"""
|
|
335
|
+
paths = config.get("paths", {})
|
|
336
|
+
lexicons_dir = project_dir / paths.get("lexicons_dir", "lexicons")
|
|
337
|
+
templates_dir = project_dir / paths.get("templates_dir", "templates")
|
|
338
|
+
|
|
339
|
+
# Check that resources exist
|
|
340
|
+
if not lexicons_dir.exists():
|
|
341
|
+
raise RuntimeError(f"Lexicons directory not found: {lexicons_dir}")
|
|
342
|
+
|
|
343
|
+
if not templates_dir.exists():
|
|
344
|
+
raise RuntimeError(f"Templates directory not found: {templates_dir}")
|
|
345
|
+
|
|
346
|
+
# Count files
|
|
347
|
+
lexicon_files = list(lexicons_dir.glob("*.jsonl"))
|
|
348
|
+
template_files = list(templates_dir.glob("*.jsonl"))
|
|
349
|
+
|
|
350
|
+
if not lexicon_files:
|
|
351
|
+
raise RuntimeError(f"No lexicon files found in {lexicons_dir}")
|
|
352
|
+
|
|
353
|
+
if not template_files:
|
|
354
|
+
raise RuntimeError(f"No template files found in {templates_dir}")
|
|
355
|
+
|
|
356
|
+
console.print(
|
|
357
|
+
f"[green]✓[/green] Found {len(lexicon_files)} lexicon(s) "
|
|
358
|
+
f"and {len(template_files)} template(s)"
|
|
359
|
+
)
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def _execute_templates_stage(
|
|
363
|
+
config: dict[str, JsonValue], project_dir: Path, verbose: bool
|
|
364
|
+
) -> None:
|
|
365
|
+
"""Execute templates stage (template filling).
|
|
366
|
+
|
|
367
|
+
Parameters
|
|
368
|
+
----------
|
|
369
|
+
config : dict[str, JsonValue]
|
|
370
|
+
Configuration dictionary.
|
|
371
|
+
project_dir : Path
|
|
372
|
+
Project directory.
|
|
373
|
+
verbose : bool
|
|
374
|
+
Verbose output flag.
|
|
375
|
+
|
|
376
|
+
Raises
|
|
377
|
+
------
|
|
378
|
+
RuntimeError
|
|
379
|
+
If template filling fails.
|
|
380
|
+
"""
|
|
381
|
+
paths = config.get("paths", {})
|
|
382
|
+
templates_config = config.get("templates", {})
|
|
383
|
+
|
|
384
|
+
templates_dir = project_dir / paths.get("templates_dir", "templates")
|
|
385
|
+
lexicons_dir = project_dir / paths.get("lexicons_dir", "lexicons")
|
|
386
|
+
output_dir = project_dir / paths.get("filled_templates_dir", "filled_templates")
|
|
387
|
+
output_dir.mkdir(exist_ok=True)
|
|
388
|
+
|
|
389
|
+
# Get template and lexicon files
|
|
390
|
+
template_files = list(templates_dir.glob("*.jsonl"))
|
|
391
|
+
lexicon_files = list(lexicons_dir.glob("*.jsonl"))
|
|
392
|
+
|
|
393
|
+
if not template_files:
|
|
394
|
+
raise RuntimeError(f"No template files found in {templates_dir}")
|
|
395
|
+
if not lexicon_files:
|
|
396
|
+
raise RuntimeError(f"No lexicon files found in {lexicons_dir}")
|
|
397
|
+
|
|
398
|
+
# Build command for each template file
|
|
399
|
+
strategy = templates_config.get("filling_strategy", "exhaustive")
|
|
400
|
+
|
|
401
|
+
for template_file in template_files:
|
|
402
|
+
output_file = output_dir / f"filled_{template_file.name}"
|
|
403
|
+
|
|
404
|
+
cmd = [
|
|
405
|
+
"bead",
|
|
406
|
+
"templates",
|
|
407
|
+
"fill",
|
|
408
|
+
str(template_file),
|
|
409
|
+
*[str(f) for f in lexicon_files],
|
|
410
|
+
str(output_file),
|
|
411
|
+
"--strategy",
|
|
412
|
+
strategy,
|
|
413
|
+
]
|
|
414
|
+
|
|
415
|
+
console.print(f"[cyan]Filling template: {template_file.name}[/cyan]")
|
|
416
|
+
_run_command(cmd, verbose)
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
def _execute_items_stage(
|
|
420
|
+
config: dict[str, JsonValue], project_dir: Path, verbose: bool
|
|
421
|
+
) -> None:
|
|
422
|
+
"""Execute items stage (item construction).
|
|
423
|
+
|
|
424
|
+
Parameters
|
|
425
|
+
----------
|
|
426
|
+
config : dict[str, JsonValue]
|
|
427
|
+
Configuration dictionary.
|
|
428
|
+
project_dir : Path
|
|
429
|
+
Project directory.
|
|
430
|
+
verbose : bool
|
|
431
|
+
Verbose output flag.
|
|
432
|
+
|
|
433
|
+
Raises
|
|
434
|
+
------
|
|
435
|
+
RuntimeError
|
|
436
|
+
If item construction fails.
|
|
437
|
+
"""
|
|
438
|
+
paths = config.get("paths", {})
|
|
439
|
+
items_config = config.get("items", {})
|
|
440
|
+
|
|
441
|
+
filled_dir = project_dir / paths.get("filled_templates_dir", "filled_templates")
|
|
442
|
+
output_dir = project_dir / paths.get("items_dir", "items")
|
|
443
|
+
output_dir.mkdir(exist_ok=True)
|
|
444
|
+
|
|
445
|
+
# Get filled template files
|
|
446
|
+
filled_files = list(filled_dir.glob("*.jsonl"))
|
|
447
|
+
|
|
448
|
+
if not filled_files:
|
|
449
|
+
raise RuntimeError(f"No filled templates found in {filled_dir}")
|
|
450
|
+
|
|
451
|
+
# Build item construction command
|
|
452
|
+
task_type = items_config.get("task_type")
|
|
453
|
+
|
|
454
|
+
if task_type:
|
|
455
|
+
# Use task-type-specific command if specified
|
|
456
|
+
output_file = output_dir / "items.jsonl"
|
|
457
|
+
|
|
458
|
+
cmd = [
|
|
459
|
+
"bead",
|
|
460
|
+
"items",
|
|
461
|
+
"construct",
|
|
462
|
+
*[str(f) for f in filled_files],
|
|
463
|
+
str(output_file),
|
|
464
|
+
"--task-type",
|
|
465
|
+
task_type,
|
|
466
|
+
]
|
|
467
|
+
else:
|
|
468
|
+
# Use generic construct command
|
|
469
|
+
output_file = output_dir / "items.jsonl"
|
|
470
|
+
|
|
471
|
+
cmd = [
|
|
472
|
+
"bead",
|
|
473
|
+
"items",
|
|
474
|
+
"construct",
|
|
475
|
+
*[str(f) for f in filled_files],
|
|
476
|
+
str(output_file),
|
|
477
|
+
]
|
|
478
|
+
|
|
479
|
+
console.print(
|
|
480
|
+
f"[cyan]Constructing items from {len(filled_files)} template(s)[/cyan]"
|
|
481
|
+
)
|
|
482
|
+
_run_command(cmd, verbose)
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
def _execute_lists_stage(
|
|
486
|
+
config: dict[str, JsonValue], project_dir: Path, verbose: bool
|
|
487
|
+
) -> None:
|
|
488
|
+
"""Execute lists stage (list partitioning).
|
|
489
|
+
|
|
490
|
+
Parameters
|
|
491
|
+
----------
|
|
492
|
+
config : dict[str, JsonValue]
|
|
493
|
+
Configuration dictionary.
|
|
494
|
+
project_dir : Path
|
|
495
|
+
Project directory.
|
|
496
|
+
verbose : bool
|
|
497
|
+
Verbose output flag.
|
|
498
|
+
|
|
499
|
+
Raises
|
|
500
|
+
------
|
|
501
|
+
RuntimeError
|
|
502
|
+
If list partitioning fails.
|
|
503
|
+
"""
|
|
504
|
+
paths = config.get("paths", {})
|
|
505
|
+
lists_config = config.get("lists", {})
|
|
506
|
+
|
|
507
|
+
items_dir = project_dir / paths.get("items_dir", "items")
|
|
508
|
+
output_dir = project_dir / paths.get("lists_dir", "lists")
|
|
509
|
+
output_dir.mkdir(exist_ok=True)
|
|
510
|
+
|
|
511
|
+
# Get item files
|
|
512
|
+
item_files = list(items_dir.glob("*.jsonl"))
|
|
513
|
+
|
|
514
|
+
if not item_files:
|
|
515
|
+
raise RuntimeError(f"No item files found in {items_dir}")
|
|
516
|
+
|
|
517
|
+
# Get first item file (typically there's just one)
|
|
518
|
+
item_file = item_files[0]
|
|
519
|
+
|
|
520
|
+
# Build partitioning command
|
|
521
|
+
n_lists = lists_config.get("n_lists", 10)
|
|
522
|
+
|
|
523
|
+
cmd = [
|
|
524
|
+
"bead",
|
|
525
|
+
"lists",
|
|
526
|
+
"partition",
|
|
527
|
+
str(item_file),
|
|
528
|
+
str(output_dir),
|
|
529
|
+
"--n-lists",
|
|
530
|
+
str(n_lists),
|
|
531
|
+
]
|
|
532
|
+
|
|
533
|
+
# Add constraints if specified
|
|
534
|
+
if "list_constraints" in lists_config:
|
|
535
|
+
constraints_file = project_dir / lists_config["list_constraints"]
|
|
536
|
+
if constraints_file.exists():
|
|
537
|
+
cmd.extend(["--list-constraints", str(constraints_file)])
|
|
538
|
+
|
|
539
|
+
if "batch_constraints" in lists_config:
|
|
540
|
+
constraints_file = project_dir / lists_config["batch_constraints"]
|
|
541
|
+
if constraints_file.exists():
|
|
542
|
+
cmd.extend(["--batch-constraints", str(constraints_file)])
|
|
543
|
+
|
|
544
|
+
console.print(f"[cyan]Partitioning items into {n_lists} lists[/cyan]")
|
|
545
|
+
_run_command(cmd, verbose)
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
def _execute_deployment_stage(
|
|
549
|
+
config: dict[str, JsonValue], project_dir: Path, verbose: bool
|
|
550
|
+
) -> None:
|
|
551
|
+
"""Execute deployment stage (experiment generation).
|
|
552
|
+
|
|
553
|
+
Parameters
|
|
554
|
+
----------
|
|
555
|
+
config : dict[str, JsonValue]
|
|
556
|
+
Configuration dictionary.
|
|
557
|
+
project_dir : Path
|
|
558
|
+
Project directory.
|
|
559
|
+
verbose : bool
|
|
560
|
+
Verbose output flag.
|
|
561
|
+
|
|
562
|
+
Raises
|
|
563
|
+
------
|
|
564
|
+
RuntimeError
|
|
565
|
+
If deployment generation fails.
|
|
566
|
+
"""
|
|
567
|
+
paths = config.get("paths", {})
|
|
568
|
+
deployment_config = config.get("deployment", {})
|
|
569
|
+
|
|
570
|
+
lists_dir = project_dir / paths.get("lists_dir", "lists")
|
|
571
|
+
items_dir = project_dir / paths.get("items_dir", "items")
|
|
572
|
+
output_dir = project_dir / paths.get("experiments_dir", "experiments")
|
|
573
|
+
output_dir.mkdir(exist_ok=True)
|
|
574
|
+
|
|
575
|
+
# Get list and item files
|
|
576
|
+
item_files = list(items_dir.glob("*.jsonl"))
|
|
577
|
+
|
|
578
|
+
if not item_files:
|
|
579
|
+
raise RuntimeError(f"No item files found in {items_dir}")
|
|
580
|
+
|
|
581
|
+
if not lists_dir.exists():
|
|
582
|
+
raise RuntimeError(f"Lists directory not found: {lists_dir}")
|
|
583
|
+
|
|
584
|
+
item_file = item_files[0]
|
|
585
|
+
|
|
586
|
+
# Build deployment command
|
|
587
|
+
cmd = [
|
|
588
|
+
"bead",
|
|
589
|
+
"deployment",
|
|
590
|
+
"generate",
|
|
591
|
+
str(lists_dir),
|
|
592
|
+
str(item_file),
|
|
593
|
+
str(output_dir),
|
|
594
|
+
]
|
|
595
|
+
|
|
596
|
+
# Add distribution strategy (required)
|
|
597
|
+
dist_strategy = deployment_config.get("distribution_strategy", "balanced")
|
|
598
|
+
cmd.extend(["--distribution-strategy", dist_strategy])
|
|
599
|
+
|
|
600
|
+
# Add experiment type if specified
|
|
601
|
+
if "experiment_type" in deployment_config:
|
|
602
|
+
cmd.extend(["--experiment-type", deployment_config["experiment_type"]])
|
|
603
|
+
|
|
604
|
+
console.print(
|
|
605
|
+
f"[cyan]Generating deployment with {dist_strategy} distribution[/cyan]"
|
|
606
|
+
)
|
|
607
|
+
_run_command(cmd, verbose)
|
|
608
|
+
|
|
609
|
+
|
|
610
|
+
def _execute_training_stage(
    config: dict[str, JsonValue], project_dir: Path, verbose: bool
) -> None:
    """Execute training stage (model training).

    Parameters
    ----------
    config : dict[str, JsonValue]
        Configuration dictionary.
    project_dir : Path
        Project directory.
    verbose : bool
        Verbose output flag.

    Raises
    ------
    RuntimeError
        If training fails.
    """
    path_cfg = config.get("paths", {})
    _ = config.get("training", {})  # For future use

    source_dir = project_dir / path_cfg.get("items_dir", "items")
    model_dir = project_dir / path_cfg.get("models_dir", "models")
    model_dir.mkdir(exist_ok=True)

    # Locate item files that training would consume.
    candidates = list(source_dir.glob("*.jsonl"))
    if not candidates:
        raise RuntimeError(f"No item files found in {source_dir}")

    item_file = candidates[0]

    # Training depends on collected response data, which this
    # orchestrator cannot produce on its own. Validate the setup,
    # then tell the user how to proceed once data exists.
    console.print(
        "[yellow]⚠[/yellow] Training stage requires data collection. "
        "Skipping automated execution."
    )
    console.print(
        "[cyan]ℹ[/cyan] After data collection, run: "
        f"bead training train-model --items {item_file} --data <data.jsonl>"
    )
|
|
654
|
+
|
|
655
|
+
|
|
656
|
+
def _run_command(cmd: list[str], verbose: bool) -> None:
|
|
657
|
+
"""Run a subprocess command with error handling.
|
|
658
|
+
|
|
659
|
+
Parameters
|
|
660
|
+
----------
|
|
661
|
+
cmd : list[str]
|
|
662
|
+
Command and arguments to execute.
|
|
663
|
+
verbose : bool
|
|
664
|
+
Whether to show command output in real-time.
|
|
665
|
+
|
|
666
|
+
Raises
|
|
667
|
+
------
|
|
668
|
+
RuntimeError
|
|
669
|
+
If command execution fails.
|
|
670
|
+
"""
|
|
671
|
+
if verbose:
|
|
672
|
+
console.print(f"[dim]Running: {' '.join(cmd)}[/dim]")
|
|
673
|
+
|
|
674
|
+
try:
|
|
675
|
+
if verbose:
|
|
676
|
+
# Show output in real-time
|
|
677
|
+
result = subprocess.run(cmd, check=True, text=True)
|
|
678
|
+
else:
|
|
679
|
+
# Capture output
|
|
680
|
+
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
|
681
|
+
|
|
682
|
+
if result.stdout and verbose:
|
|
683
|
+
console.print(result.stdout)
|
|
684
|
+
|
|
685
|
+
except subprocess.CalledProcessError as e:
|
|
686
|
+
error_msg = f"Command failed: {' '.join(cmd)}"
|
|
687
|
+
if e.stderr:
|
|
688
|
+
error_msg += f"\n{e.stderr}"
|
|
689
|
+
raise RuntimeError(error_msg) from e
|
|
690
|
+
except FileNotFoundError as e:
|
|
691
|
+
raise RuntimeError(f"Command not found: {cmd[0]}. Is bead installed?") from e
|
|
692
|
+
|
|
693
|
+
|
|
694
|
+
# ============================================================================
|
|
695
|
+
# Workflow Commands
|
|
696
|
+
# ============================================================================
|
|
697
|
+
|
|
698
|
+
|
|
699
|
+
@click.group()
def workflow() -> None:
    """Manage end-to-end pipeline workflows.

    Subcommands orchestrate the full bead pipeline: running stages,
    initializing new projects from templates, inspecting stage
    progress, and resuming or rolling back interrupted runs.

    Examples
    --------
    Run complete pipeline:
        $ bead workflow run --config bead.yaml

    Initialize new project:
        $ bead workflow init acceptability-study

    Check workflow status:
        $ bead workflow status

    Resume interrupted workflow:
        $ bead workflow resume

    Rollback to previous stage:
        $ bead workflow rollback deployment
    """
|
|
720
|
+
|
|
721
|
+
|
|
722
|
+
@workflow.command()
@click.option(
    "--config",
    "-c",
    "config_path",
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
    default="bead.yaml",
    help="Path to configuration file",
)
@click.option(
    "--stages",
    type=str,
    default=None,
    help="Comma-separated list of stages to run (default: all)",
)
@click.option(
    "--from-stage",
    type=click.Choice(
        ["resources", "templates", "items", "lists", "deployment", "training"]
    ),
    default=None,
    help="Start from this stage",
)
@click.option(
    "--dry-run",
    is_flag=True,
    default=False,
    help="Show what would be executed without running",
)
@click.option(
    "--verbose",
    "-v",
    is_flag=True,
    default=False,
    help="Show detailed command output",
)
def run(
    config_path: Path,
    stages: str | None,
    from_stage: str | None,
    dry_run: bool,
    verbose: bool,
) -> None:
    """Run complete pipeline workflow.

    Executes all pipeline stages sequentially:
    1. resources - Create lexicons and templates
    2. templates - Fill templates with lexicon items
    3. items - Construct experimental items
    4. lists - Partition items into experiment lists
    5. deployment - Generate jsPsych experiments
    6. training - Train models with active learning (optional)

    The workflow tracks progress and can be resumed if interrupted.

    Examples
    --------
    Run all stages:
        $ bead workflow run --config bead.yaml

    Run specific stages:
        $ bead workflow run --stages resources,templates,items

    Start from items stage:
        $ bead workflow run --from-stage items

    Dry run to preview:
        $ bead workflow run --dry-run
    """
    project_dir = config_path.parent
    console.rule("[bold]Pipeline Workflow Execution[/bold]")

    # Load configuration
    try:
        with open(config_path) as f:
            config = yaml.safe_load(f)
    except Exception as e:
        print_error(f"Failed to load config: {e}")
        sys.exit(1)

    # Determine which stages to run
    all_stages = [
        "resources",
        "templates",
        "items",
        "lists",
        "deployment",
        "training",
    ]

    if stages:
        selected_stages = [s.strip() for s in stages.split(",")]
        # Validate user-supplied names up front. Previously a typo in
        # --stages was only caught inside the execution loop, after it
        # had already been recorded as a running/failed stage in the
        # workflow state file.
        unknown = [s for s in selected_stages if s not in all_stages]
        if unknown:
            print_error(f"Unknown stage(s): {', '.join(unknown)}")
            print_info(f"Valid stages: {', '.join(all_stages)}")
            sys.exit(1)
    elif from_stage:
        start_idx = all_stages.index(from_stage)
        selected_stages = all_stages[start_idx:]
    else:
        selected_stages = all_stages

    # Show plan
    print_info(f"Configuration: {config_path}")
    print_info(f"Stages to run: {', '.join(selected_stages)}")

    if dry_run:
        console.print("\n[yellow]DRY RUN MODE - No commands will be executed[/yellow]")
        for stage in selected_stages:
            console.print(f" • Would execute: [cyan]{stage}[/cyan] stage")
        return

    # Execute stages
    _ = load_state(project_dir)  # For resume/status compatibility
    failed = False

    for stage in selected_stages:
        console.rule(f"[bold cyan]Stage: {stage}[/bold cyan]")

        try:
            update_stage_state(project_dir, stage, "running")

            # Execute the stage
            _execute_stage(stage, config, project_dir, verbose)

            update_stage_state(project_dir, stage, "completed")
            print_success(f"{stage} stage completed")

        except RuntimeError as e:
            # Expected failure path raised by stage executors.
            update_stage_state(project_dir, stage, "failed", str(e))
            print_error(f"Stage '{stage}' failed: {e}")
            failed = True
            break
        except Exception as e:
            # Anything else is an unexpected bug; still record it so
            # resume/status reflect reality.
            update_stage_state(project_dir, stage, "failed", str(e))
            print_error(f"Stage '{stage}' failed with unexpected error: {e}")
            failed = True
            break

    if not failed:
        console.rule("[bold green]Pipeline Complete[/bold green]")
        print_success("All stages completed successfully")
    else:
        console.rule("[bold red]Pipeline Failed[/bold red]")
        print_error("Pipeline execution failed. Use 'bead workflow resume' to continue")
        sys.exit(1)
|
|
864
|
+
|
|
865
|
+
|
|
866
|
+
@workflow.command()
@click.argument("template", type=click.Choice(list(WORKFLOW_TEMPLATES.keys())))
@click.option(
    "--output-dir",
    "-o",
    type=click.Path(path_type=Path),
    default=None,
    help="Output directory (default: current directory)",
)
@click.option(
    "--force",
    is_flag=True,
    default=False,
    help="Overwrite existing files",
)
def init(template: str, output_dir: Path | None, force: bool) -> None:
    """Initialize new project from template.

    Creates a complete project structure with configuration file,
    directory layout, and example files.

    Available templates:
    - acceptability-study: Acceptability judgment experiments
    - forced-choice: 2AFC or 3AFC comparison judgments
    - ordinal-scale: Likert scale or slider ratings
    - active-learning: Human-in-the-loop training

    Examples
    --------
    Initialize acceptability study:
        $ bead workflow init acceptability-study

    Initialize in specific directory:
        $ bead workflow init forced-choice --output-dir my-project

    Overwrite existing files:
        $ bead workflow init ordinal-scale --force
    """
    if output_dir is None:
        output_dir = Path.cwd()

    template_spec = WORKFLOW_TEMPLATES[template]
    console.rule(f"[bold]Initialize: {template_spec['name']}[/bold]")

    # Create directory structure
    config_data = template_spec["config"]
    paths_value = config_data.get("paths", {})
    paths: dict[str, JsonValue] = paths_value if isinstance(paths_value, dict) else {}

    dirs_to_create: list[str] = [
        str(paths.get("lexicons_dir", "lexicons")),
        str(paths.get("templates_dir", "templates")),
        str(paths.get("items_dir", "items")),
        str(paths.get("lists_dir", "lists")),
        str(paths.get("experiments_dir", "experiments")),
    ]

    # models_dir is only present for templates that include training.
    if "models_dir" in paths:
        dirs_to_create.append(str(paths["models_dir"]))

    for dir_name in dirs_to_create:
        dir_path = output_dir / dir_name
        if dir_path.exists() and not force:
            console.print(f"[yellow]⚠[/yellow] Directory exists: {dir_name}")
        else:
            dir_path.mkdir(parents=True, exist_ok=True)
            console.print(f"[green]✓[/green] Created: {dir_name}/")

    # Create configuration file
    config_file = output_dir / "bead.yaml"
    if config_file.exists() and not force:
        print_error(f"Configuration file exists: {config_file}")
        print_info("Use --force to overwrite")
        sys.exit(1)

    with open(config_file, "w") as f:
        yaml.dump(config_data, f, default_flow_style=False, sort_keys=False)
    print_success(f"Created configuration: {config_file}")

    # Create .gitignore — like the directories and bead.yaml above,
    # an existing file is only overwritten when --force is given.
    # (Previously an existing .gitignore was clobbered unconditionally.)
    gitignore_file = output_dir / ".gitignore"
    gitignore_content = """# bead workflow ignores
.bead/
.cache/
*.pyc
__pycache__/
*.jsonl
experiments/
models/
"""
    if gitignore_file.exists() and not force:
        console.print("[yellow]⚠[/yellow] File exists: .gitignore")
    else:
        with open(gitignore_file, "w") as f:
            f.write(gitignore_content)
        print_success("Created .gitignore")

    console.print("\n[bold green]✓ Project initialized[/bold green]")
    console.print("\n[bold]Next steps:[/bold]")
    console.print(" 1. Edit bead.yaml to configure your experiment")
    console.print(" 2. Create lexicon files in lexicons/")
    console.print(" 3. Create template files in templates/")
    console.print(" 4. Run: bead workflow run")
|
|
966
|
+
|
|
967
|
+
|
|
968
|
+
@workflow.command()
@click.option(
    "--config",
    "-c",
    "config_path",
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
    default="bead.yaml",
    help="Path to configuration file",
)
def status(config_path: Path) -> None:
    """Show current workflow status.

    Displays completion status for each pipeline stage by checking
    both the workflow state file and the filesystem.

    Examples
    --------
    Show workflow status:
        $ bead workflow status

    Use custom config:
        $ bead workflow status --config my-config.yaml
    """
    project_dir = config_path.parent
    console.rule("[bold]Workflow Status[/bold]")

    # Load recorded workflow state.
    state = load_state(project_dir)
    stages_state: dict[str, JsonValue] = state.get("stages", {})  # type: ignore

    all_stages = [
        "resources",
        "templates",
        "items",
        "lists",
        "deployment",
        "training",
    ]

    table = Table(title="Pipeline Stage Status")
    table.add_column("Stage", style="cyan")
    table.add_column("Status", style="bold")
    table.add_column("Last Updated")

    for stage in all_stages:
        record: dict[str, JsonValue] = stages_state.get(stage, {})
        recorded = record.get("status")

        # State-file entries win; otherwise fall back to a filesystem
        # check for outputs produced outside this tool's bookkeeping.
        # NOTE: local renamed from `status` to avoid shadowing this
        # command function's own name.
        if recorded == "completed":
            label, updated = (
                "[green]✓ Completed[/green]",
                str(record.get("timestamp", "Unknown")),
            )
        elif recorded == "failed":
            label, updated = (
                "[red]✗ Failed[/red]",
                str(record.get("timestamp", "Unknown")),
            )
        elif detect_stage_completion(project_dir, stage):
            label, updated = "[yellow]⚠ Detected[/yellow]", "Filesystem check"
        else:
            label, updated = "[dim]○ Pending[/dim]", "-"

        table.add_row(stage, label, updated)

    console.print(table)

    if state.get("last_run"):
        console.print(f"\n[dim]Last run: {state.get('last_run')}[/dim]")
|
|
1039
|
+
|
|
1040
|
+
|
|
1041
|
+
@workflow.command()
@click.option(
    "--config",
    "-c",
    "config_path",
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
    default="bead.yaml",
    help="Path to configuration file",
)
def resume(config_path: Path) -> None:
    """Resume interrupted workflow.

    Reads the workflow state file and continues execution from the
    last incomplete stage, skipping stages that have already completed.

    Examples
    --------
    Resume workflow:
        $ bead workflow resume

    Resume with custom config:
        $ bead workflow resume --config my-config.yaml
    """
    project_dir = config_path.parent
    console.rule("[bold]Resume Workflow[/bold]")

    # Load state
    state = load_state(project_dir)
    stages_value = state.get("stages", {})

    # A non-dict "stages" entry means the state file is corrupt.
    if not isinstance(stages_value, dict):
        print_error("Invalid workflow state. Use 'bead workflow run' to start.")
        sys.exit(1)

    stages_state: dict[str, JsonValue] = stages_value

    if not stages_state:
        print_error("No workflow state found. Use 'bead workflow run' to start.")
        sys.exit(1)

    all_stages = [
        "resources",
        "templates",
        "items",
        "lists",
        "deployment",
        "training",
    ]

    # Index of the furthest stage marked completed (-1 if none).
    def _is_done(name: str) -> bool:
        record = stages_state.get(name, {})
        return isinstance(record, dict) and record.get("status") == "completed"

    done_positions = [i for i, name in enumerate(all_stages) if _is_done(name)]
    last_completed_idx = done_positions[-1] if done_positions else -1

    if last_completed_idx == len(all_stages) - 1:
        print_success("All stages completed. Nothing to resume.")
        return

    # Resume from the stage after the furthest completed one.
    resume_from = all_stages[last_completed_idx + 1]
    console.print(f"[cyan]Resuming from stage: {resume_from}[/cyan]")
    console.print(f"[dim]Last completed: {all_stages[last_completed_idx]}[/dim]\n")

    # Delegate to the `run` command with --from-stage.
    ctx = click.get_current_context()
    ctx.invoke(
        run,
        config_path=config_path,
        stages=None,
        from_stage=resume_from,
        dry_run=False,
        verbose=False,
    )
|
|
1117
|
+
|
|
1118
|
+
|
|
1119
|
+
@workflow.command()
@click.argument(
    "stage",
    type=click.Choice(
        ["resources", "templates", "items", "lists", "deployment", "training"]
    ),
)
@click.option(
    "--config",
    "-c",
    "config_path",
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
    default="bead.yaml",
    help="Path to configuration file",
)
@click.option(
    "--force",
    is_flag=True,
    default=False,
    help="Skip confirmation prompt",
)
@click.option(
    "--dry-run",
    is_flag=True,
    default=False,
    help="Show what would be deleted without deleting",
)
def rollback(stage: str, config_path: Path, force: bool, dry_run: bool) -> None:
    """Rollback to previous stage.

    Deletes outputs from the specified stage and all subsequent stages,
    allowing you to re-run from that point. Also updates the workflow
    state file.

    Examples
    --------
    Rollback to items stage:
        $ bead workflow rollback items

    Dry run to preview:
        $ bead workflow rollback deployment --dry-run

    Skip confirmation:
        $ bead workflow rollback templates --force
    """
    project_dir = config_path.parent
    console.rule("[bold yellow]Rollback Workflow[/bold yellow]")

    # Determine stages to delete: the requested stage and everything after it.
    all_stages = [
        "resources",
        "templates",
        "items",
        "lists",
        "deployment",
        "training",
    ]

    stage_idx = all_stages.index(stage)
    stages_to_delete = all_stages[stage_idx:]

    # Map stages to the output directories they own.
    stage_dirs: dict[str, list[str]] = {
        "resources": ["lexicons", "templates"],
        "templates": ["filled_templates"],
        "items": ["items"],
        "lists": ["lists"],
        "deployment": ["experiments"],
        "training": ["models"],
    }

    dirs_to_delete: list[str] = []
    for s in stages_to_delete:
        dirs_to_delete.extend(stage_dirs.get(s, []))

    # Show what will be deleted
    console.print("[yellow]Will rollback stages:[/yellow]")
    for s in stages_to_delete:
        console.print(f" • {s}")

    console.print("\n[yellow]Will delete directories:[/yellow]")
    for dir_name in dirs_to_delete:
        dir_path = project_dir / dir_name
        if dir_path.exists():
            # Use a named variable instead of `_` — `_` conventionally
            # means "unused", but the value is used here.
            file_count = sum(1 for p in dir_path.rglob("*") if p.is_file())
            console.print(f" • {dir_name}/ ({file_count} files)")

    if dry_run:
        console.print("\n[cyan]DRY RUN MODE - No files will be deleted[/cyan]")
        return

    # Confirm deletion
    if not force:
        console.print()
        if not click.confirm(
            "Are you sure you want to delete these files?", default=False
        ):
            print_info("Rollback cancelled")
            return

    # Delete directories
    console.print()
    for dir_name in dirs_to_delete:
        dir_path = project_dir / dir_name
        if dir_path.exists():
            try:
                shutil.rmtree(dir_path)
                console.print(f"[green]✓[/green] Deleted: {dir_name}/")
            except Exception as e:
                # Best-effort: report and continue with remaining dirs.
                print_error(f"Failed to delete {dir_name}/: {e}")

    # Update state: drop records for rolled-back stages.
    state = load_state(project_dir)
    stages_state: dict[str, JsonValue] = state.get("stages", {})  # type: ignore
    for s in stages_to_delete:
        if s in stages_state:
            del stages_state[s]
    save_state(project_dir, state)

    console.print(f"\n[green]✓[/green] Rolled back to stage: [cyan]{stage}[/cyan]")
    print_info(f"Run 'bead workflow run --from-stage {stage}' to continue")
|
|
1240
|
+
|
|
1241
|
+
|
|
1242
|
+
@workflow.command(name="list-templates")
def list_templates() -> None:
    """List available workflow templates.

    Shows all predefined workflow templates with descriptions and
    configuration requirements.

    Examples
    --------
    List templates:
        $ bead workflow list-templates
    """
    console.rule("[bold]Available Workflow Templates[/bold]")

    # One row per template registered in WORKFLOW_TEMPLATES.
    table = Table(title="Workflow Templates")
    table.add_column("Template ID", style="cyan")
    table.add_column("Name", style="bold")
    table.add_column("Description")

    for template_id, template_spec in WORKFLOW_TEMPLATES.items():
        # str() guards against non-string values in the template spec.
        table.add_row(
            template_id,
            str(template_spec["name"]),
            str(template_spec["description"]),
        )

    console.print(table)

    console.print("\n[bold]Usage:[/bold]")
    console.print(" bead workflow init <template-id>")
    console.print("\n[bold]Example:[/bold]")
    console.print(" bead workflow init acceptability-study")