bead 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
bead/cli/lists.py
ADDED
|
@@ -0,0 +1,490 @@
|
|
|
1
|
+
"""List partitioning commands for bead CLI.
|
|
2
|
+
|
|
3
|
+
This module provides commands for partitioning items into experiment lists
|
|
4
|
+
(Stage 4 of the bead pipeline).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
import click
|
|
13
|
+
from pydantic import ValidationError
|
|
14
|
+
from rich.console import Console
|
|
15
|
+
from rich.progress import Progress, SpinnerColumn, TextColumn
|
|
16
|
+
from rich.table import Table
|
|
17
|
+
|
|
18
|
+
from bead.cli.utils import print_error, print_info, print_success
|
|
19
|
+
from bead.items.item import Item
|
|
20
|
+
from bead.lists import ExperimentList
|
|
21
|
+
from bead.lists.constraints import BatchConstraint, ListConstraint
|
|
22
|
+
from bead.lists.partitioner import ListPartitioner
|
|
23
|
+
|
|
24
|
+
# Module-level rich console shared by the commands in this file for tables,
# the partitioning progress spinner, and plain styled output.
console = Console()
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@click.group()
def lists() -> None:
    r"""List construction commands (Stage 4).

    Commands for partitioning items into experiment lists.

    All commands read and write a single JSONL file that holds one
    experiment list per line.

    \b
    Examples:
    $ bead lists partition items.jsonl lists.jsonl --n-lists 5 --strategy balanced
    $ bead lists list lists.jsonl
    $ bead lists validate lists.jsonl
    $ bead lists show-stats lists.jsonl
    """
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _read_jsonl_models(path: Path, model_cls: type) -> list:
    """Parse every non-blank line of a JSONL file into ``model_cls`` instances.

    Each non-blank line is decoded with ``json.loads`` and the resulting
    mapping is passed to ``model_cls`` as keyword arguments. Errors
    (``json.JSONDecodeError``, ``pydantic.ValidationError``, ...) propagate
    to the caller.
    """
    records = []
    with open(path, encoding="utf-8") as f:
        for raw_line in f:
            text = raw_line.strip()
            if not text:
                continue
            records.append(model_cls(**json.loads(text)))
    return records


@click.command()
@click.argument(
    "items_file", type=click.Path(exists=True, dir_okay=False, path_type=Path)
)
@click.argument("output_file", type=click.Path(dir_okay=False, path_type=Path))
@click.option(
    "--strategy",
    type=click.Choice(["balanced", "random", "stratified"]),
    default="balanced",
    help="Partitioning strategy",
)
@click.option(
    "--n-lists",
    type=int,
    required=True,
    help="Number of lists to create",
)
@click.option(
    "--list-constraints",
    "list_constraint_files",
    type=click.Path(exists=True, path_type=Path),
    multiple=True,
    help="List constraint files (JSONL, can specify multiple)",
)
@click.option(
    "--batch-constraints",
    "batch_constraint_files",
    type=click.Path(exists=True, path_type=Path),
    multiple=True,
    help="Batch constraint files (JSONL, can specify multiple)",
)
@click.option(
    "--max-iterations",
    type=int,
    default=1000,
    help="Maximum iterations for batch constraint satisfaction (default: 1000)",
)
@click.option(
    "--random-seed",
    type=int,
    help="Random seed for reproducibility",
)
@click.option(
    "--dry-run",
    is_flag=True,
    help="Show what would be done without writing files",
)
@click.pass_context
def partition(
    ctx: click.Context,
    items_file: Path,
    output_file: Path,
    strategy: str,
    n_lists: int,
    list_constraint_files: tuple[Path, ...],
    batch_constraint_files: tuple[Path, ...],
    max_iterations: int,
    random_seed: int | None,
    dry_run: bool,
) -> None:
    r"""Partition items into experiment lists.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    items_file : Path
        Path to items JSONL file.
    output_file : Path
        Output JSONL file for experiment lists (one list per line).
    strategy : str
        Partitioning strategy.
    n_lists : int
        Number of lists to create.
    list_constraint_files : tuple[Path, ...]
        List constraint files (JSONL).
    batch_constraint_files : tuple[Path, ...]
        Batch constraint files (JSONL).
    max_iterations : int
        Maximum iterations for batch constraint satisfaction.
    random_seed : int | None
        Random seed for reproducibility.
    dry_run : bool
        Show what would be done without writing files.

    Examples
    --------
    # Balanced partitioning
    $ bead lists partition items.jsonl lists.jsonl --n-lists 5 --strategy balanced

    # With list constraints
    $ bead lists partition items.jsonl lists.jsonl --n-lists 5 \
        --list-constraints constraints/unique.jsonl

    # With batch constraints
    $ bead lists partition items.jsonl lists.jsonl --n-lists 5 \
        --batch-constraints constraints/coverage.jsonl

    # With both constraint types (repeat an option to pass several files)
    $ bead lists partition items.jsonl lists.jsonl --n-lists 5 \
        --list-constraints constraints/unique.jsonl \
        --list-constraints constraints/balance.jsonl \
        --batch-constraints constraints/coverage.jsonl \
        --max-iterations 10000

    # Dry run to preview
    $ bead lists partition items.jsonl lists.jsonl \
        --n-lists 5 --strategy balanced --dry-run
    """
    try:
        if n_lists < 1:
            print_error("--n-lists must be >= 1")
            ctx.exit(1)

        # Load items
        print_info(f"Loading items from {items_file}")
        items: list[Item] = _read_jsonl_models(items_file, Item)

        if not items:
            print_error("No items found in file")
            ctx.exit(1)

        print_info(f"Loaded {len(items)} items")

        # Extract item UUIDs and create metadata dict with all item data
        item_uuids = [item.id for item in items]
        metadata = {}
        for item in items:
            item_meta = {
                **item.item_metadata,
                "template_id": str(item.item_template_id),
            }
            # Add task_type if it exists (optional field for backwards compatibility)
            if getattr(item, "task_type", None) is not None:
                item_meta["task_type"] = item.task_type
            metadata[item.id] = item_meta

        # Load list constraints if provided
        list_constraints: list[ListConstraint] = []
        if list_constraint_files:
            print_info(f"Loading {len(list_constraint_files)} list constraint file(s)")
            for constraint_file in list_constraint_files:
                list_constraints.extend(
                    _read_jsonl_models(constraint_file, ListConstraint)
                )
            print_info(f"Loaded {len(list_constraints)} list constraint(s)")

        # Load batch constraints if provided
        batch_constraints: list[BatchConstraint] = []
        if batch_constraint_files:
            print_info(
                f"Loading {len(batch_constraint_files)} batch constraint file(s)"
            )
            for constraint_file in batch_constraint_files:
                batch_constraints.extend(
                    _read_jsonl_models(constraint_file, BatchConstraint)
                )
            print_info(f"Loaded {len(batch_constraints)} batch constraint(s)")

        # Create partitioner
        partitioner = ListPartitioner(random_seed=random_seed)

        # Partition items (choose method based on constraints)
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
        ) as progress:
            progress.add_task(
                f"Partitioning {len(items)} items into {n_lists} lists...", total=None
            )

            if batch_constraints:
                # Use batch-constrained partitioning
                experiment_lists = partitioner.partition_with_batch_constraints(
                    items=item_uuids,
                    n_lists=n_lists,
                    list_constraints=list_constraints if list_constraints else None,
                    batch_constraints=batch_constraints,
                    strategy=strategy,
                    metadata=metadata,
                    max_iterations=max_iterations,
                )
            else:
                # Use standard partitioning (with optional list constraints)
                experiment_lists = partitioner.partition(
                    items=item_uuids,
                    n_lists=n_lists,
                    constraints=list_constraints if list_constraints else None,
                    strategy=strategy,
                    metadata=metadata,
                )

        # Save lists (or show dry-run preview)
        if dry_run:
            print_info(f"[DRY RUN] Would write {len(experiment_lists)} lists to:")
            console.print(f" [dim]{output_file}[/dim]")
            for exp_list in experiment_lists:
                console.print(
                    f" list_{exp_list.list_number}: {len(exp_list.item_refs)} items"
                )
            print_info(
                f"[DRY RUN] Total: {len(experiment_lists)} lists, {len(items)} items"
            )
        else:
            # Ensure parent directory exists
            output_file.parent.mkdir(parents=True, exist_ok=True)
            # Write all lists to single JSONL file (one list per line)
            with open(output_file, "w", encoding="utf-8") as f:
                for exp_list in experiment_lists:
                    f.write(exp_list.model_dump_json() + "\n")

            print_success(
                f"Created {len(experiment_lists)} lists "
                f"with {len(items)} items: {output_file}"
            )

            # Show distribution
            console.print("\n[cyan]Distribution:[/cyan]")
            for exp_list in experiment_lists:
                console.print(
                    f" list_{exp_list.list_number}: {len(exp_list.item_refs)} items"
                )

    except click.exceptions.Exit:
        # ctx.exit() signals termination by raising Exit (a RuntimeError
        # subclass). Let it through so the generic handler below does not
        # report it as a second, misleading failure.
        raise
    except ValidationError as e:
        print_error(f"Validation error: {e}")
        ctx.exit(1)
    except Exception as e:
        print_error(f"Failed to partition items: {e}")
        ctx.exit(1)
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
@click.command(name="list")
@click.argument(
    "lists_file", type=click.Path(exists=True, dir_okay=False, path_type=Path)
)
@click.pass_context
def list_lists(
    ctx: click.Context,
    lists_file: Path,
) -> None:
    """List experiment lists in a JSONL file.

    Lines that cannot be parsed as an experiment list are skipped; the
    number of skipped lines is reported after the table.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    lists_file : Path
        JSONL file containing experiment lists (one list per line).

    Examples
    --------
    $ bead lists list lists.jsonl
    """
    try:
        table = Table(title=f"Experiment Lists in {lists_file}")
        table.add_column("List #", justify="right", style="yellow")
        table.add_column("Name", style="cyan")
        table.add_column("Items", justify="right", style="green")

        skipped_line_numbers: list[int] = []
        with open(lists_file, encoding="utf-8") as f:
            for line_number, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    continue
                try:
                    list_data = json.loads(line)
                    exp_list = ExperimentList(**list_data)
                    table.add_row(
                        str(exp_list.list_number),
                        exp_list.name,
                        str(len(exp_list.item_refs)),
                    )
                except Exception:
                    # Best-effort listing: keep going, but remember the line
                    # so the user is told the table is incomplete.
                    skipped_line_numbers.append(line_number)
                    continue

        console.print(table)

        if skipped_line_numbers:
            shown = ", ".join(str(n) for n in skipped_line_numbers[:10])
            if len(skipped_line_numbers) > 10:
                shown += ", ..."
            print_info(
                f"Skipped {len(skipped_line_numbers)} invalid line(s) "
                f"(line numbers: {shown})"
            )

    except Exception as e:
        print_error(f"Failed to list experiment lists: {e}")
        ctx.exit(1)
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
@click.command()
@click.argument("list_file", type=click.Path(exists=True, path_type=Path))
@click.pass_context
def validate(ctx: click.Context, list_file: Path) -> None:
    """Validate an experiment list file.

    Only the first line of the file is read and validated as an
    experiment list.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    list_file : Path
        Path to experiment list file.

    Examples
    --------
    $ bead lists validate list_0.jsonl
    """
    try:
        print_info(f"Validating experiment list: {list_file}")

        with open(list_file, encoding="utf-8") as f:
            first_line = f.readline().strip()

        if not first_line:
            print_error("File is empty")
            ctx.exit(1)

        list_data = json.loads(first_line)
        exp_list = ExperimentList(**list_data)

        print_success(
            f"Experiment list is valid: {exp_list.name} "
            f"({len(exp_list.item_refs)} items)"
        )

    except click.exceptions.Exit:
        # ctx.exit() raises Exit (a RuntimeError subclass). Re-raise it so the
        # generic handler below does not print a second, misleading error.
        raise
    except json.JSONDecodeError as e:
        print_error(f"Invalid JSON: {e}")
        ctx.exit(1)
    except ValidationError as e:
        print_error(f"Validation error: {e}")
        ctx.exit(1)
    except Exception as e:
        print_error(f"Failed to validate experiment list: {e}")
        ctx.exit(1)
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
@click.command()
@click.argument(
    "lists_file", type=click.Path(exists=True, dir_okay=False, path_type=Path)
)
@click.pass_context
def show_stats(ctx: click.Context, lists_file: Path) -> None:
    """Show statistics about experiment lists in a JSONL file.

    Lines that cannot be parsed as an experiment list are excluded from the
    statistics; the number of excluded lines is reported.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    lists_file : Path
        JSONL file containing experiment lists (one list per line).

    Examples
    --------
    $ bead lists show-stats lists.jsonl
    """
    try:
        print_info(f"Analyzing experiment lists in: {lists_file}")

        lists_data: list[ExperimentList] = []
        skipped_lines = 0
        with open(lists_file, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    list_data = json.loads(line)
                    exp_list = ExperimentList(**list_data)
                    lists_data.append(exp_list)
                except Exception:
                    # Best-effort: ignore the line, but count it so the user
                    # knows the statistics do not cover the whole file.
                    skipped_lines += 1
                    continue

        if not lists_data:
            print_error("No valid experiment lists found")
            ctx.exit(1)

        if skipped_lines:
            print_info(
                f"Skipped {skipped_lines} line(s) that are not valid experiment lists"
            )

        # Calculate statistics (lists_data is non-empty past the check above)
        total_lists = len(lists_data)
        item_counts = [len(exp_list.item_refs) for exp_list in lists_data]
        total_items = sum(item_counts)
        avg_items = total_items / total_lists
        min_items = min(item_counts)
        max_items = max(item_counts)

        # Display statistics
        table = Table(title="Experiment List Statistics")
        table.add_column("Metric", style="cyan")
        table.add_column("Value", style="green", justify="right")

        table.add_row("Total Lists", str(total_lists))
        table.add_row("Total Items", str(total_items))
        table.add_row("", "")  # Separator
        table.add_row("Avg Items per List", f"{avg_items:.1f}")
        table.add_row("Min Items per List", str(min_items))
        table.add_row("Max Items per List", str(max_items))

        console.print(table)

        # Show per-list breakdown
        console.print("\n[cyan]Per-List Breakdown:[/cyan]")
        for exp_list in sorted(lists_data, key=lambda x: x.list_number):
            console.print(f" {exp_list.name}: {len(exp_list.item_refs)} items")

    except click.exceptions.Exit:
        # ctx.exit() raises Exit (a RuntimeError subclass). Re-raise it so the
        # generic handler below does not print a second, misleading error.
        raise
    except Exception as e:
        print_error(f"Failed to show statistics: {e}")
        ctx.exit(1)
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
# Import constraint builder commands
|
|
459
|
+
from bead.cli.list_constraints import ( # noqa: E402
|
|
460
|
+
create_balance,
|
|
461
|
+
create_batch_balance,
|
|
462
|
+
create_batch_coverage,
|
|
463
|
+
create_batch_diversity,
|
|
464
|
+
create_batch_min_occurrence,
|
|
465
|
+
create_diversity,
|
|
466
|
+
create_grouped_quantile,
|
|
467
|
+
create_quantile,
|
|
468
|
+
create_size,
|
|
469
|
+
create_uniqueness,
|
|
470
|
+
)
|
|
471
|
+
|
|
472
|
+
# Attach the core commands to the `lists` group; each keeps the name that its
# own @click.command decorator gave it.
for _core_command in (partition, list_lists, validate, show_stats):
    lists.add_command(_core_command)

# Attach the constraint-builder commands under explicit CLI names:
# list constraint builders first, then batch constraint builders.
for _cli_name, _builder_command in (
    ("create-uniqueness", create_uniqueness),
    ("create-balance", create_balance),
    ("create-quantile", create_quantile),
    ("create-grouped-quantile", create_grouped_quantile),
    ("create-diversity", create_diversity),
    ("create-size", create_size),
    ("create-batch-coverage", create_batch_coverage),
    ("create-batch-balance", create_batch_balance),
    ("create-batch-diversity", create_batch_diversity),
    ("create-batch-min-occurrence", create_batch_min_occurrence),
):
    lists.add_command(_builder_command, name=_cli_name)
|