bead 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
"""Experiment list data model for organizing experimental items.
|
|
2
|
+
|
|
3
|
+
This module provides the ExperimentList model for organizing experimental items
|
|
4
|
+
into lists for presentation to participants. Lists use stand-off annotation with
|
|
5
|
+
UUID references to items rather than embedding full item objects.
|
|
6
|
+
|
|
7
|
+
The model supports:
|
|
8
|
+
- Item assignment tracking via UUIDs
|
|
9
|
+
- Presentation order specification
|
|
10
|
+
- Constraint satisfaction tracking
|
|
11
|
+
- Balance metrics computation
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import random
|
|
17
|
+
from typing import TYPE_CHECKING
|
|
18
|
+
|
|
19
|
+
if TYPE_CHECKING:
    # Static type checkers use the alias declared in the items package.
    from bead.items.item_template import MetadataValue
else:
    # Recursive type for metadata values, re-declared locally so the name
    # exists at runtime without importing bead.items.item_template.
    # NOTE(review): presumably this avoids an import cycle while still letting
    # pydantic resolve the field annotations that use it - confirm.
    type MetadataValue = (
        str | int | float | bool | None | dict[str, MetadataValue] | list[MetadataValue]
    )
|
|
26
|
+
from uuid import UUID
|
|
27
|
+
|
|
28
|
+
from pydantic import Field, field_validator, model_validator
|
|
29
|
+
|
|
30
|
+
from bead.data.base import BeadBaseModel
|
|
31
|
+
from bead.lists.constraints import ListConstraint
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Factory functions for default values
|
|
35
|
+
def _empty_uuid_list() -> list[UUID]:
|
|
36
|
+
"""Return empty UUID list."""
|
|
37
|
+
return []
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _empty_constraint_list() -> list[ListConstraint]:
|
|
41
|
+
"""Return empty ListConstraint list."""
|
|
42
|
+
return []
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _empty_uuid_bool_dict() -> dict[UUID, bool]:
|
|
46
|
+
"""Return empty UUID-to-bool dict."""
|
|
47
|
+
return {}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _empty_metadata_dict() -> dict[str, MetadataValue]:
|
|
51
|
+
"""Return empty metadata dictionary."""
|
|
52
|
+
return {}
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class ExperimentList(BeadBaseModel):
    """A list of experimental items for participant presentation.

    Uses stand-off annotation - stores only item UUIDs, not full items.
    Items can be looked up by UUID from an ItemCollection or Repository.

    Attributes
    ----------
    name : str
        Name of this list (e.g., "list_0", "practice_list").
    list_number : int
        Numeric identifier for this list (must be >= 0).
    item_refs : list[UUID]
        UUIDs of items in this list (stand-off annotation).
    list_constraints : list[ListConstraint]
        Constraints this list must satisfy.
    constraint_satisfaction : dict[UUID, bool]
        Map of constraint UUIDs to satisfaction status.
    presentation_order : list[UUID] | None
        Explicit presentation order (if None, use item_refs order).
        Must contain exactly the same UUIDs as item_refs.
    list_metadata : dict[str, MetadataValue]
        Metadata for this list.
    balance_metrics : dict[str, MetadataValue]
        Metrics about list balance (e.g., distribution statistics).

    Examples
    --------
    >>> from uuid import uuid4
    >>> exp_list = ExperimentList(
    ...     name="list_0",
    ...     list_number=0
    ... )
    >>> item_id = uuid4()
    >>> exp_list.add_item(item_id)
    >>> len(exp_list.item_refs)
    1
    >>> exp_list.shuffle_order(seed=42)
    >>> exp_list.get_presentation_order()[0] == item_id
    True
    """

    name: str = Field(..., description="List name")
    list_number: int = Field(..., ge=0, description="Numeric list identifier")
    item_refs: list[UUID] = Field(
        default_factory=_empty_uuid_list, description="Item UUIDs (stand-off)"
    )
    list_constraints: list[ListConstraint] = Field(
        default_factory=_empty_constraint_list, description="List constraints"
    )
    constraint_satisfaction: dict[UUID, bool] = Field(
        default_factory=_empty_uuid_bool_dict,
        description="Constraint satisfaction status",
    )
    presentation_order: list[UUID] | None = Field(
        default=None, description="Explicit presentation order"
    )
    list_metadata: dict[str, MetadataValue] = Field(
        default_factory=_empty_metadata_dict, description="List metadata"
    )
    balance_metrics: dict[str, MetadataValue] = Field(
        default_factory=_empty_metadata_dict, description="Balance metrics"
    )

    @field_validator("name")
    @classmethod
    def validate_name(cls, v: str) -> str:
        """Validate name is non-empty.

        Parameters
        ----------
        v : str
            Name to validate.

        Returns
        -------
        str
            Validated name (whitespace stripped).

        Raises
        ------
        ValueError
            If name is empty or contains only whitespace.
        """
        stripped = v.strip() if v else ""
        if not stripped:
            raise ValueError("name must be non-empty")
        return stripped

    @model_validator(mode="after")
    def validate_presentation_order(self) -> ExperimentList:
        """Validate presentation_order matches item_refs.

        If presentation_order is set, it must contain exactly the same UUIDs
        as item_refs (no more, no less, no duplicates).

        Returns
        -------
        ExperimentList
            Validated list.

        Raises
        ------
        ValueError
            If presentation_order doesn't match item_refs.
        """
        if self.presentation_order is None:
            return self

        order_set = set(self.presentation_order)

        # A set shorter than the list means some UUID occurs more than once.
        if len(self.presentation_order) != len(order_set):
            raise ValueError("presentation_order contains duplicate UUIDs")

        item_set = set(self.item_refs)
        if order_set != item_set:
            extra = order_set - item_set
            missing = item_set - order_set

            error_parts: list[str] = []
            if extra:
                error_parts.append(f"extra UUIDs: {extra}")
            if missing:
                error_parts.append(f"missing UUIDs: {missing}")

            raise ValueError(
                "presentation_order must contain exactly same UUIDs "
                f"as item_refs ({', '.join(error_parts)})"
            )

        return self

    def add_item(self, item_id: UUID) -> None:
        """Add an item to this list.

        The UUID is appended to item_refs. If an explicit presentation order
        has already been set, the UUID is also appended to it so that the
        order keeps covering exactly the UUIDs in item_refs (the invariant
        checked by ``validate_presentation_order``).

        Parameters
        ----------
        item_id : UUID
            UUID of item to add.

        Examples
        --------
        >>> from uuid import uuid4
        >>> exp_list = ExperimentList(name="test", list_number=0)
        >>> item_id = uuid4()
        >>> exp_list.add_item(item_id)
        >>> item_id in exp_list.item_refs
        True
        >>> exp_list.shuffle_order(seed=0)
        >>> other_id = uuid4()
        >>> exp_list.add_item(other_id)
        >>> other_id in exp_list.get_presentation_order()
        True
        """
        self.item_refs.append(item_id)

        # Without this, a list mutated after shuffle_order() would fail its
        # own model validator when serialized and loaded again. The membership
        # test keeps presentation_order free of duplicates even if the same
        # UUID is added to item_refs twice.
        order = self.presentation_order
        if order is not None and item_id not in order:
            order.append(item_id)

        self.update_modified_time()

    def remove_item(self, item_id: UUID) -> None:
        """Remove an item from this list.

        Removes the first occurrence of the UUID from item_refs. The UUID is
        dropped from presentation_order only once no reference to it remains
        in item_refs, so the two stay consistent.

        Parameters
        ----------
        item_id : UUID
            UUID of item to remove.

        Raises
        ------
        ValueError
            If item_id is not in the list.

        Examples
        --------
        >>> from uuid import uuid4
        >>> exp_list = ExperimentList(name="test", list_number=0)
        >>> item_id = uuid4()
        >>> exp_list.add_item(item_id)
        >>> exp_list.remove_item(item_id)
        >>> item_id in exp_list.item_refs
        False
        """
        try:
            self.item_refs.remove(item_id)
        except ValueError:
            raise ValueError(f"Item {item_id} not found in list") from None

        # Also remove from presentation_order if present, unless a duplicate
        # reference to the same item is still part of item_refs.
        order = self.presentation_order
        if order is not None and item_id in order and item_id not in self.item_refs:
            order.remove(item_id)

        self.update_modified_time()

    def shuffle_order(self, seed: int | None = None) -> None:
        """Shuffle presentation order.

        Creates a randomized presentation order from item_refs.
        Uses random.Random(seed) for reproducible shuffling.

        Parameters
        ----------
        seed : int | None
            Random seed for reproducibility.

        Examples
        --------
        >>> from uuid import uuid4
        >>> exp_list = ExperimentList(name="test", list_number=0)
        >>> exp_list.add_item(uuid4())
        >>> exp_list.add_item(uuid4())
        >>> exp_list.shuffle_order(seed=42)
        >>> exp_list.presentation_order is not None
        True
        """
        # A private generator keeps the global random state untouched.
        rng = random.Random(seed)
        # Shuffle a copy so item_refs keeps its original order.
        self.presentation_order = self.item_refs.copy()
        rng.shuffle(self.presentation_order)
        self.update_modified_time()

    def get_presentation_order(self) -> list[UUID]:
        """Get the presentation order.

        Returns presentation_order if it is set and non-empty, otherwise
        returns item_refs. The returned list is the stored list itself,
        not a copy.

        Returns
        -------
        list[UUID]
            UUIDs in presentation order.

        Examples
        --------
        >>> from uuid import uuid4
        >>> exp_list = ExperimentList(name="test", list_number=0)
        >>> item_id = uuid4()
        >>> exp_list.add_item(item_id)
        >>> exp_list.get_presentation_order()[0] == item_id
        True
        """
        return self.presentation_order if self.presentation_order else self.item_refs
|
|
@@ -0,0 +1,378 @@
|
|
|
1
|
+
"""List collection data model for managing multiple experimental lists.
|
|
2
|
+
|
|
3
|
+
This module provides the ListCollection model for managing multiple ExperimentList
|
|
4
|
+
instances along with metadata about the partitioning process that created them.
|
|
5
|
+
|
|
6
|
+
The model supports:
|
|
7
|
+
- Multiple experimental lists
|
|
8
|
+
- Partitioning metadata tracking
|
|
9
|
+
- Coverage validation (ensuring all items are assigned exactly once)
|
|
10
|
+
- List lookup by number
|
|
11
|
+
- JSONL serialization (one list per line)
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import json
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import TYPE_CHECKING, TypedDict
|
|
19
|
+
from uuid import UUID
|
|
20
|
+
|
|
21
|
+
from pydantic import Field, field_validator
|
|
22
|
+
|
|
23
|
+
from bead.data.base import BeadBaseModel
|
|
24
|
+
from bead.lists.experiment_list import ExperimentList
|
|
25
|
+
|
|
26
|
+
if TYPE_CHECKING:
    # Static type checkers use the alias declared in the items package.
    from bead.items.item_template import MetadataValue
else:
    # Recursive type for metadata values, re-declared locally so the name
    # exists at runtime without importing bead.items.item_template.
    # NOTE(review): presumably this avoids an import cycle while still letting
    # pydantic resolve the field annotations that use it - confirm.
    type MetadataValue = (
        str | int | float | bool | None | dict[str, MetadataValue] | list[MetadataValue]
    )
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class CoverageValidationResult(TypedDict):
    """Report produced by ``ListCollection.validate_coverage``.

    All four keys are required.
    """

    # True when there are no missing and no duplicate items.
    valid: bool
    # Expected items that are not referenced by any list.
    missing_items: list[UUID]
    # Items referenced more than once across the lists.
    duplicate_items: list[UUID]
    # Total number of item references across all lists.
    total_assigned: int
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# Factory functions for default values
|
|
45
|
+
def _empty_experiment_list_list() -> list[ExperimentList]:
|
|
46
|
+
"""Return empty ExperimentList list."""
|
|
47
|
+
return []
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _empty_metadata_dict() -> dict[str, MetadataValue]:
|
|
51
|
+
"""Return empty metadata dictionary."""
|
|
52
|
+
return {}
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class ListCollection(BeadBaseModel):
    """A collection of experimental lists.

    Contains multiple ExperimentList instances along with metadata about
    the partitioning process that created them.

    Attributes
    ----------
    name : str
        Name of this collection.
    source_items_id : UUID
        UUID of source ItemCollection.
    lists : list[ExperimentList]
        The experimental lists. Their list_number values must be unique.
    partitioning_strategy : str
        Strategy used for partitioning (e.g., "balanced", "random", "stratified").
    partitioning_config : dict[str, MetadataValue]
        Configuration for partitioning.
    partitioning_stats : dict[str, MetadataValue]
        Statistics about the partitioning process.

    Examples
    --------
    >>> from uuid import uuid4
    >>> collection = ListCollection(
    ...     name="my_lists",
    ...     source_items_id=uuid4(),
    ...     partitioning_strategy="balanced"
    ... )
    >>> exp_list = ExperimentList(name="list_0", list_number=0)
    >>> collection.add_list(exp_list)
    >>> len(collection.lists)
    1
    """

    name: str = Field(..., description="Collection name")
    source_items_id: UUID = Field(..., description="Source ItemCollection UUID")
    lists: list[ExperimentList] = Field(
        default_factory=_empty_experiment_list_list, description="Experimental lists"
    )
    partitioning_strategy: str = Field(..., description="Partitioning strategy used")
    partitioning_config: dict[str, MetadataValue] = Field(
        default_factory=_empty_metadata_dict, description="Partitioning configuration"
    )
    partitioning_stats: dict[str, MetadataValue] = Field(
        default_factory=_empty_metadata_dict, description="Partitioning statistics"
    )

    @field_validator("name", "partitioning_strategy")
    @classmethod
    def validate_non_empty_string(cls, v: str) -> str:
        """Validate string fields are non-empty.

        Parameters
        ----------
        v : str
            String to validate.

        Returns
        -------
        str
            Validated string (whitespace stripped).

        Raises
        ------
        ValueError
            If string is empty or contains only whitespace.
        """
        stripped = v.strip() if v else ""
        if not stripped:
            raise ValueError("Field must be non-empty")
        return stripped

    @field_validator("lists")
    @classmethod
    def validate_unique_list_numbers(
        cls, v: list[ExperimentList]
    ) -> list[ExperimentList]:
        """Validate all list_numbers are unique.

        Parameters
        ----------
        v : list[ExperimentList]
            Lists to validate.

        Returns
        -------
        list[ExperimentList]
            Validated lists.

        Raises
        ------
        ValueError
            If duplicate list_numbers found.
        """
        # Single pass: remember every number seen and collect the repeats.
        seen: set[int] = set()
        duplicates: set[int] = set()
        for exp_list in v:
            number = exp_list.list_number
            if number in seen:
                duplicates.add(number)
            else:
                seen.add(number)

        if duplicates:
            raise ValueError(f"Duplicate list_numbers found: {duplicates}")

        return v

    def add_list(self, exp_list: ExperimentList) -> None:
        """Add a list to the collection.

        Parameters
        ----------
        exp_list : ExperimentList
            List to add.

        Raises
        ------
        ValueError
            If the collection already holds a list with the same list_number.

        Examples
        --------
        >>> from uuid import uuid4
        >>> collection = ListCollection(
        ...     name="test",
        ...     source_items_id=uuid4(),
        ...     partitioning_strategy="balanced"
        ... )
        >>> exp_list = ExperimentList(name="list_0", list_number=0)
        >>> collection.add_list(exp_list)
        >>> len(collection.lists)
        1
        """
        # Appending does not re-run the "lists" field validator, so enforce
        # the uniqueness it guarantees at construction time here as well.
        # Otherwise get_list_by_number() becomes ambiguous and a file written
        # by to_jsonl() can no longer be loaded by from_jsonl().
        number = exp_list.list_number
        if any(existing.list_number == number for existing in self.lists):
            raise ValueError(f"Duplicate list_numbers found: {{{number}}}")

        self.lists.append(exp_list)
        self.update_modified_time()

    def get_list_by_number(self, list_number: int) -> ExperimentList | None:
        """Get a list by its number.

        Parameters
        ----------
        list_number : int
            List number to search for.

        Returns
        -------
        ExperimentList | None
            List with matching number, or None if not found.

        Examples
        --------
        >>> from uuid import uuid4
        >>> collection = ListCollection(
        ...     name="test",
        ...     source_items_id=uuid4(),
        ...     partitioning_strategy="balanced"
        ... )
        >>> exp_list = ExperimentList(name="list_0", list_number=0)
        >>> collection.add_list(exp_list)
        >>> found = collection.get_list_by_number(0)
        >>> found is not None
        True
        """
        return next(
            (
                exp_list
                for exp_list in self.lists
                if exp_list.list_number == list_number
            ),
            None,
        )

    def get_all_item_refs(self) -> list[UUID]:
        """Return all unique item UUIDs across all lists.

        Returns
        -------
        list[UUID]
            All unique item UUIDs, in order of first appearance (lists in
            collection order, items in item_refs order).

        Examples
        --------
        >>> from uuid import uuid4
        >>> collection = ListCollection(
        ...     name="test",
        ...     source_items_id=uuid4(),
        ...     partitioning_strategy="balanced"
        ... )
        >>> exp_list = ExperimentList(name="list_0", list_number=0)
        >>> item_id = uuid4()
        >>> exp_list.add_item(item_id)
        >>> collection.add_list(exp_list)
        >>> item_id in collection.get_all_item_refs()
        True
        """
        # dict keys de-duplicate while keeping insertion order, which makes
        # the result reproducible (a plain set has no defined order).
        ordered_unique = dict.fromkeys(
            item_id for exp_list in self.lists for item_id in exp_list.item_refs
        )
        return list(ordered_unique)

    def validate_coverage(self, all_item_ids: set[UUID]) -> CoverageValidationResult:
        """Check that all items are assigned exactly once.

        Validates that:
        - All items in all_item_ids are assigned to at least one list
        - No item appears in multiple lists (items assigned exactly once)

        Parameters
        ----------
        all_item_ids : set[UUID]
            Set of all item UUIDs that should be assigned.

        Returns
        -------
        CoverageValidationResult
            Validation report with keys:
            - "valid": bool - Whether validation passed
            - "missing_items": list[UUID] - Items not assigned to any list
            - "duplicate_items": list[UUID] - Items assigned to multiple lists
            - "total_assigned": int - Total assignments across all lists

        Notes
        -----
        Every occurrence in any item_refs counts as one assignment, so a UUID
        repeated inside a single list is reported as a duplicate too. Items
        that are assigned but absent from all_item_ids are not reported.

        Examples
        --------
        >>> from uuid import uuid4
        >>> collection = ListCollection(
        ...     name="test",
        ...     source_items_id=uuid4(),
        ...     partitioning_strategy="balanced"
        ... )
        >>> item_id = uuid4()
        >>> exp_list = ExperimentList(name="list_0", list_number=0)
        >>> exp_list.add_item(item_id)
        >>> collection.add_list(exp_list)
        >>> result = collection.validate_coverage({item_id})
        >>> result["valid"]
        True
        """
        # Count assignments for each item
        item_counts: dict[UUID, int] = {}
        for exp_list in self.lists:
            for item_id in exp_list.item_refs:
                item_counts[item_id] = item_counts.get(item_id, 0) + 1

        # Find missing items (in all_item_ids but not assigned)
        missing_items = list(all_item_ids - item_counts.keys())

        # Find duplicate items (assigned more than once)
        duplicate_items = [
            item_id for item_id, count in item_counts.items() if count > 1
        ]

        return {
            # Validation passes if no missing and no duplicates
            "valid": not missing_items and not duplicate_items,
            "missing_items": missing_items,
            "duplicate_items": duplicate_items,
            "total_assigned": sum(item_counts.values()),
        }

    def to_jsonl(self, path: Path | str) -> None:
        """Write lists to a JSONL file (one list per line).

        Only the lists are written; the collection's own fields (name,
        source_items_id, partitioning_*) are not part of the file. Missing
        parent directories are created and an existing file is overwritten.

        Parameters
        ----------
        path : Path | str
            Path to output JSONL file.

        Examples
        --------
        >>> from uuid import uuid4
        >>> collection = ListCollection(
        ...     name="test",
        ...     source_items_id=uuid4(),
        ...     partitioning_strategy="balanced"
        ... )
        >>> exp_list = ExperimentList(name="list_0", list_number=0)
        >>> collection.add_list(exp_list)
        >>> collection.to_jsonl("lists.jsonl")  # doctest: +SKIP
        """
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, "w", encoding="utf-8") as f:
            f.writelines(
                exp_list.model_dump_json() + "\n" for exp_list in self.lists
            )

    @classmethod
    def from_jsonl(
        cls,
        path: Path | str,
        name: str = "loaded_lists",
        source_items_id: UUID | None = None,
        partitioning_strategy: str = "unknown",
    ) -> ListCollection:
        """Load lists from a JSONL file (one list per line).

        Blank lines are skipped. Collection-level fields are not stored in
        the file and are taken from the arguments instead.

        Parameters
        ----------
        path : Path | str
            Path to JSONL file containing experiment lists.
        name : str
            Name for the collection (default: "loaded_lists").
        source_items_id : UUID | None
            Source items UUID. If None, uses a nil UUID.
        partitioning_strategy : str
            Strategy name (default: "unknown").

        Returns
        -------
        ListCollection
            Collection containing the loaded lists.

        Raises
        ------
        OSError
            If the file cannot be opened.
        json.JSONDecodeError
            If a non-blank line is not valid JSON.
        ValueError
            If a line does not describe a valid ExperimentList or the loaded
            lists do not form a valid collection (e.g. duplicate
            list_numbers); raised through model validation.

        Examples
        --------
        >>> collection = ListCollection.from_jsonl("lists.jsonl")  # doctest: +SKIP
        """
        path = Path(path)
        lists: list[ExperimentList] = []

        with open(path, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                list_data = json.loads(line)
                lists.append(ExperimentList(**list_data))

        # UUID(int=0) is the nil UUID, used as a placeholder source id.
        if source_items_id is None:
            source_items_id = UUID(int=0)

        return cls(
            name=name,
            source_items_id=source_items_id,
            lists=lists,
            partitioning_strategy=partitioning_strategy,
        )
|