bead 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,1067 @@
|
|
|
1
|
+
"""Constraint models for experimental list composition.
|
|
2
|
+
|
|
3
|
+
This module defines constraints that can be applied to experimental lists
to ensure balanced, well-distributed item selections. Constraints can specify:
- Uniqueness: No duplicate property values
- Conditional uniqueness: Uniqueness enforced only when a DSL condition holds
- Balance: Balanced distribution across categories
- Quantile: Uniform distribution across quantiles
- Grouped quantile: Uniform quantile distribution within groups
- Diversity: Minimum number of unique values for a property
- Size: List size requirements
- Ordering: Item presentation order constraints (runtime enforcement)
|
|
10
|
+
|
|
11
|
+
All constraints inherit from BeadBaseModel and use Pydantic discriminated unions
|
|
12
|
+
for type-safe deserialization.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from typing import Annotated, Literal
|
|
18
|
+
from uuid import UUID
|
|
19
|
+
|
|
20
|
+
from pydantic import Field, field_validator, model_validator
|
|
21
|
+
|
|
22
|
+
from bead.data.base import BeadBaseModel
|
|
23
|
+
from bead.resources.constraints import ContextValue
|
|
24
|
+
|
|
25
|
+
# Type alias enumerating the kinds of list-level constraints:
#   uniqueness             -> no duplicate property values
#   conditional_uniqueness -> uniqueness gated by a DSL expression
#   balance                -> balanced distribution of a property
#   quantile               -> uniform spread across quantiles
#   grouped_quantile       -> quantile distribution within groups
#   diversity              -> minimum number of unique property values
#   size                   -> list size constraints
#   ordering               -> presentation order (enforced at runtime)
ListConstraintType = Literal[
    "uniqueness",
    "conditional_uniqueness",
    "balance",
    "quantile",
    "grouped_quantile",
    "diversity",
    "size",
    "ordering",
]
|
|
36
|
+
|
|
37
|
+
# Type alias enumerating the kinds of batch-level constraints:
#   coverage       -> all values appear somewhere in the batch
#   balance        -> balanced distribution across the entire batch
#   diversity      -> prevent values appearing in too many lists
#   min_occurrence -> minimum occurrences per value across the batch
BatchConstraintType = Literal[
    "coverage",
    "balance",
    "diversity",
    "min_occurrence",
]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class UniquenessConstraint(BeadBaseModel):
    """List constraint forbidding repeated values of a property.

    No two items in a list may share the value produced by
    ``property_expression``. Typical uses are preventing repeated target
    verbs, sentence structures, or other experimental materials.

    Attributes
    ----------
    constraint_type : Literal["uniqueness"]
        Discriminator tag for this constraint; always "uniqueness".
    property_expression : str
        DSL expression yielding the value that must be unique. The item
        is exposed to the expression under the name 'item'.
        Examples: "item.metadata.target_verb", "item.templates.sentence.text"
    context : dict[str, ContextValue]
        Extra variables made available during DSL evaluation.
    allow_null : bool, default=False
        Controls how null/None values are treated. When False, None
        values count as duplicates; when True, several None values may
        coexist.
    priority : int, default=1
        Importance of the constraint (larger means more important).
        During partitioning, violating a higher-priority constraint is
        penalized more heavily.

    Examples
    --------
    >>> # No two items with same target verb (high priority)
    >>> constraint = UniquenessConstraint(
    ...     property_expression="item.metadata.target_verb",
    ...     allow_null=False,
    ...     priority=5
    ... )
    >>> constraint.priority
    5
    """

    constraint_type: Literal["uniqueness"] = "uniqueness"
    property_expression: str = Field(
        ...,
        description="DSL expression for value to check",
    )
    context: dict[str, ContextValue] = Field(
        default_factory=dict,
        description="Additional context variables",
    )
    allow_null: bool = Field(
        default=False,
        description="Whether to allow multiple null values",
    )
    priority: int = Field(
        default=1,
        ge=1,
        description="Constraint priority (higher = more important)",
    )

    @field_validator("property_expression")
    @classmethod
    def validate_property_expression(cls, v: str) -> str:
        """Reject blank property expressions and trim the accepted one.

        Parameters
        ----------
        v : str
            Candidate property expression.

        Returns
        -------
        str
            The expression with surrounding whitespace removed.

        Raises
        ------
        ValueError
            If the expression is empty or consists only of whitespace.
        """
        # Strip once; a falsy input is treated the same as a blank string.
        trimmed = v.strip() if v else ""
        if trimmed:
            return trimmed
        raise ValueError("property_expression must be non-empty")
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class BalanceConstraint(BeadBaseModel):
    """List constraint asking for a balanced categorical distribution.

    The categorical value produced by ``property_expression`` should be
    balanced across the items of a list, either according to explicit
    per-category target counts or, when none are given, equally.

    Attributes
    ----------
    constraint_type : Literal["balance"]
        Discriminator tag for this constraint; always "balance".
    property_expression : str
        DSL expression yielding the category value to balance. The item
        is exposed to the expression under the name 'item'.
        Example: "item.metadata.transitivity"
    context : dict[str, ContextValue]
        Extra variables made available during DSL evaluation.
    target_counts : dict[str, int] | None, default=None
        Mapping from category value to its target count. None means an
        equal distribution is assumed.
    tolerance : float, default=0.1
        Permitted deviation from the target, expressed as a proportion
        in the range 0.0-1.0 (0.1 means up to 10% deviation is accepted).
    priority : int, default=1
        Importance of the constraint (larger means more important).
        During partitioning, violating a higher-priority constraint is
        penalized more heavily.

    Examples
    --------
    >>> # Equal number of transitive and intransitive verbs
    >>> constraint = BalanceConstraint(
    ...     property_expression="item.metadata.transitivity",
    ...     tolerance=0.1
    ... )
    >>> # 2:1 ratio with high priority
    >>> constraint2 = BalanceConstraint(
    ...     property_expression="item.metadata.grammatical",
    ...     target_counts={"true": 20, "false": 10},
    ...     tolerance=0.05,
    ...     priority=3
    ... )
    """

    constraint_type: Literal["balance"] = "balance"
    property_expression: str = Field(
        ...,
        description="DSL expression for category value",
    )
    context: dict[str, ContextValue] = Field(
        default_factory=dict,
        description="Additional context variables",
    )
    target_counts: dict[str, int] | None = Field(
        default=None,
        description="Target counts per category (None = equal)",
    )
    tolerance: float = Field(
        default=0.1,
        ge=0.0,
        le=1.0,
        description="Allowed deviation from target",
    )
    priority: int = Field(
        default=1,
        ge=1,
        description="Constraint priority (higher = more important)",
    )

    @field_validator("property_expression")
    @classmethod
    def validate_property_expression(cls, v: str) -> str:
        """Reject blank property expressions and trim the accepted one.

        Parameters
        ----------
        v : str
            Candidate property expression.

        Returns
        -------
        str
            The expression with surrounding whitespace removed.

        Raises
        ------
        ValueError
            If the expression is empty or consists only of whitespace.
        """
        # Strip once; a falsy input is treated the same as a blank string.
        trimmed = v.strip() if v else ""
        if trimmed:
            return trimmed
        raise ValueError("property_expression must be non-empty")

    @field_validator("target_counts")
    @classmethod
    def validate_target_counts(cls, v: dict[str, int] | None) -> dict[str, int] | None:
        """Ensure no target count is negative.

        Parameters
        ----------
        v : dict[str, int] | None
            Per-category target counts, or None when unspecified.

        Returns
        -------
        dict[str, int] | None
            The input, unchanged, once it has passed the check.

        Raises
        ------
        ValueError
            If any count is below zero.
        """
        # Nothing to check when no explicit targets were supplied.
        if v is None:
            return v
        for label, amount in v.items():
            if amount < 0:
                raise ValueError(
                    "target_counts values must be non-negative, "
                    f"got {amount} for '{label}'"
                )
        return v
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
class QuantileConstraint(BeadBaseModel):
    """List constraint asking for a uniform spread across quantiles.

    Items should be distributed uniformly over the quantiles of a numeric
    property. This helps balance language model probabilities, word
    frequencies, or other continuous variables; the property may be a
    complex DSL expression computing a derived metric.

    Attributes
    ----------
    constraint_type : Literal["quantile"]
        Discriminator tag for this constraint; always "quantile".
    property_expression : str
        DSL expression computing the numeric value to quantile. The item
        is exposed to the expression under the name 'item'. It can be
        simple (e.g., "item.metadata.lm_prob") or complex
        (e.g., "variance([item['val1'], item['val2'], item['val3']])")
    context : dict[str, ContextValue]
        Extra variables made available during DSL evaluation.
        Example: {"hyp_keys": ["hyp1", "hyp2", "hyp3"]}
    n_quantiles : int, default=5
        How many quantiles to create (must be >= 2).
    items_per_quantile : int, default=2
        Target item count for each quantile (must be >= 1).
    priority : int, default=1
        Importance of the constraint (larger means more important).
        During partitioning, violating a higher-priority constraint is
        penalized more heavily.

    Examples
    --------
    >>> # Uniform distribution of LM probabilities across 5 quantiles
    >>> constraint = QuantileConstraint(
    ...     property_expression="item.metadata.lm_prob",
    ...     n_quantiles=5,
    ...     items_per_quantile=2
    ... )
    >>> # Variance of precomputed NLI scores
    >>> constraint2 = QuantileConstraint(
    ...     property_expression="item['nli_variance']",
    ...     n_quantiles=5,
    ...     items_per_quantile=2
    ... )
    """

    constraint_type: Literal["quantile"] = "quantile"
    property_expression: str = Field(
        ...,
        description="DSL expression for numeric value",
    )
    context: dict[str, ContextValue] = Field(
        default_factory=dict,
        description="Additional context variables",
    )
    n_quantiles: int = Field(
        default=5,
        ge=2,
        description="Number of quantiles",
    )
    items_per_quantile: int = Field(
        default=2,
        ge=1,
        description="Items per quantile",
    )
    priority: int = Field(
        default=1,
        ge=1,
        description="Constraint priority (higher = more important)",
    )

    @field_validator("property_expression")
    @classmethod
    def validate_property_expression(cls, v: str) -> str:
        """Reject blank property expressions and trim the accepted one.

        Parameters
        ----------
        v : str
            Candidate property expression.

        Returns
        -------
        str
            The expression with surrounding whitespace removed.

        Raises
        ------
        ValueError
            If the expression is empty or consists only of whitespace.
        """
        # Strip once; a falsy input is treated the same as a blank string.
        trimmed = v.strip() if v else ""
        if trimmed:
            return trimmed
        raise ValueError("property_expression must be non-empty")
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
class GroupedQuantileConstraint(BeadBaseModel):
    """List constraint asking for a uniform quantile spread per group.

    Within every group defined by ``group_by_expression``, items should
    be distributed uniformly over the quantiles of the numeric value from
    ``property_expression``. This balances a continuous variable
    independently inside each categorical group.

    Attributes
    ----------
    constraint_type : Literal["grouped_quantile"]
        Discriminator tag for this constraint; always "grouped_quantile".
    property_expression : str
        DSL expression computing the numeric value to quantile. The item
        is exposed to the expression under the name 'item'.
        Example: "item.metadata.lm_prob"
    group_by_expression : str
        DSL expression computing the grouping key. The item is exposed
        to the expression under the name 'item'.
        Example: "item.metadata.condition"
    context : dict[str, ContextValue]
        Extra variables made available during DSL evaluation.
    n_quantiles : int, default=5
        How many quantiles to create in each group (must be >= 2).
    items_per_quantile : int, default=2
        Target item count per quantile in each group (must be >= 1).
    priority : int, default=1
        Importance of the constraint (larger means more important).
        During partitioning, violating a higher-priority constraint is
        penalized more heavily.

    Examples
    --------
    >>> # Balance LM probability quantiles within each condition
    >>> constraint = GroupedQuantileConstraint(
    ...     property_expression="item.metadata.lm_prob",
    ...     group_by_expression="item.metadata.condition",
    ...     n_quantiles=5,
    ...     items_per_quantile=2
    ... )
    >>> # Balance embedding similarity IQR within semantic categories
    >>> constraint2 = GroupedQuantileConstraint(
    ...     property_expression="item['embedding_iqr']",
    ...     group_by_expression="item['semantic_category']",
    ...     n_quantiles=4,
    ...     items_per_quantile=3
    ... )
    """

    constraint_type: Literal["grouped_quantile"] = "grouped_quantile"
    property_expression: str = Field(
        ...,
        description="DSL expression for numeric value",
    )
    group_by_expression: str = Field(
        ...,
        description="DSL expression for grouping key",
    )
    context: dict[str, ContextValue] = Field(
        default_factory=dict,
        description="Additional context variables",
    )
    n_quantiles: int = Field(
        default=5,
        ge=2,
        description="Number of quantiles per group",
    )
    items_per_quantile: int = Field(
        default=2,
        ge=1,
        description="Items per quantile per group",
    )
    priority: int = Field(
        default=1,
        ge=1,
        description="Constraint priority (higher = more important)",
    )

    @field_validator("property_expression", "group_by_expression")
    @classmethod
    def validate_expression(cls, v: str) -> str:
        """Reject blank expressions and trim the accepted one.

        Runs for both ``property_expression`` and ``group_by_expression``.

        Parameters
        ----------
        v : str
            Candidate expression.

        Returns
        -------
        str
            The expression with surrounding whitespace removed.

        Raises
        ------
        ValueError
            If the expression is empty or consists only of whitespace.
        """
        # Strip once; a falsy input is treated the same as a blank string.
        trimmed = v.strip() if v else ""
        if trimmed:
            return trimmed
        raise ValueError("expression must be non-empty")
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
class ConditionalUniquenessConstraint(BeadBaseModel):
    """List constraint enforcing uniqueness only where a condition holds.

    Values must be unique only for items satisfying a boolean condition,
    so uniqueness can be required on a subset of items while duplicates
    remain allowed among the others.

    Attributes
    ----------
    constraint_type : Literal["conditional_uniqueness"]
        Discriminator tag for this constraint; always
        "conditional_uniqueness".
    property_expression : str
        DSL expression computing the value that must be unique. The item
        is exposed to the expression under the name 'item'.
        Example: "item.metadata.target_word"
    condition_expression : str
        DSL boolean expression deciding whether the constraint applies.
        The item is exposed to the expression under the name 'item'.
        Example: "item.metadata.is_critical == True"
    context : dict[str, ContextValue]
        Extra variables made available during DSL evaluation.
    allow_null : bool, default=False
        Whether several null values are allowed when the condition is true.
    priority : int, default=1
        Importance of the constraint (larger means more important).
        During partitioning, violating a higher-priority constraint is
        penalized more heavily.

    Examples
    --------
    >>> # Unique target words only for critical items
    >>> constraint = ConditionalUniquenessConstraint(
    ...     property_expression="item.metadata.target_word",
    ...     condition_expression="item.metadata.is_critical == True",
    ...     allow_null=False,
    ...     priority=3
    ... )
    >>> # Unique sentences only when grammaticality is tested
    >>> constraint2 = ConditionalUniquenessConstraint(
    ...     property_expression="item.templates.sentence.text",
    ...     condition_expression="item.metadata.test_type in test_grammaticality",
    ...     context={"test_grammaticality": {"gram", "acceptability"}},
    ...     allow_null=True
    ... )
    """

    constraint_type: Literal["conditional_uniqueness"] = "conditional_uniqueness"
    property_expression: str = Field(
        ...,
        description="DSL expression for value to check",
    )
    condition_expression: str = Field(
        ...,
        description="DSL boolean expression for when to apply constraint",
    )
    context: dict[str, ContextValue] = Field(
        default_factory=dict,
        description="Additional context variables",
    )
    allow_null: bool = Field(
        default=False,
        description="Whether to allow multiple null values",
    )
    priority: int = Field(
        default=1,
        ge=1,
        description="Constraint priority (higher = more important)",
    )

    @field_validator("property_expression", "condition_expression")
    @classmethod
    def validate_expression(cls, v: str) -> str:
        """Reject blank expressions and trim the accepted one.

        Runs for both ``property_expression`` and ``condition_expression``.

        Parameters
        ----------
        v : str
            Candidate expression.

        Returns
        -------
        str
            The expression with surrounding whitespace removed.

        Raises
        ------
        ValueError
            If the expression is empty or consists only of whitespace.
        """
        # Strip once; a falsy input is treated the same as a blank string.
        trimmed = v.strip() if v else ""
        if trimmed:
            return trimmed
        raise ValueError("expression must be non-empty")
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
class DiversityConstraint(BeadBaseModel):
    """Per-list lower bound on the number of distinct property values.

    A list meets this constraint when the values extracted by
    ``property_expression`` contain at least ``min_unique_values`` distinct
    entries. Typical uses are template diversity, verb diversity, and
    similar richness requirements on an experimental list.

    Attributes
    ----------
    constraint_type : Literal["diversity"]
        Discriminator field; always "diversity".
    property_expression : str
        DSL expression producing the value whose distinct count is checked.
        The item is bound to the name 'item' inside the expression.
        Examples: "item.metadata.template_id", "item.metadata.verb_lemma"
    min_unique_values : int
        Smallest acceptable number of distinct values in a list (>= 1).
    context : dict[str, ContextValue]
        Extra variables made available during DSL evaluation.
    priority : int, default=1
        Constraint priority (higher = more important). During partitioning,
        violating a higher-priority constraint is penalized more heavily.

    Examples
    --------
    >>> # Ensure at least 15 unique templates per list
    >>> constraint = DiversityConstraint(
    ...     property_expression="item.metadata.template_id",
    ...     min_unique_values=15,
    ...     priority=2
    ... )
    >>> constraint.min_unique_values
    15
    """

    constraint_type: Literal["diversity"] = "diversity"
    property_expression: str = Field(
        ...,
        description="DSL expression for value to check for diversity",
    )
    min_unique_values: int = Field(
        ...,
        ge=1,
        description="Minimum number of unique values required",
    )
    context: dict[str, ContextValue] = Field(
        default_factory=dict,
        description="Additional context variables",
    )
    priority: int = Field(
        default=1,
        ge=1,
        description="Constraint priority (higher = more important)",
    )

    @field_validator("property_expression")
    @classmethod
    def validate_property_expression(cls, v: str) -> str:
        """Reject a blank property expression and trim whitespace.

        Parameters
        ----------
        v : str
            Raw property expression.

        Returns
        -------
        str
            The expression without leading or trailing whitespace.

        Raises
        ------
        ValueError
            If the expression is empty or whitespace-only.
        """
        cleaned = v.strip() if v else ""
        if not cleaned:
            raise ValueError("property_expression must be non-empty")
        return cleaned
|
|
566
|
+
|
|
567
|
+
|
|
568
|
+
class SizeConstraint(BeadBaseModel):
    """Requirement on how many items a list contains.

    The requirement can be an exact size, a lower bound, an upper bound,
    or a closed range given by both bounds.

    Commonly given a high priority so that participants do equal work.

    Attributes
    ----------
    constraint_type : Literal["size"]
        Discriminator field; always "size".
    min_size : int | None, default=None
        Lower bound on list size (>= 0 when provided).
    max_size : int | None, default=None
        Upper bound on list size (>= 0 when provided).
    exact_size : int | None, default=None
        Required list size (>= 0 when provided). Mutually exclusive with
        min_size and max_size.
    priority : int, default=1
        Constraint priority (higher = more important). During partitioning,
        violating a higher-priority constraint is penalized more heavily.
        Size constraints often use a high value (e.g., 10) so participants
        do exactly equal amounts of work.

    Examples
    --------
    >>> # Exactly 40 items per list (highest priority)
    >>> constraint = SizeConstraint(exact_size=40, priority=10)
    >>> # Between 30-50 items per list
    >>> constraint2 = SizeConstraint(min_size=30, max_size=50)
    >>> # At least 20 items
    >>> constraint3 = SizeConstraint(min_size=20)
    >>> # At most 100 items
    >>> constraint4 = SizeConstraint(max_size=100)
    """

    constraint_type: Literal["size"] = "size"
    min_size: int | None = Field(
        default=None,
        ge=0,
        description="Minimum list size",
    )
    max_size: int | None = Field(
        default=None,
        ge=0,
        description="Maximum list size",
    )
    exact_size: int | None = Field(
        default=None,
        ge=0,
        description="Exact required size",
    )
    priority: int = Field(
        default=1,
        ge=1,
        description="Constraint priority (higher = more important)",
    )

    @model_validator(mode="after")
    def validate_size_params(self) -> SizeConstraint:
        """Check that the size parameters form a coherent combination.

        The checks run in this order:

        - at least one of the three size parameters is provided
        - exact_size is not combined with min_size or max_size
        - min_size does not exceed max_size when both are provided

        Returns
        -------
        SizeConstraint
            This instance, unchanged.

        Raises
        ------
        ValueError
            If any of the checks above fails.
        """
        exact, lower, upper = self.exact_size, self.min_size, self.max_size
        has_bound = lower is not None or upper is not None

        # nothing specified at all
        if exact is None and not has_bound:
            raise ValueError(
                "Must specify at least one of: min_size, max_size, exact_size"
            )

        # an exact size leaves no room for bounds
        if exact is not None and has_bound:
            raise ValueError("exact_size cannot be used with min_size or max_size")

        # a range must not be inverted
        if lower is not None and upper is not None and lower > upper:
            raise ValueError("min_size must be <= max_size")

        return self
|
|
651
|
+
|
|
652
|
+
|
|
653
|
+
class OrderingConstraint(BeadBaseModel):
    """Specification of rules for the order in which items are presented.

    **CRITICAL**: This constraint is primarily enforced at **jsPsych runtime**,
    not during static list construction. The Python model only stores the
    specification; it is later translated to JavaScript code that enforces
    it during per-participant randomization.

    Attributes
    ----------
    constraint_type : Literal["ordering"]
        Discriminator for constraint type.
    precedence_pairs : list[tuple[UUID, UUID]]
        Pairs of (item_a_id, item_b_id) where item_a must appear before item_b.
    no_adjacent_property : str | None
        Property path; items sharing a value may not be adjacent.
        Example: "item_metadata.condition" prevents AA, BB patterns.
    block_by_property : str | None
        Property path used to group items into contiguous blocks.
        Example: "item_metadata.block_type" creates blocked design.
    min_distance : int | None
        Minimum number of items between items with the same
        no_adjacent_property value. Requires no_adjacent_property.
    max_distance : int | None
        Maximum number of items between the start and end of items with the
        same block_by_property value (enforces tight blocking). Requires
        block_by_property.
    practice_item_property : str | None
        Property path identifying practice items (should appear first).
        Example: "item_metadata.is_practice" with value True.
    randomize_within_blocks : bool
        Whether to randomize order within blocks (default True).
        Only applies when block_by_property is set.
    priority : int, default=1
        Constraint priority (not used for static partitioning).

    Examples
    --------
    >>> # No adjacent items with same condition
    >>> constraint = OrderingConstraint(
    ...     no_adjacent_property="item_metadata.condition"
    ... )

    >>> # Practice items first, then main items
    >>> constraint = OrderingConstraint(
    ...     practice_item_property="item_metadata.is_practice"
    ... )

    >>> # Blocked by condition, randomized within blocks
    >>> constraint = OrderingConstraint(
    ...     block_by_property="item_metadata.condition",
    ...     randomize_within_blocks=True
    ... )

    >>> # Item A before Item B
    >>> from uuid import uuid4
    >>> item_a, item_b = uuid4(), uuid4()
    >>> constraint = OrderingConstraint(
    ...     precedence_pairs=[(item_a, item_b)]
    ... )
    """

    constraint_type: Literal["ordering"] = "ordering"
    precedence_pairs: list[tuple[UUID, UUID]] = Field(
        default_factory=lambda: [],
        description="Pairs (a,b) where a must precede b",
    )
    no_adjacent_property: str | None = Field(
        default=None,
        description="Property that cannot have same value in adjacent items",
    )
    block_by_property: str | None = Field(
        default=None,
        description="Property to group into contiguous blocks",
    )
    min_distance: int | None = Field(
        default=None,
        ge=1,
        description="Minimum items between same no_adjacent_property values",
    )
    max_distance: int | None = Field(
        default=None,
        ge=1,
        description="Maximum distance for blocked items",
    )
    practice_item_property: str | None = Field(
        default=None,
        description="Property identifying practice items (shown first)",
    )
    randomize_within_blocks: bool = Field(
        default=True,
        description="Whether to randomize within blocks",
    )
    priority: int = Field(
        default=1,
        ge=1,
        description="Constraint priority (not used for static partitioning)",
    )

    @model_validator(mode="after")
    def validate_distance_constraints(self) -> OrderingConstraint:
        """Check that the distance settings are usable together.

        Returns
        -------
        OrderingConstraint
            This instance, unchanged.

        Raises
        ------
        ValueError
            If min_distance is set without no_adjacent_property, if
            max_distance is set without block_by_property, or if
            min_distance exceeds max_distance.
        """
        min_gap, max_span = self.min_distance, self.max_distance

        # each distance only makes sense alongside the property it refers to
        if min_gap is not None and self.no_adjacent_property is None:
            raise ValueError("min_distance requires no_adjacent_property to be set")
        if max_span is not None and self.block_by_property is None:
            raise ValueError("max_distance requires block_by_property to be set")

        # only compared when both distances are present
        if min_gap and max_span and min_gap > max_span:
            raise ValueError("min_distance cannot be greater than max_distance")
        return self
|
|
767
|
+
|
|
768
|
+
|
|
769
|
+
# discriminated union for all list constraints: every member carries a
# "constraint_type" field, and that field is declared as the discriminator
# used to tell the member models apart
ListConstraint = Annotated[
    UniquenessConstraint
    | ConditionalUniquenessConstraint
    | BalanceConstraint
    | QuantileConstraint
    | GroupedQuantileConstraint
    | DiversityConstraint
    | SizeConstraint
    | OrderingConstraint,
    Field(discriminator="constraint_type"),
]
|
|
781
|
+
|
|
782
|
+
|
|
783
|
+
# ============================================================================
|
|
784
|
+
# batch-level constraints
|
|
785
|
+
# ============================================================================
|
|
786
|
+
|
|
787
|
+
|
|
788
|
+
class BatchCoverageConstraint(BeadBaseModel):
    """Batch-level requirement that property values appear somewhere.

    Checks that the values of a property are represented across the whole
    collection of lists rather than within any single list. Helpful for
    guaranteeing that experimental conditions, templates, or stimulus
    categories are covered across all participants.

    Attributes
    ----------
    constraint_type : Literal["coverage"]
        Discriminator field; always "coverage".
    property_expression : str
        DSL expression producing the property value whose coverage is
        checked. The item is bound to 'item' in the expression (metadata
        dict).
        Example: "item['template_id']"
    context : dict[str, ContextValue]
        Extra variables made available during DSL evaluation.
    target_values : list[str | int | float] | None
        Values that must be covered. If None, uses all observed values.
    min_coverage : float, default=1.0
        Required coverage fraction (0.0-1.0). 1.0 means 100% of target
        values must appear.
    priority : int, default=1
        Constraint priority (higher = more important).

    Examples
    --------
    >>> # Ensure all 26 templates appear across all lists
    >>> constraint = BatchCoverageConstraint(
    ...     property_expression="item['template_id']",
    ...     target_values=list(range(26)),
    ...     min_coverage=1.0
    ... )
    >>> # Ensure at least 90% of verbs are covered
    >>> constraint = BatchCoverageConstraint(
    ...     property_expression="item['verb_lemma']",
    ...     target_values=["run", "jump", "eat", "sleep", "think"],
    ...     min_coverage=0.9
    ... )
    """

    constraint_type: Literal["coverage"] = "coverage"
    property_expression: str = Field(
        ...,
        description="DSL expression for property to check coverage",
    )
    context: dict[str, ContextValue] = Field(
        default_factory=dict,
        description="Additional context variables",
    )
    target_values: list[str | int | float] | None = Field(
        default=None,
        description="Target values that must be covered",
    )
    min_coverage: float = Field(
        default=1.0,
        ge=0.0,
        le=1.0,
        description="Minimum coverage fraction",
    )
    priority: int = Field(
        default=1,
        ge=1,
        description="Constraint priority (higher = more important)",
    )

    @field_validator("property_expression")
    @classmethod
    def validate_property_expression(cls, v: str) -> str:
        """Reject a blank property expression and return it trimmed."""
        cleaned = v.strip() if v else ""
        if not cleaned:
            raise ValueError("property_expression must be non-empty")
        return cleaned
|
|
853
|
+
|
|
854
|
+
|
|
855
|
+
class BatchBalanceConstraint(BeadBaseModel):
    """Batch-level requirement on the distribution of a categorical property.

    Targets the aggregate distribution of a property over all lists
    combined, in contrast to per-list balance constraints, which look at
    each list separately.

    Attributes
    ----------
    constraint_type : Literal["balance"]
        Discriminator field; always "balance".
    property_expression : str
        DSL expression producing the category value to balance.
        Example: "item['pair_type']"
    context : dict[str, ContextValue]
        Extra variables made available during DSL evaluation.
    target_distribution : dict[str, float]
        Mapping from category value to target proportion. Each proportion
        must lie in [0, 1] and the proportions must sum to 1.0 within
        +/- 0.01.
    tolerance : float, default=0.1
        Allowed deviation from target as a proportion (0.0-1.0).
    priority : int, default=1
        Constraint priority (higher = more important).

    Examples
    --------
    >>> # Ensure 50/50 balance of pair types across all lists
    >>> constraint = BatchBalanceConstraint(
    ...     property_expression="item['pair_type']",
    ...     target_distribution={"same_verb": 0.5, "different_verb": 0.5},
    ...     tolerance=0.05
    ... )
    >>> # Three-way split across conditions
    >>> constraint = BatchBalanceConstraint(
    ...     property_expression="item['condition']",
    ...     target_distribution={"A": 0.333, "B": 0.333, "C": 0.334},
    ...     tolerance=0.1
    ... )
    """

    constraint_type: Literal["balance"] = "balance"
    property_expression: str = Field(
        ...,
        description="DSL expression for category value",
    )
    context: dict[str, ContextValue] = Field(
        default_factory=dict,
        description="Additional context variables",
    )
    target_distribution: dict[str, float] = Field(
        ...,
        description="Target distribution (values sum to 1.0)",
    )
    tolerance: float = Field(
        default=0.1,
        ge=0.0,
        le=1.0,
        description="Allowed deviation from target",
    )
    priority: int = Field(
        default=1,
        ge=1,
        description="Constraint priority (higher = more important)",
    )

    @field_validator("property_expression")
    @classmethod
    def validate_property_expression(cls, v: str) -> str:
        """Reject a blank property expression and return it trimmed."""
        cleaned = v.strip() if v else ""
        if not cleaned:
            raise ValueError("property_expression must be non-empty")
        return cleaned

    @field_validator("target_distribution")
    @classmethod
    def validate_target_distribution(cls, v: dict[str, float]) -> dict[str, float]:
        """Check the distribution is non-empty, in range, and sums to ~1.0."""
        if not v:
            raise ValueError("target_distribution must not be empty")

        # report the first entry (in mapping order) that is outside [0, 1];
        # the chained comparison is negated so that NaN is rejected as well
        offending = next(
            ((name, share) for name, share in v.items() if not 0.0 <= share <= 1.0),
            None,
        )
        if offending is not None:
            category, prob = offending
            raise ValueError(
                f"target_distribution values must be in [0, 1], "
                f"got {prob} for '{category}'"
            )

        # allow a small rounding slack around 1.0
        total = sum(v.values())
        if not 0.99 <= total <= 1.01:
            raise ValueError(
                f"target_distribution values must sum to ~1.0, got {total}"
            )

        return v
|
|
941
|
+
|
|
942
|
+
|
|
943
|
+
class BatchDiversityConstraint(BeadBaseModel):
    """Batch-level cap on how many lists a single value may appear in.

    Keeps any one value of a property from showing up in too many lists,
    which spreads stimuli (e.g., verbs, nouns) across participants instead
    of concentrating them in a few lists.

    Attributes
    ----------
    constraint_type : Literal["diversity"]
        Discriminator field; always "diversity".
    property_expression : str
        DSL expression producing the property value to check.
        Example: "item['verb_lemma']"
    context : dict[str, ContextValue]
        Extra variables made available during DSL evaluation.
    max_lists_per_value : int
        Largest number of lists any single value may appear in (>= 1).
    priority : int, default=1
        Constraint priority (higher = more important).

    Examples
    --------
    >>> # No verb should appear in more than 3 out of 8 lists
    >>> constraint = BatchDiversityConstraint(
    ...     property_expression="item['verb_lemma']",
    ...     max_lists_per_value=3
    ... )
    >>> # No template in more than half the lists
    >>> constraint = BatchDiversityConstraint(
    ...     property_expression="item['template_id']",
    ...     max_lists_per_value=4
    ... )
    """

    constraint_type: Literal["diversity"] = "diversity"
    property_expression: str = Field(
        ...,
        description="DSL expression for property value",
    )
    context: dict[str, ContextValue] = Field(
        default_factory=dict,
        description="Additional context variables",
    )
    max_lists_per_value: int = Field(
        ...,
        ge=1,
        description="Maximum lists any value can appear in",
    )
    priority: int = Field(
        default=1,
        ge=1,
        description="Constraint priority (higher = more important)",
    )

    @field_validator("property_expression")
    @classmethod
    def validate_property_expression(cls, v: str) -> str:
        """Reject a blank property expression and return it trimmed."""
        cleaned = v.strip() if v else ""
        if not cleaned:
            raise ValueError("property_expression must be non-empty")
        return cleaned
|
|
1000
|
+
|
|
1001
|
+
|
|
1002
|
+
class BatchMinOccurrenceConstraint(BeadBaseModel):
    """Batch-level floor on how often each property value occurs.

    Requires every value of a property to occur at least a given number of
    times over all lists together, so that each experimental condition or
    stimulus category yields enough data.

    Attributes
    ----------
    constraint_type : Literal["min_occurrence"]
        Discriminator field; always "min_occurrence".
    property_expression : str
        DSL expression producing the property value whose occurrences are
        counted.
        Example: "item['quantile']"
    context : dict[str, ContextValue]
        Extra variables made available during DSL evaluation.
    min_occurrences : int
        Smallest number of times each value must occur across all lists
        (>= 1).
    priority : int, default=1
        Constraint priority (higher = more important).

    Examples
    --------
    >>> # Each quantile appears at least 50 times across all lists
    >>> constraint = BatchMinOccurrenceConstraint(
    ...     property_expression="item['quantile']",
    ...     min_occurrences=50
    ... )
    >>> # Each template at least 5 times
    >>> constraint = BatchMinOccurrenceConstraint(
    ...     property_expression="item['template_id']",
    ...     min_occurrences=5
    ... )
    """

    constraint_type: Literal["min_occurrence"] = "min_occurrence"
    property_expression: str = Field(
        ...,
        description="DSL expression for property value",
    )
    context: dict[str, ContextValue] = Field(
        default_factory=dict,
        description="Additional context variables",
    )
    min_occurrences: int = Field(
        ...,
        ge=1,
        description="Minimum occurrences per value across batch",
    )
    priority: int = Field(
        default=1,
        ge=1,
        description="Constraint priority (higher = more important)",
    )

    @field_validator("property_expression")
    @classmethod
    def validate_property_expression(cls, v: str) -> str:
        """Reject a blank property expression and return it trimmed."""
        cleaned = v.strip() if v else ""
        if not cleaned:
            raise ValueError("property_expression must be non-empty")
        return cleaned
|
|
1058
|
+
|
|
1059
|
+
|
|
1060
|
+
# discriminated union for all batch constraints, keyed on "constraint_type"
# ("coverage", "balance", "diversity", "min_occurrence").
# NOTE: "diversity" is also the tag of the list-level DiversityConstraint,
# so the tag alone does not distinguish batch from list constraints.
BatchConstraint = Annotated[
    BatchCoverageConstraint
    | BatchBalanceConstraint
    | BatchDiversityConstraint
    | BatchMinOccurrenceConstraint,
    Field(discriminator="constraint_type"),
]
|