bead 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. bead/__init__.py +11 -0
  2. bead/__main__.py +11 -0
  3. bead/active_learning/__init__.py +15 -0
  4. bead/active_learning/config.py +231 -0
  5. bead/active_learning/loop.py +566 -0
  6. bead/active_learning/models/__init__.py +24 -0
  7. bead/active_learning/models/base.py +852 -0
  8. bead/active_learning/models/binary.py +910 -0
  9. bead/active_learning/models/categorical.py +943 -0
  10. bead/active_learning/models/cloze.py +862 -0
  11. bead/active_learning/models/forced_choice.py +956 -0
  12. bead/active_learning/models/free_text.py +773 -0
  13. bead/active_learning/models/lora.py +365 -0
  14. bead/active_learning/models/magnitude.py +835 -0
  15. bead/active_learning/models/multi_select.py +795 -0
  16. bead/active_learning/models/ordinal_scale.py +811 -0
  17. bead/active_learning/models/peft_adapter.py +155 -0
  18. bead/active_learning/models/random_effects.py +639 -0
  19. bead/active_learning/selection.py +354 -0
  20. bead/active_learning/strategies.py +391 -0
  21. bead/active_learning/trainers/__init__.py +26 -0
  22. bead/active_learning/trainers/base.py +210 -0
  23. bead/active_learning/trainers/data_collator.py +172 -0
  24. bead/active_learning/trainers/dataset_utils.py +261 -0
  25. bead/active_learning/trainers/huggingface.py +304 -0
  26. bead/active_learning/trainers/lightning.py +324 -0
  27. bead/active_learning/trainers/metrics.py +424 -0
  28. bead/active_learning/trainers/mixed_effects.py +551 -0
  29. bead/active_learning/trainers/model_wrapper.py +509 -0
  30. bead/active_learning/trainers/registry.py +104 -0
  31. bead/adapters/__init__.py +11 -0
  32. bead/adapters/huggingface.py +61 -0
  33. bead/behavioral/__init__.py +116 -0
  34. bead/behavioral/analytics.py +646 -0
  35. bead/behavioral/extraction.py +343 -0
  36. bead/behavioral/merging.py +343 -0
  37. bead/cli/__init__.py +11 -0
  38. bead/cli/active_learning.py +513 -0
  39. bead/cli/active_learning_commands.py +779 -0
  40. bead/cli/completion.py +359 -0
  41. bead/cli/config.py +624 -0
  42. bead/cli/constraint_builders.py +286 -0
  43. bead/cli/deployment.py +859 -0
  44. bead/cli/deployment_trials.py +493 -0
  45. bead/cli/deployment_ui.py +332 -0
  46. bead/cli/display.py +378 -0
  47. bead/cli/items.py +960 -0
  48. bead/cli/items_factories.py +776 -0
  49. bead/cli/list_constraints.py +714 -0
  50. bead/cli/lists.py +490 -0
  51. bead/cli/main.py +430 -0
  52. bead/cli/models.py +877 -0
  53. bead/cli/resource_loaders.py +621 -0
  54. bead/cli/resources.py +1036 -0
  55. bead/cli/shell.py +356 -0
  56. bead/cli/simulate.py +840 -0
  57. bead/cli/templates.py +1158 -0
  58. bead/cli/training.py +1080 -0
  59. bead/cli/utils.py +614 -0
  60. bead/cli/workflow.py +1273 -0
  61. bead/config/__init__.py +68 -0
  62. bead/config/active_learning.py +1009 -0
  63. bead/config/config.py +192 -0
  64. bead/config/defaults.py +118 -0
  65. bead/config/deployment.py +217 -0
  66. bead/config/env.py +147 -0
  67. bead/config/item.py +45 -0
  68. bead/config/list.py +193 -0
  69. bead/config/loader.py +149 -0
  70. bead/config/logging.py +42 -0
  71. bead/config/model.py +49 -0
  72. bead/config/paths.py +46 -0
  73. bead/config/profiles.py +320 -0
  74. bead/config/resources.py +47 -0
  75. bead/config/serialization.py +210 -0
  76. bead/config/simulation.py +206 -0
  77. bead/config/template.py +238 -0
  78. bead/config/validation.py +267 -0
  79. bead/data/__init__.py +65 -0
  80. bead/data/base.py +87 -0
  81. bead/data/identifiers.py +97 -0
  82. bead/data/language_codes.py +61 -0
  83. bead/data/metadata.py +270 -0
  84. bead/data/range.py +123 -0
  85. bead/data/repository.py +358 -0
  86. bead/data/serialization.py +249 -0
  87. bead/data/timestamps.py +89 -0
  88. bead/data/validation.py +349 -0
  89. bead/data_collection/__init__.py +11 -0
  90. bead/data_collection/jatos.py +223 -0
  91. bead/data_collection/merger.py +154 -0
  92. bead/data_collection/prolific.py +198 -0
  93. bead/deployment/__init__.py +5 -0
  94. bead/deployment/distribution.py +402 -0
  95. bead/deployment/jatos/__init__.py +1 -0
  96. bead/deployment/jatos/api.py +200 -0
  97. bead/deployment/jatos/exporter.py +210 -0
  98. bead/deployment/jspsych/__init__.py +9 -0
  99. bead/deployment/jspsych/biome.json +44 -0
  100. bead/deployment/jspsych/config.py +411 -0
  101. bead/deployment/jspsych/generator.py +598 -0
  102. bead/deployment/jspsych/package.json +51 -0
  103. bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
  104. bead/deployment/jspsych/randomizer.py +299 -0
  105. bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
  106. bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
  107. bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
  108. bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
  109. bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
  110. bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
  111. bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
  112. bead/deployment/jspsych/src/plugins/rating.ts +248 -0
  113. bead/deployment/jspsych/src/slopit/index.ts +9 -0
  114. bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
  115. bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
  116. bead/deployment/jspsych/templates/experiment.css +1 -0
  117. bead/deployment/jspsych/templates/experiment.js.template +289 -0
  118. bead/deployment/jspsych/templates/index.html +51 -0
  119. bead/deployment/jspsych/templates/randomizer.js +241 -0
  120. bead/deployment/jspsych/templates/randomizer.js.template +313 -0
  121. bead/deployment/jspsych/trials.py +723 -0
  122. bead/deployment/jspsych/tsconfig.json +23 -0
  123. bead/deployment/jspsych/tsup.config.ts +30 -0
  124. bead/deployment/jspsych/ui/__init__.py +1 -0
  125. bead/deployment/jspsych/ui/components.py +383 -0
  126. bead/deployment/jspsych/ui/styles.py +411 -0
  127. bead/dsl/__init__.py +80 -0
  128. bead/dsl/ast.py +168 -0
  129. bead/dsl/context.py +178 -0
  130. bead/dsl/errors.py +71 -0
  131. bead/dsl/evaluator.py +570 -0
  132. bead/dsl/grammar.lark +81 -0
  133. bead/dsl/parser.py +231 -0
  134. bead/dsl/stdlib.py +929 -0
  135. bead/evaluation/__init__.py +13 -0
  136. bead/evaluation/convergence.py +485 -0
  137. bead/evaluation/interannotator.py +398 -0
  138. bead/items/__init__.py +40 -0
  139. bead/items/adapters/__init__.py +70 -0
  140. bead/items/adapters/anthropic.py +224 -0
  141. bead/items/adapters/api_utils.py +167 -0
  142. bead/items/adapters/base.py +216 -0
  143. bead/items/adapters/google.py +259 -0
  144. bead/items/adapters/huggingface.py +1074 -0
  145. bead/items/adapters/openai.py +323 -0
  146. bead/items/adapters/registry.py +202 -0
  147. bead/items/adapters/sentence_transformers.py +224 -0
  148. bead/items/adapters/togetherai.py +309 -0
  149. bead/items/binary.py +515 -0
  150. bead/items/cache.py +558 -0
  151. bead/items/categorical.py +593 -0
  152. bead/items/cloze.py +757 -0
  153. bead/items/constructor.py +784 -0
  154. bead/items/forced_choice.py +413 -0
  155. bead/items/free_text.py +681 -0
  156. bead/items/generation.py +432 -0
  157. bead/items/item.py +396 -0
  158. bead/items/item_template.py +787 -0
  159. bead/items/magnitude.py +573 -0
  160. bead/items/multi_select.py +621 -0
  161. bead/items/ordinal_scale.py +569 -0
  162. bead/items/scoring.py +448 -0
  163. bead/items/validation.py +723 -0
  164. bead/lists/__init__.py +30 -0
  165. bead/lists/balancer.py +263 -0
  166. bead/lists/constraints.py +1067 -0
  167. bead/lists/experiment_list.py +286 -0
  168. bead/lists/list_collection.py +378 -0
  169. bead/lists/partitioner.py +1141 -0
  170. bead/lists/stratification.py +254 -0
  171. bead/participants/__init__.py +73 -0
  172. bead/participants/collection.py +699 -0
  173. bead/participants/merging.py +312 -0
  174. bead/participants/metadata_spec.py +491 -0
  175. bead/participants/models.py +276 -0
  176. bead/resources/__init__.py +29 -0
  177. bead/resources/adapters/__init__.py +19 -0
  178. bead/resources/adapters/base.py +104 -0
  179. bead/resources/adapters/cache.py +128 -0
  180. bead/resources/adapters/glazing.py +508 -0
  181. bead/resources/adapters/registry.py +117 -0
  182. bead/resources/adapters/unimorph.py +796 -0
  183. bead/resources/classification.py +856 -0
  184. bead/resources/constraint_builders.py +329 -0
  185. bead/resources/constraints.py +165 -0
  186. bead/resources/lexical_item.py +223 -0
  187. bead/resources/lexicon.py +744 -0
  188. bead/resources/loaders.py +209 -0
  189. bead/resources/template.py +441 -0
  190. bead/resources/template_collection.py +707 -0
  191. bead/resources/template_generation.py +349 -0
  192. bead/simulation/__init__.py +29 -0
  193. bead/simulation/annotators/__init__.py +15 -0
  194. bead/simulation/annotators/base.py +175 -0
  195. bead/simulation/annotators/distance_based.py +135 -0
  196. bead/simulation/annotators/lm_based.py +114 -0
  197. bead/simulation/annotators/oracle.py +182 -0
  198. bead/simulation/annotators/random.py +181 -0
  199. bead/simulation/dsl_extension/__init__.py +3 -0
  200. bead/simulation/noise_models/__init__.py +13 -0
  201. bead/simulation/noise_models/base.py +42 -0
  202. bead/simulation/noise_models/random_noise.py +82 -0
  203. bead/simulation/noise_models/systematic.py +132 -0
  204. bead/simulation/noise_models/temperature.py +86 -0
  205. bead/simulation/runner.py +144 -0
  206. bead/simulation/strategies/__init__.py +23 -0
  207. bead/simulation/strategies/base.py +123 -0
  208. bead/simulation/strategies/binary.py +103 -0
  209. bead/simulation/strategies/categorical.py +123 -0
  210. bead/simulation/strategies/cloze.py +224 -0
  211. bead/simulation/strategies/forced_choice.py +127 -0
  212. bead/simulation/strategies/free_text.py +105 -0
  213. bead/simulation/strategies/magnitude.py +116 -0
  214. bead/simulation/strategies/multi_select.py +129 -0
  215. bead/simulation/strategies/ordinal_scale.py +131 -0
  216. bead/templates/__init__.py +27 -0
  217. bead/templates/adapters/__init__.py +17 -0
  218. bead/templates/adapters/base.py +128 -0
  219. bead/templates/adapters/cache.py +178 -0
  220. bead/templates/adapters/huggingface.py +312 -0
  221. bead/templates/combinatorics.py +103 -0
  222. bead/templates/filler.py +605 -0
  223. bead/templates/renderers.py +177 -0
  224. bead/templates/resolver.py +178 -0
  225. bead/templates/strategies.py +1806 -0
  226. bead/templates/streaming.py +195 -0
  227. bead-0.1.0.dist-info/METADATA +212 -0
  228. bead-0.1.0.dist-info/RECORD +231 -0
  229. bead-0.1.0.dist-info/WHEEL +4 -0
  230. bead-0.1.0.dist-info/entry_points.txt +2 -0
  231. bead-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,1067 @@
1
+ """Constraint models for experimental list composition.
2
+
3
+ This module defines constraints that can be applied to experimental lists
4
+ to ensure balanced, well-distributed item selections. Constraints can specify:
5
+ - Uniqueness: No duplicate property values
6
+ - Balance: Balanced distribution across categories
7
+ - Quantile: Uniform distribution across quantiles
8
+ - Size: List size requirements
9
+ - Ordering: Item presentation order constraints (runtime enforcement)
10
+
11
+ All constraints inherit from BeadBaseModel and use Pydantic discriminated unions
12
+ for type-safe deserialization.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from typing import Annotated, Literal
18
+ from uuid import UUID
19
+
20
+ from pydantic import Field, field_validator, model_validator
21
+
22
+ from bead.data.base import BeadBaseModel
23
+ from bead.resources.constraints import ContextValue
24
+
25
# Closed vocabulary of constraint kinds that apply within a single list.
# Each literal matches the `constraint_type` discriminator value of one of
# the per-list constraint models defined in this module.
ListConstraintType = Literal[
    "uniqueness",  # No duplicate property values
    "conditional_uniqueness",  # Conditional uniqueness based on DSL expression
    "balance",  # Balanced distribution of property
    "quantile",  # Uniform across quantiles
    "grouped_quantile",  # Quantile distribution within groups
    "diversity",  # Minimum unique values for property
    "size",  # List size constraints
    "ordering",  # Presentation order constraints (runtime enforcement)
]

# Closed vocabulary of constraint kinds that apply across an entire batch
# of lists rather than within one list.
BatchConstraintType = Literal[
    "coverage",  # All values appear somewhere in batch
    "balance",  # Balanced distribution across entire batch
    "diversity",  # Prevent values appearing in too many lists
    "min_occurrence",  # Minimum occurrences per value across batch
]
44
+
45
+
46
class UniquenessConstraint(BeadBaseModel):
    """Require that a property value occurs at most once per list.

    Evaluates ``property_expression`` against every item in a list and
    flags the list when two items yield the same value. Typical uses are
    preventing repeated target verbs, duplicate sentence frames, or other
    repeated experimental materials.

    Attributes
    ----------
    constraint_type : Literal["uniqueness"]
        Discriminator field for constraint type (always "uniqueness").
    property_expression : str
        DSL expression yielding the value that must be unique; the item
        under evaluation is bound to the name 'item'.
        Examples: "item.metadata.target_verb", "item.templates.sentence.text"
    context : dict[str, ContextValue]
        Extra variables made available during DSL evaluation.
    allow_null : bool, default=False
        When True, any number of None values may coexist; when False,
        repeated None values count as duplicates.
    priority : int, default=1
        Relative importance; when partitioning, violations of
        higher-priority constraints are penalized more heavily.

    Examples
    --------
    >>> # No two items with same target verb (high priority)
    >>> constraint = UniquenessConstraint(
    ...     property_expression="item.metadata.target_verb",
    ...     allow_null=False,
    ...     priority=5
    ... )
    >>> constraint.priority
    5
    """

    constraint_type: Literal["uniqueness"] = "uniqueness"
    property_expression: str = Field(
        ..., description="DSL expression for value to check"
    )
    context: dict[str, ContextValue] = Field(
        default_factory=dict, description="Additional context variables"
    )
    allow_null: bool = Field(
        default=False, description="Whether to allow multiple null values"
    )
    priority: int = Field(
        default=1, ge=1, description="Constraint priority (higher = more important)"
    )

    @field_validator("property_expression")
    @classmethod
    def validate_property_expression(cls, v: str) -> str:
        """Reject empty or whitespace-only property expressions.

        Parameters
        ----------
        v : str
            Candidate property expression.

        Returns
        -------
        str
            The expression with surrounding whitespace removed.

        Raises
        ------
        ValueError
            If the expression is empty or contains only whitespace.
        """
        stripped = v.strip() if v else ""
        if not stripped:
            raise ValueError("property_expression must be non-empty")
        return stripped
119
+
120
+
121
class BalanceConstraint(BeadBaseModel):
    """Require a balanced category distribution within a list.

    Evaluates ``property_expression`` to obtain a category label for each
    item and checks that label counts match ``target_counts`` (or an equal
    split when no targets are given), within the configured ``tolerance``.

    Attributes
    ----------
    constraint_type : Literal["balance"]
        Discriminator field for constraint type (always "balance").
    property_expression : str
        DSL expression yielding the category label; the item under
        evaluation is bound to the name 'item'.
        Example: "item.metadata.transitivity"
    context : dict[str, ContextValue]
        Extra variables made available during DSL evaluation.
    target_counts : dict[str, int] | None, default=None
        Desired count per category label. None means an equal
        distribution is assumed across categories.
    tolerance : float, default=0.1
        Accepted proportional deviation from the target (0.0-1.0);
        e.g. 0.1 permits counts to be off by up to 10%.
    priority : int, default=1
        Relative importance; when partitioning, violations of
        higher-priority constraints are penalized more heavily.

    Examples
    --------
    >>> # Equal number of transitive and intransitive verbs
    >>> constraint = BalanceConstraint(
    ...     property_expression="item.metadata.transitivity",
    ...     tolerance=0.1
    ... )
    >>> # 2:1 ratio with high priority
    >>> constraint2 = BalanceConstraint(
    ...     property_expression="item.metadata.grammatical",
    ...     target_counts={"true": 20, "false": 10},
    ...     tolerance=0.05,
    ...     priority=3
    ... )
    """

    constraint_type: Literal["balance"] = "balance"
    property_expression: str = Field(
        ..., description="DSL expression for category value"
    )
    context: dict[str, ContextValue] = Field(
        default_factory=dict, description="Additional context variables"
    )
    target_counts: dict[str, int] | None = Field(
        default=None, description="Target counts per category (None = equal)"
    )
    tolerance: float = Field(
        default=0.1, ge=0.0, le=1.0, description="Allowed deviation from target"
    )
    priority: int = Field(
        default=1, ge=1, description="Constraint priority (higher = more important)"
    )

    @field_validator("property_expression")
    @classmethod
    def validate_property_expression(cls, v: str) -> str:
        """Reject empty or whitespace-only property expressions.

        Parameters
        ----------
        v : str
            Candidate property expression.

        Returns
        -------
        str
            The expression with surrounding whitespace removed.

        Raises
        ------
        ValueError
            If the expression is empty or contains only whitespace.
        """
        stripped = v.strip() if v else ""
        if not stripped:
            raise ValueError("property_expression must be non-empty")
        return stripped

    @field_validator("target_counts")
    @classmethod
    def validate_target_counts(cls, v: dict[str, int] | None) -> dict[str, int] | None:
        """Reject negative target counts.

        Parameters
        ----------
        v : dict[str, int] | None
            Target counts to validate.

        Returns
        -------
        dict[str, int] | None
            The validated mapping, unchanged.

        Raises
        ------
        ValueError
            If any count is negative.
        """
        if v is None:
            return v
        for category, count in v.items():
            if count < 0:
                raise ValueError(
                    f"target_counts values must be non-negative, "
                    f"got {count} for '{category}'"
                )
        return v
233
+
234
+
235
class QuantileConstraint(BeadBaseModel):
    """Require a uniform spread of a numeric property across quantiles.

    Computes a numeric value per item via ``property_expression`` and
    checks that items fill each of ``n_quantiles`` quantile bins with
    roughly ``items_per_quantile`` items. Useful for balancing language
    model probabilities, word frequencies, or other continuous variables;
    the expression may compute derived metrics.

    Attributes
    ----------
    constraint_type : Literal["quantile"]
        Discriminator field for constraint type (always "quantile").
    property_expression : str
        DSL expression computing the numeric value to quantile; the item
        under evaluation is bound to the name 'item'. May be simple
        (e.g., "item.metadata.lm_prob") or complex
        (e.g., "variance([item['val1'], item['val2'], item['val3']])")
    context : dict[str, ContextValue]
        Extra variables made available during DSL evaluation.
        Example: {"hyp_keys": ["hyp1", "hyp2", "hyp3"]}
    n_quantiles : int, default=5
        Number of quantile bins (must be >= 2).
    items_per_quantile : int, default=2
        Target number of items per bin (must be >= 1).
    priority : int, default=1
        Relative importance; when partitioning, violations of
        higher-priority constraints are penalized more heavily.

    Examples
    --------
    >>> # Uniform distribution of LM probabilities across 5 quantiles
    >>> constraint = QuantileConstraint(
    ...     property_expression="item.metadata.lm_prob",
    ...     n_quantiles=5,
    ...     items_per_quantile=2
    ... )
    >>> # Variance of precomputed NLI scores
    >>> constraint2 = QuantileConstraint(
    ...     property_expression="item['nli_variance']",
    ...     n_quantiles=5,
    ...     items_per_quantile=2
    ... )
    """

    constraint_type: Literal["quantile"] = "quantile"
    property_expression: str = Field(
        ..., description="DSL expression for numeric value"
    )
    context: dict[str, ContextValue] = Field(
        default_factory=dict, description="Additional context variables"
    )
    n_quantiles: int = Field(default=5, ge=2, description="Number of quantiles")
    items_per_quantile: int = Field(default=2, ge=1, description="Items per quantile")
    priority: int = Field(
        default=1, ge=1, description="Constraint priority (higher = more important)"
    )

    @field_validator("property_expression")
    @classmethod
    def validate_property_expression(cls, v: str) -> str:
        """Reject empty or whitespace-only property expressions.

        Parameters
        ----------
        v : str
            Candidate property expression.

        Returns
        -------
        str
            The expression with surrounding whitespace removed.

        Raises
        ------
        ValueError
            If the expression is empty or contains only whitespace.
        """
        stripped = v.strip() if v else ""
        if not stripped:
            raise ValueError("property_expression must be non-empty")
        return stripped
315
+
316
+
317
class GroupedQuantileConstraint(BeadBaseModel):
    """Require uniform quantile coverage of a numeric property per group.

    Splits items into groups via ``group_by_expression``, then within
    each group checks that the numeric values from
    ``property_expression`` fill ``n_quantiles`` quantile bins with
    roughly ``items_per_quantile`` items each. Useful for balancing a
    continuous variable independently inside categorical groups.

    Attributes
    ----------
    constraint_type : Literal["grouped_quantile"]
        Discriminator field for constraint type (always "grouped_quantile").
    property_expression : str
        DSL expression computing the numeric value to quantile; the item
        under evaluation is bound to the name 'item'.
        Example: "item.metadata.lm_prob"
    group_by_expression : str
        DSL expression computing the grouping key; the item under
        evaluation is bound to the name 'item'.
        Example: "item.metadata.condition"
    context : dict[str, ContextValue]
        Extra variables made available during DSL evaluation.
    n_quantiles : int, default=5
        Number of quantile bins per group (must be >= 2).
    items_per_quantile : int, default=2
        Target number of items per bin per group (must be >= 1).
    priority : int, default=1
        Relative importance; when partitioning, violations of
        higher-priority constraints are penalized more heavily.

    Examples
    --------
    >>> # Balance LM probability quantiles within each condition
    >>> constraint = GroupedQuantileConstraint(
    ...     property_expression="item.metadata.lm_prob",
    ...     group_by_expression="item.metadata.condition",
    ...     n_quantiles=5,
    ...     items_per_quantile=2
    ... )
    >>> # Balance embedding similarity IQR within semantic categories
    >>> constraint2 = GroupedQuantileConstraint(
    ...     property_expression="item['embedding_iqr']",
    ...     group_by_expression="item['semantic_category']",
    ...     n_quantiles=4,
    ...     items_per_quantile=3
    ... )
    """

    constraint_type: Literal["grouped_quantile"] = "grouped_quantile"
    property_expression: str = Field(
        ..., description="DSL expression for numeric value"
    )
    group_by_expression: str = Field(..., description="DSL expression for grouping key")
    context: dict[str, ContextValue] = Field(
        default_factory=dict, description="Additional context variables"
    )
    n_quantiles: int = Field(
        default=5, ge=2, description="Number of quantiles per group"
    )
    items_per_quantile: int = Field(
        default=2, ge=1, description="Items per quantile per group"
    )
    priority: int = Field(
        default=1, ge=1, description="Constraint priority (higher = more important)"
    )

    @field_validator("property_expression", "group_by_expression")
    @classmethod
    def validate_expression(cls, v: str) -> str:
        """Reject empty or whitespace-only expressions.

        Parameters
        ----------
        v : str
            Candidate expression.

        Returns
        -------
        str
            The expression with surrounding whitespace removed.

        Raises
        ------
        ValueError
            If the expression is empty or contains only whitespace.
        """
        stripped = v.strip() if v else ""
        if not stripped:
            raise ValueError("expression must be non-empty")
        return stripped
405
+
406
+
407
class ConditionalUniquenessConstraint(BeadBaseModel):
    """Require uniqueness only for items satisfying a condition.

    Applies the uniqueness check on ``property_expression`` only to items
    for which ``condition_expression`` evaluates to true, allowing
    duplicates among the remaining items.

    Attributes
    ----------
    constraint_type : Literal["conditional_uniqueness"]
        Discriminator field for constraint type (always "conditional_uniqueness").
    property_expression : str
        DSL expression computing the value that must be unique; the item
        under evaluation is bound to the name 'item'.
        Example: "item.metadata.target_word"
    condition_expression : str
        DSL boolean expression selecting the items the constraint applies
        to; the item under evaluation is bound to the name 'item'.
        Example: "item.metadata.is_critical == True"
    context : dict[str, ContextValue]
        Extra variables made available during DSL evaluation.
    allow_null : bool, default=False
        Whether multiple null values are tolerated among items where the
        condition holds.
    priority : int, default=1
        Relative importance; when partitioning, violations of
        higher-priority constraints are penalized more heavily.

    Examples
    --------
    >>> # Unique target words only for critical items
    >>> constraint = ConditionalUniquenessConstraint(
    ...     property_expression="item.metadata.target_word",
    ...     condition_expression="item.metadata.is_critical == True",
    ...     allow_null=False,
    ...     priority=3
    ... )
    >>> # Unique sentences only when grammaticality is tested
    >>> constraint2 = ConditionalUniquenessConstraint(
    ...     property_expression="item.templates.sentence.text",
    ...     condition_expression="item.metadata.test_type in test_grammaticality",
    ...     context={"test_grammaticality": {"gram", "acceptability"}},
    ...     allow_null=True
    ... )
    """

    constraint_type: Literal["conditional_uniqueness"] = "conditional_uniqueness"
    property_expression: str = Field(
        ..., description="DSL expression for value to check"
    )
    condition_expression: str = Field(
        ..., description="DSL boolean expression for when to apply constraint"
    )
    context: dict[str, ContextValue] = Field(
        default_factory=dict, description="Additional context variables"
    )
    allow_null: bool = Field(
        default=False, description="Whether to allow multiple null values"
    )
    priority: int = Field(
        default=1, ge=1, description="Constraint priority (higher = more important)"
    )

    @field_validator("property_expression", "condition_expression")
    @classmethod
    def validate_expression(cls, v: str) -> str:
        """Reject empty or whitespace-only expressions.

        Parameters
        ----------
        v : str
            Candidate expression.

        Returns
        -------
        str
            The expression with surrounding whitespace removed.

        Raises
        ------
        ValueError
            If the expression is empty or contains only whitespace.
        """
        stripped = v.strip() if v else ""
        if not stripped:
            raise ValueError("expression must be non-empty")
        return stripped
492
+
493
+
494
class DiversityConstraint(BeadBaseModel):
    """Require a minimum number of distinct values for a property.

    Evaluates ``property_expression`` across a list and checks that at
    least ``min_unique_values`` distinct values occur. Useful for
    ensuring template diversity, verb diversity, or similar experimental
    richness requirements.

    Attributes
    ----------
    constraint_type : Literal["diversity"]
        Discriminator field for constraint type (always "diversity").
    property_expression : str
        DSL expression yielding the value counted for diversity; the item
        under evaluation is bound to the name 'item'.
        Examples: "item.metadata.template_id", "item.metadata.verb_lemma"
    min_unique_values : int
        Minimum number of distinct values the list must contain.
    context : dict[str, ContextValue]
        Extra variables made available during DSL evaluation.
    priority : int, default=1
        Relative importance; when partitioning, violations of
        higher-priority constraints are penalized more heavily.

    Examples
    --------
    >>> # Ensure at least 15 unique templates per list
    >>> constraint = DiversityConstraint(
    ...     property_expression="item.metadata.template_id",
    ...     min_unique_values=15,
    ...     priority=2
    ... )
    >>> constraint.min_unique_values
    15
    """

    constraint_type: Literal["diversity"] = "diversity"
    property_expression: str = Field(
        ..., description="DSL expression for value to check for diversity"
    )
    min_unique_values: int = Field(
        ..., ge=1, description="Minimum number of unique values required"
    )
    context: dict[str, ContextValue] = Field(
        default_factory=dict, description="Additional context variables"
    )
    priority: int = Field(
        default=1, ge=1, description="Constraint priority (higher = more important)"
    )

    @field_validator("property_expression")
    @classmethod
    def validate_property_expression(cls, v: str) -> str:
        """Reject empty or whitespace-only property expressions.

        Parameters
        ----------
        v : str
            Candidate property expression.

        Returns
        -------
        str
            The expression with surrounding whitespace removed.

        Raises
        ------
        ValueError
            If the expression is empty or contains only whitespace.
        """
        stripped = v.strip() if v else ""
        if not stripped:
            raise ValueError("property_expression must be non-empty")
        return stripped
566
+
567
+
568
class SizeConstraint(BeadBaseModel):
    """Constrain how many items a list may contain.

    A list's size can be pinned exactly (``exact_size``), bounded from
    below (``min_size``), bounded from above (``max_size``), or bounded
    on both sides. ``exact_size`` is mutually exclusive with the
    min/max bounds.

    A high ``priority`` (e.g. 10) is commonly used so that every
    participant receives the same amount of work.

    Attributes
    ----------
    constraint_type : Literal["size"]
        Discriminator field for constraint type (always "size").
    min_size : int | None, default=None
        Lower bound on list size (>= 0 when given).
    max_size : int | None, default=None
        Upper bound on list size (>= 0 when given).
    exact_size : int | None, default=None
        Required size (>= 0 when given); cannot be combined with
        min_size or max_size.
    priority : int, default=1
        Relative importance when partitioning; violations of
        higher-priority constraints are penalized more heavily.

    Examples
    --------
    >>> # Exactly 40 items per list (highest priority)
    >>> constraint = SizeConstraint(exact_size=40, priority=10)
    >>> # Between 30-50 items per list
    >>> constraint2 = SizeConstraint(min_size=30, max_size=50)
    >>> # At least 20 items
    >>> constraint3 = SizeConstraint(min_size=20)
    >>> # At most 100 items
    >>> constraint4 = SizeConstraint(max_size=100)
    """

    constraint_type: Literal["size"] = "size"
    min_size: int | None = Field(default=None, ge=0, description="Minimum list size")
    max_size: int | None = Field(default=None, ge=0, description="Maximum list size")
    exact_size: int | None = Field(
        default=None, ge=0, description="Exact required size"
    )
    priority: int = Field(
        default=1, ge=1, description="Constraint priority (higher = more important)"
    )

    @model_validator(mode="after")
    def validate_size_params(self) -> SizeConstraint:
        """Reject inconsistent combinations of size parameters.

        Returns
        -------
        SizeConstraint
            The validated model instance.

        Raises
        ------
        ValueError
            If no size parameter is set, if exact_size is combined with
            min_size or max_size, or if min_size exceeds max_size.
        """
        # at least one of the three parameters must be provided
        if all(
            s is None for s in (self.min_size, self.max_size, self.exact_size)
        ):
            raise ValueError(
                "Must specify at least one of: min_size, max_size, exact_size"
            )

        # an exact size makes min/max bounds contradictory
        has_bounds = self.min_size is not None or self.max_size is not None
        if self.exact_size is not None and has_bounds:
            raise ValueError("exact_size cannot be used with min_size or max_size")

        # when both bounds are present they must form a valid range
        if (
            self.min_size is not None
            and self.max_size is not None
            and self.min_size > self.max_size
        ):
            raise ValueError("min_size must be <= max_size")

        return self
651
+
652
+
653
class OrderingConstraint(BeadBaseModel):
    """Constraint on item presentation order.

    **CRITICAL**: This constraint is primarily enforced at **jsPsych runtime**,
    not during static list construction. The Python data model stores the
    constraint specification, which is then translated to JavaScript code
    for runtime enforcement during per-participant randomization.

    Attributes
    ----------
    constraint_type : Literal["ordering"]
        Discriminator for constraint type.
    precedence_pairs : list[tuple[UUID, UUID]]
        Pairs of (item_a_id, item_b_id) where item_a must appear before item_b.
    no_adjacent_property : str | None
        Property path; items with same value cannot be adjacent.
        Example: "item_metadata.condition" prevents AA, BB patterns.
    block_by_property : str | None
        Property path to group items into contiguous blocks.
        Example: "item_metadata.block_type" creates blocked design.
    min_distance : int | None
        Minimum number of items between items with same no_adjacent_property
        value. Requires no_adjacent_property to be set.
    max_distance : int | None
        Maximum number of items between start and end of items with same
        block_by_property value (enforces tight blocking).
        Requires block_by_property to be set.
    practice_item_property : str | None
        Property path identifying practice items (should appear first).
        Example: "item_metadata.is_practice" with value True.
    randomize_within_blocks : bool
        Whether to randomize order within blocks (default True).
        Only applies when block_by_property is set.
    priority : int, default=1
        Constraint priority (not used for static partitioning).

    Examples
    --------
    >>> # No adjacent items with same condition
    >>> constraint = OrderingConstraint(
    ...     no_adjacent_property="item_metadata.condition"
    ... )

    >>> # Practice items first, then main items
    >>> constraint = OrderingConstraint(
    ...     practice_item_property="item_metadata.is_practice"
    ... )

    >>> # Blocked by condition, randomized within blocks
    >>> constraint = OrderingConstraint(
    ...     block_by_property="item_metadata.condition",
    ...     randomize_within_blocks=True
    ... )

    >>> # Item A before Item B
    >>> from uuid import uuid4
    >>> item_a, item_b = uuid4(), uuid4()
    >>> constraint = OrderingConstraint(
    ...     precedence_pairs=[(item_a, item_b)]
    ... )
    """

    constraint_type: Literal["ordering"] = "ordering"
    # `list` is the idiomatic default_factory (was `lambda: []`): each
    # instance still gets its own fresh list.
    precedence_pairs: list[tuple[UUID, UUID]] = Field(
        default_factory=list, description="Pairs (a,b) where a must precede b"
    )
    no_adjacent_property: str | None = Field(
        default=None,
        description="Property that cannot have same value in adjacent items",
    )
    block_by_property: str | None = Field(
        default=None, description="Property to group into contiguous blocks"
    )
    min_distance: int | None = Field(
        default=None,
        ge=1,
        description="Minimum items between same no_adjacent_property values",
    )
    max_distance: int | None = Field(
        default=None, ge=1, description="Maximum distance for blocked items"
    )
    practice_item_property: str | None = Field(
        default=None, description="Property identifying practice items (shown first)"
    )
    randomize_within_blocks: bool = Field(
        default=True, description="Whether to randomize within blocks"
    )
    priority: int = Field(
        default=1,
        ge=1,
        description="Constraint priority (not used for static partitioning)",
    )

    @model_validator(mode="after")
    def validate_distance_constraints(self) -> OrderingConstraint:
        """Validate distance constraint combinations.

        Returns
        -------
        OrderingConstraint
            Validated constraint.

        Raises
        ------
        ValueError
            If a distance is given without its companion property, or if
            min_distance exceeds max_distance.
        """
        if self.min_distance is not None and self.no_adjacent_property is None:
            raise ValueError("min_distance requires no_adjacent_property to be set")
        if self.max_distance is not None and self.block_by_property is None:
            raise ValueError("max_distance requires block_by_property to be set")
        # explicit None checks: the previous truthiness test only held
        # because ge=1 forbids 0; this form matches the checks above and
        # stays correct regardless of the field bounds
        if (
            self.min_distance is not None
            and self.max_distance is not None
            and self.min_distance > self.max_distance
        ):
            raise ValueError("min_distance cannot be greater than max_distance")
        return self
767
+
768
+
769
# discriminated union for all list constraints
#
# Pydantic selects the concrete constraint class from the
# `constraint_type` discriminator field during validation/deserialization.
ListConstraint = Annotated[
    UniquenessConstraint
    | ConditionalUniquenessConstraint
    | BalanceConstraint
    | QuantileConstraint
    | GroupedQuantileConstraint
    | DiversityConstraint
    | SizeConstraint
    | OrderingConstraint,
    Field(discriminator="constraint_type"),
]
781
+
782
+
783
+ # ============================================================================
784
+ # batch-level constraints
785
+ # ============================================================================
786
+
787
+
788
class BatchCoverageConstraint(BeadBaseModel):
    """Require that property values are represented across the batch.

    Checks that the values of a property collectively appear somewhere
    in the full collection of lists. Typical uses: guaranteeing every
    experimental condition, template, or stimulus category is seen by
    at least one participant.

    Attributes
    ----------
    constraint_type : Literal["coverage"]
        Discriminator field for constraint type (always "coverage").
    property_expression : str
        DSL expression extracting the value whose coverage is checked.
        The item's metadata dict is bound to 'item' in the expression,
        e.g. "item['template_id']".
    context : dict[str, ContextValue]
        Extra variables made available to DSL evaluation.
    target_values : list[str | int | float] | None
        Values that must be covered; None means every observed value.
    min_coverage : float, default=1.0
        Required coverage fraction in [0.0, 1.0]; 1.0 demands that all
        target values appear.
    priority : int, default=1
        Constraint priority (higher = more important).

    Examples
    --------
    >>> # Ensure all 26 templates appear across all lists
    >>> constraint = BatchCoverageConstraint(
    ...     property_expression="item['template_id']",
    ...     target_values=list(range(26)),
    ...     min_coverage=1.0
    ... )
    >>> # Ensure at least 90% of verbs are covered
    >>> constraint = BatchCoverageConstraint(
    ...     property_expression="item['verb_lemma']",
    ...     target_values=["run", "jump", "eat", "sleep", "think"],
    ...     min_coverage=0.9
    ... )
    """

    constraint_type: Literal["coverage"] = "coverage"
    property_expression: str = Field(
        ..., description="DSL expression for property to check coverage"
    )
    context: dict[str, ContextValue] = Field(
        default_factory=dict, description="Additional context variables"
    )
    target_values: list[str | int | float] | None = Field(
        default=None, description="Target values that must be covered"
    )
    min_coverage: float = Field(
        default=1.0, ge=0.0, le=1.0, description="Minimum coverage fraction"
    )
    priority: int = Field(
        default=1, ge=1, description="Constraint priority (higher = more important)"
    )

    @field_validator("property_expression")
    @classmethod
    def validate_property_expression(cls, v: str) -> str:
        """Strip whitespace and reject blank expressions."""
        stripped = v.strip()
        if not stripped:
            raise ValueError("property_expression must be non-empty")
        return stripped
853
+
854
+
855
class BatchBalanceConstraint(BeadBaseModel):
    """Balance a categorical property's distribution across the batch.

    Targets the aggregate distribution of a property over all lists
    combined, in contrast to per-list balance constraints which look at
    each list individually.

    Attributes
    ----------
    constraint_type : Literal["balance"]
        Discriminator field for constraint type (always "balance").
    property_expression : str
        DSL expression extracting the category value to balance,
        e.g. "item['pair_type']".
    context : dict[str, ContextValue]
        Extra variables made available to DSL evaluation.
    target_distribution : dict[str, float]
        Mapping from category value to target proportion; the
        proportions must sum to 1.0.
    tolerance : float, default=0.1
        Permitted deviation from the target, as a proportion in
        [0.0, 1.0].
    priority : int, default=1
        Constraint priority (higher = more important).

    Examples
    --------
    >>> # Ensure 50/50 balance of pair types across all lists
    >>> constraint = BatchBalanceConstraint(
    ...     property_expression="item['pair_type']",
    ...     target_distribution={"same_verb": 0.5, "different_verb": 0.5},
    ...     tolerance=0.05
    ... )
    >>> # Three-way split across conditions
    >>> constraint = BatchBalanceConstraint(
    ...     property_expression="item['condition']",
    ...     target_distribution={"A": 0.333, "B": 0.333, "C": 0.334},
    ...     tolerance=0.1
    ... )
    """

    constraint_type: Literal["balance"] = "balance"
    property_expression: str = Field(
        ..., description="DSL expression for category value"
    )
    context: dict[str, ContextValue] = Field(
        default_factory=dict, description="Additional context variables"
    )
    target_distribution: dict[str, float] = Field(
        ..., description="Target distribution (values sum to 1.0)"
    )
    tolerance: float = Field(
        default=0.1, ge=0.0, le=1.0, description="Allowed deviation from target"
    )
    priority: int = Field(
        default=1, ge=1, description="Constraint priority (higher = more important)"
    )

    @field_validator("property_expression")
    @classmethod
    def validate_property_expression(cls, v: str) -> str:
        """Strip whitespace and reject blank expressions."""
        stripped = v.strip()
        if not stripped:
            raise ValueError("property_expression must be non-empty")
        return stripped

    @field_validator("target_distribution")
    @classmethod
    def validate_target_distribution(cls, v: dict[str, float]) -> dict[str, float]:
        """Check the distribution is non-empty, in-range, and sums to ~1.0."""
        if not v:
            raise ValueError("target_distribution must not be empty")

        # every proportion must be a valid probability
        for category, prob in v.items():
            if 0.0 <= prob <= 1.0:
                continue
            raise ValueError(
                f"target_distribution values must be in [0, 1], "
                f"got {prob} for '{category}'"
            )

        # allow small rounding slack around 1.0 (e.g. 0.333 * 3)
        total = sum(v.values())
        if not 0.99 <= total <= 1.01:
            raise ValueError(
                f"target_distribution values must sum to ~1.0, got {total}"
            )

        return v
941
+
942
+
943
class BatchDiversityConstraint(BeadBaseModel):
    """Cap how many lists any single property value may appear in.

    Spreads values of a property across lists by forbidding any one
    value from occurring in more than ``max_lists_per_value`` lists.
    Useful when stimuli (verbs, nouns, templates, ...) should be
    distributed across participants instead of concentrated in a few
    lists.

    Attributes
    ----------
    constraint_type : Literal["diversity"]
        Discriminator field for constraint type (always "diversity").
    property_expression : str
        DSL expression extracting the value to check,
        e.g. "item['verb_lemma']".
    context : dict[str, ContextValue]
        Extra variables made available to DSL evaluation.
    max_lists_per_value : int
        Upper bound on the number of lists any one value may appear in.
    priority : int, default=1
        Constraint priority (higher = more important).

    Examples
    --------
    >>> # No verb should appear in more than 3 out of 8 lists
    >>> constraint = BatchDiversityConstraint(
    ...     property_expression="item['verb_lemma']",
    ...     max_lists_per_value=3
    ... )
    >>> # No template in more than half the lists
    >>> constraint = BatchDiversityConstraint(
    ...     property_expression="item['template_id']",
    ...     max_lists_per_value=4
    ... )
    """

    constraint_type: Literal["diversity"] = "diversity"
    property_expression: str = Field(
        ..., description="DSL expression for property value"
    )
    context: dict[str, ContextValue] = Field(
        default_factory=dict, description="Additional context variables"
    )
    max_lists_per_value: int = Field(
        ..., ge=1, description="Maximum lists any value can appear in"
    )
    priority: int = Field(
        default=1, ge=1, description="Constraint priority (higher = more important)"
    )

    @field_validator("property_expression")
    @classmethod
    def validate_property_expression(cls, v: str) -> str:
        """Strip whitespace and reject blank expressions."""
        stripped = v.strip()
        if not stripped:
            raise ValueError("property_expression must be non-empty")
        return stripped
1000
+
1001
+
1002
class BatchMinOccurrenceConstraint(BeadBaseModel):
    """Require a minimum number of occurrences per value batch-wide.

    Guarantees that every value of a property shows up at least
    ``min_occurrences`` times across all lists combined, so that each
    experimental condition or stimulus category collects enough data.

    Attributes
    ----------
    constraint_type : Literal["min_occurrence"]
        Discriminator field for constraint type (always "min_occurrence").
    property_expression : str
        DSL expression extracting the value to count,
        e.g. "item['quantile']".
    context : dict[str, ContextValue]
        Extra variables made available to DSL evaluation.
    min_occurrences : int
        Lower bound on each value's count across all lists.
    priority : int, default=1
        Constraint priority (higher = more important).

    Examples
    --------
    >>> # Each quantile appears at least 50 times across all lists
    >>> constraint = BatchMinOccurrenceConstraint(
    ...     property_expression="item['quantile']",
    ...     min_occurrences=50
    ... )
    >>> # Each template at least 5 times
    >>> constraint = BatchMinOccurrenceConstraint(
    ...     property_expression="item['template_id']",
    ...     min_occurrences=5
    ... )
    """

    constraint_type: Literal["min_occurrence"] = "min_occurrence"
    property_expression: str = Field(
        ..., description="DSL expression for property value"
    )
    context: dict[str, ContextValue] = Field(
        default_factory=dict, description="Additional context variables"
    )
    min_occurrences: int = Field(
        ..., ge=1, description="Minimum occurrences per value across batch"
    )
    priority: int = Field(
        default=1, ge=1, description="Constraint priority (higher = more important)"
    )

    @field_validator("property_expression")
    @classmethod
    def validate_property_expression(cls, v: str) -> str:
        """Strip whitespace and reject blank expressions."""
        stripped = v.strip()
        if not stripped:
            raise ValueError("property_expression must be non-empty")
        return stripped
1058
+
1059
+
1060
# discriminated union for all batch constraints
#
# Pydantic selects the concrete constraint class from the
# `constraint_type` discriminator field during validation/deserialization.
BatchConstraint = Annotated[
    BatchCoverageConstraint
    | BatchBalanceConstraint
    | BatchDiversityConstraint
    | BatchMinOccurrenceConstraint,
    Field(discriminator="constraint_type"),
]