bead 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. bead/__init__.py +11 -0
  2. bead/__main__.py +11 -0
  3. bead/active_learning/__init__.py +15 -0
  4. bead/active_learning/config.py +231 -0
  5. bead/active_learning/loop.py +566 -0
  6. bead/active_learning/models/__init__.py +24 -0
  7. bead/active_learning/models/base.py +852 -0
  8. bead/active_learning/models/binary.py +910 -0
  9. bead/active_learning/models/categorical.py +943 -0
  10. bead/active_learning/models/cloze.py +862 -0
  11. bead/active_learning/models/forced_choice.py +956 -0
  12. bead/active_learning/models/free_text.py +773 -0
  13. bead/active_learning/models/lora.py +365 -0
  14. bead/active_learning/models/magnitude.py +835 -0
  15. bead/active_learning/models/multi_select.py +795 -0
  16. bead/active_learning/models/ordinal_scale.py +811 -0
  17. bead/active_learning/models/peft_adapter.py +155 -0
  18. bead/active_learning/models/random_effects.py +639 -0
  19. bead/active_learning/selection.py +354 -0
  20. bead/active_learning/strategies.py +391 -0
  21. bead/active_learning/trainers/__init__.py +26 -0
  22. bead/active_learning/trainers/base.py +210 -0
  23. bead/active_learning/trainers/data_collator.py +172 -0
  24. bead/active_learning/trainers/dataset_utils.py +261 -0
  25. bead/active_learning/trainers/huggingface.py +304 -0
  26. bead/active_learning/trainers/lightning.py +324 -0
  27. bead/active_learning/trainers/metrics.py +424 -0
  28. bead/active_learning/trainers/mixed_effects.py +551 -0
  29. bead/active_learning/trainers/model_wrapper.py +509 -0
  30. bead/active_learning/trainers/registry.py +104 -0
  31. bead/adapters/__init__.py +11 -0
  32. bead/adapters/huggingface.py +61 -0
  33. bead/behavioral/__init__.py +116 -0
  34. bead/behavioral/analytics.py +646 -0
  35. bead/behavioral/extraction.py +343 -0
  36. bead/behavioral/merging.py +343 -0
  37. bead/cli/__init__.py +11 -0
  38. bead/cli/active_learning.py +513 -0
  39. bead/cli/active_learning_commands.py +779 -0
  40. bead/cli/completion.py +359 -0
  41. bead/cli/config.py +624 -0
  42. bead/cli/constraint_builders.py +286 -0
  43. bead/cli/deployment.py +859 -0
  44. bead/cli/deployment_trials.py +493 -0
  45. bead/cli/deployment_ui.py +332 -0
  46. bead/cli/display.py +378 -0
  47. bead/cli/items.py +960 -0
  48. bead/cli/items_factories.py +776 -0
  49. bead/cli/list_constraints.py +714 -0
  50. bead/cli/lists.py +490 -0
  51. bead/cli/main.py +430 -0
  52. bead/cli/models.py +877 -0
  53. bead/cli/resource_loaders.py +621 -0
  54. bead/cli/resources.py +1036 -0
  55. bead/cli/shell.py +356 -0
  56. bead/cli/simulate.py +840 -0
  57. bead/cli/templates.py +1158 -0
  58. bead/cli/training.py +1080 -0
  59. bead/cli/utils.py +614 -0
  60. bead/cli/workflow.py +1273 -0
  61. bead/config/__init__.py +68 -0
  62. bead/config/active_learning.py +1009 -0
  63. bead/config/config.py +192 -0
  64. bead/config/defaults.py +118 -0
  65. bead/config/deployment.py +217 -0
  66. bead/config/env.py +147 -0
  67. bead/config/item.py +45 -0
  68. bead/config/list.py +193 -0
  69. bead/config/loader.py +149 -0
  70. bead/config/logging.py +42 -0
  71. bead/config/model.py +49 -0
  72. bead/config/paths.py +46 -0
  73. bead/config/profiles.py +320 -0
  74. bead/config/resources.py +47 -0
  75. bead/config/serialization.py +210 -0
  76. bead/config/simulation.py +206 -0
  77. bead/config/template.py +238 -0
  78. bead/config/validation.py +267 -0
  79. bead/data/__init__.py +65 -0
  80. bead/data/base.py +87 -0
  81. bead/data/identifiers.py +97 -0
  82. bead/data/language_codes.py +61 -0
  83. bead/data/metadata.py +270 -0
  84. bead/data/range.py +123 -0
  85. bead/data/repository.py +358 -0
  86. bead/data/serialization.py +249 -0
  87. bead/data/timestamps.py +89 -0
  88. bead/data/validation.py +349 -0
  89. bead/data_collection/__init__.py +11 -0
  90. bead/data_collection/jatos.py +223 -0
  91. bead/data_collection/merger.py +154 -0
  92. bead/data_collection/prolific.py +198 -0
  93. bead/deployment/__init__.py +5 -0
  94. bead/deployment/distribution.py +402 -0
  95. bead/deployment/jatos/__init__.py +1 -0
  96. bead/deployment/jatos/api.py +200 -0
  97. bead/deployment/jatos/exporter.py +210 -0
  98. bead/deployment/jspsych/__init__.py +9 -0
  99. bead/deployment/jspsych/biome.json +44 -0
  100. bead/deployment/jspsych/config.py +411 -0
  101. bead/deployment/jspsych/generator.py +598 -0
  102. bead/deployment/jspsych/package.json +51 -0
  103. bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
  104. bead/deployment/jspsych/randomizer.py +299 -0
  105. bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
  106. bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
  107. bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
  108. bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
  109. bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
  110. bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
  111. bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
  112. bead/deployment/jspsych/src/plugins/rating.ts +248 -0
  113. bead/deployment/jspsych/src/slopit/index.ts +9 -0
  114. bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
  115. bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
  116. bead/deployment/jspsych/templates/experiment.css +1 -0
  117. bead/deployment/jspsych/templates/experiment.js.template +289 -0
  118. bead/deployment/jspsych/templates/index.html +51 -0
  119. bead/deployment/jspsych/templates/randomizer.js +241 -0
  120. bead/deployment/jspsych/templates/randomizer.js.template +313 -0
  121. bead/deployment/jspsych/trials.py +723 -0
  122. bead/deployment/jspsych/tsconfig.json +23 -0
  123. bead/deployment/jspsych/tsup.config.ts +30 -0
  124. bead/deployment/jspsych/ui/__init__.py +1 -0
  125. bead/deployment/jspsych/ui/components.py +383 -0
  126. bead/deployment/jspsych/ui/styles.py +411 -0
  127. bead/dsl/__init__.py +80 -0
  128. bead/dsl/ast.py +168 -0
  129. bead/dsl/context.py +178 -0
  130. bead/dsl/errors.py +71 -0
  131. bead/dsl/evaluator.py +570 -0
  132. bead/dsl/grammar.lark +81 -0
  133. bead/dsl/parser.py +231 -0
  134. bead/dsl/stdlib.py +929 -0
  135. bead/evaluation/__init__.py +13 -0
  136. bead/evaluation/convergence.py +485 -0
  137. bead/evaluation/interannotator.py +398 -0
  138. bead/items/__init__.py +40 -0
  139. bead/items/adapters/__init__.py +70 -0
  140. bead/items/adapters/anthropic.py +224 -0
  141. bead/items/adapters/api_utils.py +167 -0
  142. bead/items/adapters/base.py +216 -0
  143. bead/items/adapters/google.py +259 -0
  144. bead/items/adapters/huggingface.py +1074 -0
  145. bead/items/adapters/openai.py +323 -0
  146. bead/items/adapters/registry.py +202 -0
  147. bead/items/adapters/sentence_transformers.py +224 -0
  148. bead/items/adapters/togetherai.py +309 -0
  149. bead/items/binary.py +515 -0
  150. bead/items/cache.py +558 -0
  151. bead/items/categorical.py +593 -0
  152. bead/items/cloze.py +757 -0
  153. bead/items/constructor.py +784 -0
  154. bead/items/forced_choice.py +413 -0
  155. bead/items/free_text.py +681 -0
  156. bead/items/generation.py +432 -0
  157. bead/items/item.py +396 -0
  158. bead/items/item_template.py +787 -0
  159. bead/items/magnitude.py +573 -0
  160. bead/items/multi_select.py +621 -0
  161. bead/items/ordinal_scale.py +569 -0
  162. bead/items/scoring.py +448 -0
  163. bead/items/validation.py +723 -0
  164. bead/lists/__init__.py +30 -0
  165. bead/lists/balancer.py +263 -0
  166. bead/lists/constraints.py +1067 -0
  167. bead/lists/experiment_list.py +286 -0
  168. bead/lists/list_collection.py +378 -0
  169. bead/lists/partitioner.py +1141 -0
  170. bead/lists/stratification.py +254 -0
  171. bead/participants/__init__.py +73 -0
  172. bead/participants/collection.py +699 -0
  173. bead/participants/merging.py +312 -0
  174. bead/participants/metadata_spec.py +491 -0
  175. bead/participants/models.py +276 -0
  176. bead/resources/__init__.py +29 -0
  177. bead/resources/adapters/__init__.py +19 -0
  178. bead/resources/adapters/base.py +104 -0
  179. bead/resources/adapters/cache.py +128 -0
  180. bead/resources/adapters/glazing.py +508 -0
  181. bead/resources/adapters/registry.py +117 -0
  182. bead/resources/adapters/unimorph.py +796 -0
  183. bead/resources/classification.py +856 -0
  184. bead/resources/constraint_builders.py +329 -0
  185. bead/resources/constraints.py +165 -0
  186. bead/resources/lexical_item.py +223 -0
  187. bead/resources/lexicon.py +744 -0
  188. bead/resources/loaders.py +209 -0
  189. bead/resources/template.py +441 -0
  190. bead/resources/template_collection.py +707 -0
  191. bead/resources/template_generation.py +349 -0
  192. bead/simulation/__init__.py +29 -0
  193. bead/simulation/annotators/__init__.py +15 -0
  194. bead/simulation/annotators/base.py +175 -0
  195. bead/simulation/annotators/distance_based.py +135 -0
  196. bead/simulation/annotators/lm_based.py +114 -0
  197. bead/simulation/annotators/oracle.py +182 -0
  198. bead/simulation/annotators/random.py +181 -0
  199. bead/simulation/dsl_extension/__init__.py +3 -0
  200. bead/simulation/noise_models/__init__.py +13 -0
  201. bead/simulation/noise_models/base.py +42 -0
  202. bead/simulation/noise_models/random_noise.py +82 -0
  203. bead/simulation/noise_models/systematic.py +132 -0
  204. bead/simulation/noise_models/temperature.py +86 -0
  205. bead/simulation/runner.py +144 -0
  206. bead/simulation/strategies/__init__.py +23 -0
  207. bead/simulation/strategies/base.py +123 -0
  208. bead/simulation/strategies/binary.py +103 -0
  209. bead/simulation/strategies/categorical.py +123 -0
  210. bead/simulation/strategies/cloze.py +224 -0
  211. bead/simulation/strategies/forced_choice.py +127 -0
  212. bead/simulation/strategies/free_text.py +105 -0
  213. bead/simulation/strategies/magnitude.py +116 -0
  214. bead/simulation/strategies/multi_select.py +129 -0
  215. bead/simulation/strategies/ordinal_scale.py +131 -0
  216. bead/templates/__init__.py +27 -0
  217. bead/templates/adapters/__init__.py +17 -0
  218. bead/templates/adapters/base.py +128 -0
  219. bead/templates/adapters/cache.py +178 -0
  220. bead/templates/adapters/huggingface.py +312 -0
  221. bead/templates/combinatorics.py +103 -0
  222. bead/templates/filler.py +605 -0
  223. bead/templates/renderers.py +177 -0
  224. bead/templates/resolver.py +178 -0
  225. bead/templates/strategies.py +1806 -0
  226. bead/templates/streaming.py +195 -0
  227. bead-0.1.0.dist-info/METADATA +212 -0
  228. bead-0.1.0.dist-info/RECORD +231 -0
  229. bead-0.1.0.dist-info/WHEEL +4 -0
  230. bead-0.1.0.dist-info/entry_points.txt +2 -0
  231. bead-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,286 @@
1
+ """Experiment list data model for organizing experimental items.
2
+
3
+ This module provides the ExperimentList model for organizing experimental items
4
+ into lists for presentation to participants. Lists use stand-off annotation with
5
+ UUID references to items rather than embedding full item objects.
6
+
7
+ The model supports:
8
+ - Item assignment tracking via UUIDs
9
+ - Presentation order specification
10
+ - Constraint satisfaction tracking
11
+ - Balance metrics computation
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import random
17
+ from typing import TYPE_CHECKING
18
+
19
+ if TYPE_CHECKING:
20
+ from bead.items.item_template import MetadataValue
21
+ else:
22
+ # Recursive type for metadata values
23
+ type MetadataValue = (
24
+ str | int | float | bool | None | dict[str, MetadataValue] | list[MetadataValue]
25
+ )
26
+ from uuid import UUID
27
+
28
+ from pydantic import Field, field_validator, model_validator
29
+
30
+ from bead.data.base import BeadBaseModel
31
+ from bead.lists.constraints import ListConstraint
32
+
33
+
34
+ # Factory functions for default values
35
+ def _empty_uuid_list() -> list[UUID]:
36
+ """Return empty UUID list."""
37
+ return []
38
+
39
+
40
+ def _empty_constraint_list() -> list[ListConstraint]:
41
+ """Return empty ListConstraint list."""
42
+ return []
43
+
44
+
45
+ def _empty_uuid_bool_dict() -> dict[UUID, bool]:
46
+ """Return empty UUID-to-bool dict."""
47
+ return {}
48
+
49
+
50
+ def _empty_metadata_dict() -> dict[str, MetadataValue]:
51
+ """Return empty metadata dictionary."""
52
+ return {}
53
+
54
+
55
class ExperimentList(BeadBaseModel):
    """A list of experimental items for participant presentation.

    Uses stand-off annotation - stores only item UUIDs, not full items.
    Items can be looked up by UUID from an ItemCollection or Repository.

    Attributes
    ----------
    name : str
        Name of this list (e.g., "list_0", "practice_list").
    list_number : int
        Numeric identifier for this list (must be >= 0).
    item_refs : list[UUID]
        UUIDs of items in this list (stand-off annotation).
    list_constraints : list[ListConstraint]
        Constraints this list must satisfy.
    constraint_satisfaction : dict[UUID, bool]
        Map of constraint UUIDs to satisfaction status.
    presentation_order : list[UUID] | None
        Explicit presentation order (if None, use item_refs order).
        Must contain exactly the same UUIDs as item_refs.
    list_metadata : dict[str, MetadataValue]
        Metadata for this list.
    balance_metrics : dict[str, MetadataValue]
        Metrics about list balance (e.g., distribution statistics).

    Examples
    --------
    >>> from uuid import uuid4
    >>> exp_list = ExperimentList(
    ...     name="list_0",
    ...     list_number=0
    ... )
    >>> item_id = uuid4()
    >>> exp_list.add_item(item_id)
    >>> len(exp_list.item_refs)
    1
    >>> exp_list.shuffle_order(seed=42)
    >>> exp_list.get_presentation_order()[0] == item_id
    True
    """

    name: str = Field(..., description="List name")
    list_number: int = Field(..., ge=0, description="Numeric list identifier")
    item_refs: list[UUID] = Field(
        default_factory=_empty_uuid_list, description="Item UUIDs (stand-off)"
    )
    list_constraints: list[ListConstraint] = Field(
        default_factory=_empty_constraint_list, description="List constraints"
    )
    constraint_satisfaction: dict[UUID, bool] = Field(
        default_factory=_empty_uuid_bool_dict,
        description="Constraint satisfaction status",
    )
    presentation_order: list[UUID] | None = Field(
        default=None, description="Explicit presentation order"
    )
    list_metadata: dict[str, MetadataValue] = Field(
        default_factory=_empty_metadata_dict, description="List metadata"
    )
    balance_metrics: dict[str, MetadataValue] = Field(
        default_factory=_empty_metadata_dict, description="Balance metrics"
    )

    @field_validator("name")
    @classmethod
    def validate_name(cls, v: str) -> str:
        """Validate name is non-empty.

        Parameters
        ----------
        v : str
            Name to validate.

        Returns
        -------
        str
            Validated name (whitespace stripped).

        Raises
        ------
        ValueError
            If name is empty or contains only whitespace.
        """
        stripped = v.strip() if v else ""
        if not stripped:
            raise ValueError("name must be non-empty")
        return stripped

    @model_validator(mode="after")
    def validate_presentation_order(self) -> ExperimentList:
        """Validate presentation_order matches item_refs.

        If presentation_order is set, it must contain exactly the same UUIDs
        as item_refs (no more, no less, no duplicates).

        Returns
        -------
        ExperimentList
            Validated list.

        Raises
        ------
        ValueError
            If presentation_order doesn't match item_refs.
        """
        order = self.presentation_order
        if order is None:
            return self

        # A set shorter than the list means at least one UUID is repeated.
        order_set = set(order)
        if len(order) != len(order_set):
            raise ValueError("presentation_order contains duplicate UUIDs")

        item_set = set(self.item_refs)
        if order_set == item_set:
            return self

        # Report both directions of the mismatch so the caller can fix it.
        extra = order_set - item_set
        missing = item_set - order_set
        error_parts: list[str] = []
        if extra:
            error_parts.append(f"extra UUIDs: {extra}")
        if missing:
            error_parts.append(f"missing UUIDs: {missing}")

        raise ValueError(
            "presentation_order must contain exactly same UUIDs "
            f"as item_refs ({', '.join(error_parts)})"
        )

    def add_item(self, item_id: UUID) -> None:
        """Add an item to this list.

        The UUID is appended to ``item_refs``. When an explicit
        ``presentation_order`` exists, the UUID is also appended to it
        (unless already present), so the order keeps covering every item
        in ``item_refs`` as required by ``validate_presentation_order``.

        Parameters
        ----------
        item_id : UUID
            UUID of item to add.

        Examples
        --------
        >>> from uuid import uuid4
        >>> exp_list = ExperimentList(name="test", list_number=0)
        >>> item_id = uuid4()
        >>> exp_list.add_item(item_id)
        >>> item_id in exp_list.item_refs
        True
        >>> exp_list.shuffle_order(seed=0)
        >>> late_id = uuid4()
        >>> exp_list.add_item(late_id)
        >>> exp_list.get_presentation_order()[-1] == late_id
        True
        """
        self.item_refs.append(item_id)

        # Mirror remove_item: keep an explicit order in sync with item_refs.
        # The membership test avoids introducing duplicates, which the
        # presentation-order validator rejects.
        order = self.presentation_order
        if order is not None and item_id not in order:
            order.append(item_id)

        self.update_modified_time()

    def remove_item(self, item_id: UUID) -> None:
        """Remove an item from this list.

        Removes the first occurrence from ``item_refs`` and, when an
        explicit ``presentation_order`` contains it, from there as well.

        Parameters
        ----------
        item_id : UUID
            UUID of item to remove.

        Raises
        ------
        ValueError
            If item_id is not in the list.

        Examples
        --------
        >>> from uuid import uuid4
        >>> exp_list = ExperimentList(name="test", list_number=0)
        >>> item_id = uuid4()
        >>> exp_list.add_item(item_id)
        >>> exp_list.remove_item(item_id)
        >>> item_id in exp_list.item_refs
        False
        """
        try:
            self.item_refs.remove(item_id)
        except ValueError:
            # Replace list.remove's generic message with a domain-specific one.
            raise ValueError(f"Item {item_id} not found in list") from None

        order = self.presentation_order
        if order is not None and item_id in order:
            order.remove(item_id)

        self.update_modified_time()

    def shuffle_order(self, seed: int | None = None) -> None:
        """Shuffle presentation order.

        Creates a randomized presentation order from item_refs.
        Uses random.Random(seed) for reproducible shuffling; ``item_refs``
        itself is left untouched.

        Parameters
        ----------
        seed : int | None
            Random seed for reproducibility.

        Examples
        --------
        >>> from uuid import uuid4
        >>> exp_list = ExperimentList(name="test", list_number=0)
        >>> exp_list.add_item(uuid4())
        >>> exp_list.add_item(uuid4())
        >>> exp_list.shuffle_order(seed=42)
        >>> exp_list.presentation_order is not None
        True
        """
        # Shuffle a copy so the canonical item_refs order is preserved.
        shuffled = list(self.item_refs)
        random.Random(seed).shuffle(shuffled)
        self.presentation_order = shuffled
        self.update_modified_time()

    def get_presentation_order(self) -> list[UUID]:
        """Get the presentation order.

        Returns presentation_order if it is set and non-empty, otherwise
        returns item_refs. The returned list is the model's own list object,
        not a copy.

        Returns
        -------
        list[UUID]
            UUIDs in presentation order.

        Examples
        --------
        >>> from uuid import uuid4
        >>> exp_list = ExperimentList(name="test", list_number=0)
        >>> item_id = uuid4()
        >>> exp_list.add_item(item_id)
        >>> exp_list.get_presentation_order()[0] == item_id
        True
        """
        # Truthiness check: both None and an empty order fall back to item_refs.
        if self.presentation_order:
            return self.presentation_order
        return self.item_refs
@@ -0,0 +1,378 @@
1
+ """List collection data model for managing multiple experimental lists.
2
+
3
+ This module provides the ListCollection model for managing multiple ExperimentList
4
+ instances along with metadata about the partitioning process that created them.
5
+
6
+ The model supports:
7
+ - Multiple experimental lists
8
+ - Partitioning metadata tracking
9
+ - Coverage validation (ensuring all items are assigned exactly once)
10
+ - List lookup by number
11
+ - JSONL serialization (one list per line)
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ from pathlib import Path
18
+ from typing import TYPE_CHECKING, TypedDict
19
+ from uuid import UUID
20
+
21
+ from pydantic import Field, field_validator
22
+
23
+ from bead.data.base import BeadBaseModel
24
+ from bead.lists.experiment_list import ExperimentList
25
+
26
+ if TYPE_CHECKING:
27
+ from bead.items.item_template import MetadataValue
28
+ else:
29
+ # Recursive type for metadata values
30
+ type MetadataValue = (
31
+ str | int | float | bool | None | dict[str, MetadataValue] | list[MetadataValue]
32
+ )
33
+
34
+
35
class CoverageValidationResult(TypedDict):
    """Result of coverage validation.

    Shape of the report returned by ``ListCollection.validate_coverage``.
    """

    # True when there are no missing items and no duplicate items.
    valid: bool
    # Expected item UUIDs that were not assigned to any list.
    missing_items: list[UUID]
    # Item UUIDs that were assigned more than once across the lists.
    duplicate_items: list[UUID]
    # Total number of assignments across all lists (repeats counted).
    total_assigned: int
42
+
43
+
44
+ # Factory functions for default values
45
+ def _empty_experiment_list_list() -> list[ExperimentList]:
46
+ """Return empty ExperimentList list."""
47
+ return []
48
+
49
+
50
+ def _empty_metadata_dict() -> dict[str, MetadataValue]:
51
+ """Return empty metadata dictionary."""
52
+ return {}
53
+
54
+
55
class ListCollection(BeadBaseModel):
    """A collection of experimental lists.

    Contains multiple ExperimentList instances along with metadata about
    the partitioning process that created them. Every list in the collection
    must have a distinct ``list_number``.

    Attributes
    ----------
    name : str
        Name of this collection.
    source_items_id : UUID
        UUID of source ItemCollection.
    lists : list[ExperimentList]
        The experimental lists.
    partitioning_strategy : str
        Strategy used for partitioning (e.g., "balanced", "random", "stratified").
    partitioning_config : dict[str, MetadataValue]
        Configuration for partitioning.
    partitioning_stats : dict[str, MetadataValue]
        Statistics about the partitioning process.

    Examples
    --------
    >>> from uuid import uuid4
    >>> collection = ListCollection(
    ...     name="my_lists",
    ...     source_items_id=uuid4(),
    ...     partitioning_strategy="balanced"
    ... )
    >>> exp_list = ExperimentList(name="list_0", list_number=0)
    >>> collection.add_list(exp_list)
    >>> len(collection.lists)
    1
    """

    name: str = Field(..., description="Collection name")
    source_items_id: UUID = Field(..., description="Source ItemCollection UUID")
    lists: list[ExperimentList] = Field(
        default_factory=_empty_experiment_list_list, description="Experimental lists"
    )
    partitioning_strategy: str = Field(..., description="Partitioning strategy used")
    partitioning_config: dict[str, MetadataValue] = Field(
        default_factory=_empty_metadata_dict, description="Partitioning configuration"
    )
    partitioning_stats: dict[str, MetadataValue] = Field(
        default_factory=_empty_metadata_dict, description="Partitioning statistics"
    )

    @field_validator("name", "partitioning_strategy")
    @classmethod
    def validate_non_empty_string(cls, v: str) -> str:
        """Validate string fields are non-empty.

        Parameters
        ----------
        v : str
            String to validate.

        Returns
        -------
        str
            Validated string (whitespace stripped).

        Raises
        ------
        ValueError
            If string is empty or contains only whitespace.
        """
        stripped = v.strip() if v else ""
        if not stripped:
            raise ValueError("Field must be non-empty")
        return stripped

    @field_validator("lists")
    @classmethod
    def validate_unique_list_numbers(
        cls, v: list[ExperimentList]
    ) -> list[ExperimentList]:
        """Validate all list_numbers are unique.

        Parameters
        ----------
        v : list[ExperimentList]
            Lists to validate.

        Returns
        -------
        list[ExperimentList]
            Validated lists.

        Raises
        ------
        ValueError
            If duplicate list_numbers found.
        """
        list_numbers = [exp_list.list_number for exp_list in v]

        # Single counting pass instead of calling list.count() per element.
        occurrences: dict[int, int] = {}
        for number in list_numbers:
            occurrences[number] = occurrences.get(number, 0) + 1

        # Fewer distinct numbers than lists means at least one is repeated.
        if len(occurrences) != len(list_numbers):
            duplicates = [num for num in list_numbers if occurrences[num] > 1]
            raise ValueError(f"Duplicate list_numbers found: {set(duplicates)}")

        return v

    def add_list(self, exp_list: ExperimentList) -> None:
        """Add a list to the collection.

        Enforces the same uniqueness rule as the ``lists`` field validator,
        so a collection built incrementally can always be saved with
        ``to_jsonl`` and loaded back with ``from_jsonl``.

        Parameters
        ----------
        exp_list : ExperimentList
            List to add.

        Raises
        ------
        ValueError
            If the collection already contains a list with the same
            ``list_number``.

        Examples
        --------
        >>> from uuid import uuid4
        >>> collection = ListCollection(
        ...     name="test",
        ...     source_items_id=uuid4(),
        ...     partitioning_strategy="balanced"
        ... )
        >>> exp_list = ExperimentList(name="list_0", list_number=0)
        >>> collection.add_list(exp_list)
        >>> len(collection.lists)
        1
        """
        new_number = exp_list.list_number
        if any(existing.list_number == new_number for existing in self.lists):
            raise ValueError(
                f"List number {new_number} already exists in collection"
            )

        self.lists.append(exp_list)
        self.update_modified_time()

    def get_list_by_number(self, list_number: int) -> ExperimentList | None:
        """Get a list by its number.

        Parameters
        ----------
        list_number : int
            List number to search for.

        Returns
        -------
        ExperimentList | None
            First list with matching number, or None if not found.

        Examples
        --------
        >>> from uuid import uuid4
        >>> collection = ListCollection(
        ...     name="test",
        ...     source_items_id=uuid4(),
        ...     partitioning_strategy="balanced"
        ... )
        >>> exp_list = ExperimentList(name="list_0", list_number=0)
        >>> collection.add_list(exp_list)
        >>> found = collection.get_list_by_number(0)
        >>> found is not None
        True
        """
        matches = (
            exp_list for exp_list in self.lists if exp_list.list_number == list_number
        )
        return next(matches, None)

    def get_all_item_refs(self) -> list[UUID]:
        """Return all unique item UUIDs across all lists.

        UUIDs are returned in first-seen order: lists are visited in
        collection order and, within each list, items in ``item_refs`` order.

        Returns
        -------
        list[UUID]
            All unique item UUIDs.

        Examples
        --------
        >>> from uuid import uuid4
        >>> collection = ListCollection(
        ...     name="test",
        ...     source_items_id=uuid4(),
        ...     partitioning_strategy="balanced"
        ... )
        >>> exp_list = ExperimentList(name="list_0", list_number=0)
        >>> item_id = uuid4()
        >>> exp_list.add_item(item_id)
        >>> collection.add_list(exp_list)
        >>> item_id in collection.get_all_item_refs()
        True
        """
        # A dict de-duplicates while keeping insertion order, unlike a set.
        seen: dict[UUID, None] = {}
        for exp_list in self.lists:
            seen.update(dict.fromkeys(exp_list.item_refs))
        return list(seen)

    def validate_coverage(self, all_item_ids: set[UUID]) -> CoverageValidationResult:
        """Check that all items are assigned exactly once.

        Validates that:
        - All items in all_item_ids are assigned to at least one list
        - No item is assigned more than once (items assigned exactly once)

        Items that are assigned but absent from all_item_ids are not
        reported and do not affect validity.

        Parameters
        ----------
        all_item_ids : set[UUID]
            Set of all item UUIDs that should be assigned.

        Returns
        -------
        CoverageValidationResult
            Validation report with keys:
            - "valid": bool - Whether validation passed
            - "missing_items": list[UUID] - Items not assigned to any list
            - "duplicate_items": list[UUID] - Items assigned more than once
            - "total_assigned": int - Total assignments across all lists

        Examples
        --------
        >>> from uuid import uuid4
        >>> collection = ListCollection(
        ...     name="test",
        ...     source_items_id=uuid4(),
        ...     partitioning_strategy="balanced"
        ... )
        >>> item_id = uuid4()
        >>> exp_list = ExperimentList(name="list_0", list_number=0)
        >>> exp_list.add_item(item_id)
        >>> collection.add_list(exp_list)
        >>> result = collection.validate_coverage({item_id})
        >>> result["valid"]
        True
        """
        # Tally how many times each item is referenced across every list.
        item_counts: dict[UUID, int] = {}
        for exp_list in self.lists:
            for item_id in exp_list.item_refs:
                item_counts[item_id] = item_counts.get(item_id, 0) + 1

        assigned_items = set(item_counts.keys())
        missing_items = list(all_item_ids - assigned_items)
        duplicate_items = [
            item_id for item_id, count in item_counts.items() if count > 1
        ]

        return {
            "valid": not missing_items and not duplicate_items,
            "missing_items": missing_items,
            "duplicate_items": duplicate_items,
            "total_assigned": sum(item_counts.values()),
        }

    def to_jsonl(self, path: Path | str) -> None:
        """Write lists to a JSONL file (one list per line).

        Only the lists are written; collection-level fields (name, source
        items UUID, partitioning information) are not part of the file.
        Missing parent directories are created.

        Parameters
        ----------
        path : Path | str
            Path to output JSONL file.

        Examples
        --------
        >>> from uuid import uuid4
        >>> collection = ListCollection(
        ...     name="test",
        ...     source_items_id=uuid4(),
        ...     partitioning_strategy="balanced"
        ... )
        >>> exp_list = ExperimentList(name="list_0", list_number=0)
        >>> collection.add_list(exp_list)
        >>> collection.to_jsonl("lists.jsonl")  # doctest: +SKIP
        """
        target = Path(path)
        target.parent.mkdir(parents=True, exist_ok=True)
        with target.open("w", encoding="utf-8") as f:
            f.writelines(
                exp_list.model_dump_json() + "\n" for exp_list in self.lists
            )

    @classmethod
    def from_jsonl(
        cls,
        path: Path | str,
        name: str = "loaded_lists",
        source_items_id: UUID | None = None,
        partitioning_strategy: str = "unknown",
    ) -> ListCollection:
        """Load lists from a JSONL file (one list per line).

        Blank lines are skipped. Because the file holds only the lists,
        the collection-level fields are taken from the arguments.

        Parameters
        ----------
        path : Path | str
            Path to JSONL file containing experiment lists.
        name : str
            Name for the collection (default: "loaded_lists").
        source_items_id : UUID | None
            Source items UUID. If None, uses a nil UUID.
        partitioning_strategy : str
            Strategy name (default: "unknown").

        Returns
        -------
        ListCollection
            Collection containing the loaded lists.

        Examples
        --------
        >>> collection = ListCollection.from_jsonl("lists.jsonl")  # doctest: +SKIP
        """
        loaded: list[ExperimentList] = []
        with Path(path).open(encoding="utf-8") as f:
            for raw_line in f:
                record = raw_line.strip()
                if record:
                    loaded.append(ExperimentList(**json.loads(record)))

        # The nil UUID stands in when the caller does not know the source.
        if source_items_id is None:
            source_items_id = UUID(int=0)

        return cls(
            name=name,
            source_items_id=source_items_id,
            lists=loaded,
            partitioning_strategy=partitioning_strategy,
        )