bead 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. bead/__init__.py +11 -0
  2. bead/__main__.py +11 -0
  3. bead/active_learning/__init__.py +15 -0
  4. bead/active_learning/config.py +231 -0
  5. bead/active_learning/loop.py +566 -0
  6. bead/active_learning/models/__init__.py +24 -0
  7. bead/active_learning/models/base.py +852 -0
  8. bead/active_learning/models/binary.py +910 -0
  9. bead/active_learning/models/categorical.py +943 -0
  10. bead/active_learning/models/cloze.py +862 -0
  11. bead/active_learning/models/forced_choice.py +956 -0
  12. bead/active_learning/models/free_text.py +773 -0
  13. bead/active_learning/models/lora.py +365 -0
  14. bead/active_learning/models/magnitude.py +835 -0
  15. bead/active_learning/models/multi_select.py +795 -0
  16. bead/active_learning/models/ordinal_scale.py +811 -0
  17. bead/active_learning/models/peft_adapter.py +155 -0
  18. bead/active_learning/models/random_effects.py +639 -0
  19. bead/active_learning/selection.py +354 -0
  20. bead/active_learning/strategies.py +391 -0
  21. bead/active_learning/trainers/__init__.py +26 -0
  22. bead/active_learning/trainers/base.py +210 -0
  23. bead/active_learning/trainers/data_collator.py +172 -0
  24. bead/active_learning/trainers/dataset_utils.py +261 -0
  25. bead/active_learning/trainers/huggingface.py +304 -0
  26. bead/active_learning/trainers/lightning.py +324 -0
  27. bead/active_learning/trainers/metrics.py +424 -0
  28. bead/active_learning/trainers/mixed_effects.py +551 -0
  29. bead/active_learning/trainers/model_wrapper.py +509 -0
  30. bead/active_learning/trainers/registry.py +104 -0
  31. bead/adapters/__init__.py +11 -0
  32. bead/adapters/huggingface.py +61 -0
  33. bead/behavioral/__init__.py +116 -0
  34. bead/behavioral/analytics.py +646 -0
  35. bead/behavioral/extraction.py +343 -0
  36. bead/behavioral/merging.py +343 -0
  37. bead/cli/__init__.py +11 -0
  38. bead/cli/active_learning.py +513 -0
  39. bead/cli/active_learning_commands.py +779 -0
  40. bead/cli/completion.py +359 -0
  41. bead/cli/config.py +624 -0
  42. bead/cli/constraint_builders.py +286 -0
  43. bead/cli/deployment.py +859 -0
  44. bead/cli/deployment_trials.py +493 -0
  45. bead/cli/deployment_ui.py +332 -0
  46. bead/cli/display.py +378 -0
  47. bead/cli/items.py +960 -0
  48. bead/cli/items_factories.py +776 -0
  49. bead/cli/list_constraints.py +714 -0
  50. bead/cli/lists.py +490 -0
  51. bead/cli/main.py +430 -0
  52. bead/cli/models.py +877 -0
  53. bead/cli/resource_loaders.py +621 -0
  54. bead/cli/resources.py +1036 -0
  55. bead/cli/shell.py +356 -0
  56. bead/cli/simulate.py +840 -0
  57. bead/cli/templates.py +1158 -0
  58. bead/cli/training.py +1080 -0
  59. bead/cli/utils.py +614 -0
  60. bead/cli/workflow.py +1273 -0
  61. bead/config/__init__.py +68 -0
  62. bead/config/active_learning.py +1009 -0
  63. bead/config/config.py +192 -0
  64. bead/config/defaults.py +118 -0
  65. bead/config/deployment.py +217 -0
  66. bead/config/env.py +147 -0
  67. bead/config/item.py +45 -0
  68. bead/config/list.py +193 -0
  69. bead/config/loader.py +149 -0
  70. bead/config/logging.py +42 -0
  71. bead/config/model.py +49 -0
  72. bead/config/paths.py +46 -0
  73. bead/config/profiles.py +320 -0
  74. bead/config/resources.py +47 -0
  75. bead/config/serialization.py +210 -0
  76. bead/config/simulation.py +206 -0
  77. bead/config/template.py +238 -0
  78. bead/config/validation.py +267 -0
  79. bead/data/__init__.py +65 -0
  80. bead/data/base.py +87 -0
  81. bead/data/identifiers.py +97 -0
  82. bead/data/language_codes.py +61 -0
  83. bead/data/metadata.py +270 -0
  84. bead/data/range.py +123 -0
  85. bead/data/repository.py +358 -0
  86. bead/data/serialization.py +249 -0
  87. bead/data/timestamps.py +89 -0
  88. bead/data/validation.py +349 -0
  89. bead/data_collection/__init__.py +11 -0
  90. bead/data_collection/jatos.py +223 -0
  91. bead/data_collection/merger.py +154 -0
  92. bead/data_collection/prolific.py +198 -0
  93. bead/deployment/__init__.py +5 -0
  94. bead/deployment/distribution.py +402 -0
  95. bead/deployment/jatos/__init__.py +1 -0
  96. bead/deployment/jatos/api.py +200 -0
  97. bead/deployment/jatos/exporter.py +210 -0
  98. bead/deployment/jspsych/__init__.py +9 -0
  99. bead/deployment/jspsych/biome.json +44 -0
  100. bead/deployment/jspsych/config.py +411 -0
  101. bead/deployment/jspsych/generator.py +598 -0
  102. bead/deployment/jspsych/package.json +51 -0
  103. bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
  104. bead/deployment/jspsych/randomizer.py +299 -0
  105. bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
  106. bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
  107. bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
  108. bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
  109. bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
  110. bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
  111. bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
  112. bead/deployment/jspsych/src/plugins/rating.ts +248 -0
  113. bead/deployment/jspsych/src/slopit/index.ts +9 -0
  114. bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
  115. bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
  116. bead/deployment/jspsych/templates/experiment.css +1 -0
  117. bead/deployment/jspsych/templates/experiment.js.template +289 -0
  118. bead/deployment/jspsych/templates/index.html +51 -0
  119. bead/deployment/jspsych/templates/randomizer.js +241 -0
  120. bead/deployment/jspsych/templates/randomizer.js.template +313 -0
  121. bead/deployment/jspsych/trials.py +723 -0
  122. bead/deployment/jspsych/tsconfig.json +23 -0
  123. bead/deployment/jspsych/tsup.config.ts +30 -0
  124. bead/deployment/jspsych/ui/__init__.py +1 -0
  125. bead/deployment/jspsych/ui/components.py +383 -0
  126. bead/deployment/jspsych/ui/styles.py +411 -0
  127. bead/dsl/__init__.py +80 -0
  128. bead/dsl/ast.py +168 -0
  129. bead/dsl/context.py +178 -0
  130. bead/dsl/errors.py +71 -0
  131. bead/dsl/evaluator.py +570 -0
  132. bead/dsl/grammar.lark +81 -0
  133. bead/dsl/parser.py +231 -0
  134. bead/dsl/stdlib.py +929 -0
  135. bead/evaluation/__init__.py +13 -0
  136. bead/evaluation/convergence.py +485 -0
  137. bead/evaluation/interannotator.py +398 -0
  138. bead/items/__init__.py +40 -0
  139. bead/items/adapters/__init__.py +70 -0
  140. bead/items/adapters/anthropic.py +224 -0
  141. bead/items/adapters/api_utils.py +167 -0
  142. bead/items/adapters/base.py +216 -0
  143. bead/items/adapters/google.py +259 -0
  144. bead/items/adapters/huggingface.py +1074 -0
  145. bead/items/adapters/openai.py +323 -0
  146. bead/items/adapters/registry.py +202 -0
  147. bead/items/adapters/sentence_transformers.py +224 -0
  148. bead/items/adapters/togetherai.py +309 -0
  149. bead/items/binary.py +515 -0
  150. bead/items/cache.py +558 -0
  151. bead/items/categorical.py +593 -0
  152. bead/items/cloze.py +757 -0
  153. bead/items/constructor.py +784 -0
  154. bead/items/forced_choice.py +413 -0
  155. bead/items/free_text.py +681 -0
  156. bead/items/generation.py +432 -0
  157. bead/items/item.py +396 -0
  158. bead/items/item_template.py +787 -0
  159. bead/items/magnitude.py +573 -0
  160. bead/items/multi_select.py +621 -0
  161. bead/items/ordinal_scale.py +569 -0
  162. bead/items/scoring.py +448 -0
  163. bead/items/validation.py +723 -0
  164. bead/lists/__init__.py +30 -0
  165. bead/lists/balancer.py +263 -0
  166. bead/lists/constraints.py +1067 -0
  167. bead/lists/experiment_list.py +286 -0
  168. bead/lists/list_collection.py +378 -0
  169. bead/lists/partitioner.py +1141 -0
  170. bead/lists/stratification.py +254 -0
  171. bead/participants/__init__.py +73 -0
  172. bead/participants/collection.py +699 -0
  173. bead/participants/merging.py +312 -0
  174. bead/participants/metadata_spec.py +491 -0
  175. bead/participants/models.py +276 -0
  176. bead/resources/__init__.py +29 -0
  177. bead/resources/adapters/__init__.py +19 -0
  178. bead/resources/adapters/base.py +104 -0
  179. bead/resources/adapters/cache.py +128 -0
  180. bead/resources/adapters/glazing.py +508 -0
  181. bead/resources/adapters/registry.py +117 -0
  182. bead/resources/adapters/unimorph.py +796 -0
  183. bead/resources/classification.py +856 -0
  184. bead/resources/constraint_builders.py +329 -0
  185. bead/resources/constraints.py +165 -0
  186. bead/resources/lexical_item.py +223 -0
  187. bead/resources/lexicon.py +744 -0
  188. bead/resources/loaders.py +209 -0
  189. bead/resources/template.py +441 -0
  190. bead/resources/template_collection.py +707 -0
  191. bead/resources/template_generation.py +349 -0
  192. bead/simulation/__init__.py +29 -0
  193. bead/simulation/annotators/__init__.py +15 -0
  194. bead/simulation/annotators/base.py +175 -0
  195. bead/simulation/annotators/distance_based.py +135 -0
  196. bead/simulation/annotators/lm_based.py +114 -0
  197. bead/simulation/annotators/oracle.py +182 -0
  198. bead/simulation/annotators/random.py +181 -0
  199. bead/simulation/dsl_extension/__init__.py +3 -0
  200. bead/simulation/noise_models/__init__.py +13 -0
  201. bead/simulation/noise_models/base.py +42 -0
  202. bead/simulation/noise_models/random_noise.py +82 -0
  203. bead/simulation/noise_models/systematic.py +132 -0
  204. bead/simulation/noise_models/temperature.py +86 -0
  205. bead/simulation/runner.py +144 -0
  206. bead/simulation/strategies/__init__.py +23 -0
  207. bead/simulation/strategies/base.py +123 -0
  208. bead/simulation/strategies/binary.py +103 -0
  209. bead/simulation/strategies/categorical.py +123 -0
  210. bead/simulation/strategies/cloze.py +224 -0
  211. bead/simulation/strategies/forced_choice.py +127 -0
  212. bead/simulation/strategies/free_text.py +105 -0
  213. bead/simulation/strategies/magnitude.py +116 -0
  214. bead/simulation/strategies/multi_select.py +129 -0
  215. bead/simulation/strategies/ordinal_scale.py +131 -0
  216. bead/templates/__init__.py +27 -0
  217. bead/templates/adapters/__init__.py +17 -0
  218. bead/templates/adapters/base.py +128 -0
  219. bead/templates/adapters/cache.py +178 -0
  220. bead/templates/adapters/huggingface.py +312 -0
  221. bead/templates/combinatorics.py +103 -0
  222. bead/templates/filler.py +605 -0
  223. bead/templates/renderers.py +177 -0
  224. bead/templates/resolver.py +178 -0
  225. bead/templates/strategies.py +1806 -0
  226. bead/templates/streaming.py +195 -0
  227. bead-0.1.0.dist-info/METADATA +212 -0
  228. bead-0.1.0.dist-info/RECORD +231 -0
  229. bead-0.1.0.dist-info/WHEEL +4 -0
  230. bead-0.1.0.dist-info/entry_points.txt +2 -0
  231. bead-0.1.0.dist-info/licenses/LICENSE +21 -0
bead/items/cloze.py ADDED
@@ -0,0 +1,757 @@
1
+ """Utilities for creating cloze experimental items.
2
+
3
+ This module provides language-agnostic utilities for creating cloze
4
+ items where participants fill in missing words/phrases in partially-filled
5
+ templates.
6
+
7
+ **SPECIAL**: This is the ONLY task type that uses the Item.unfilled_slots field.
8
+
9
+ Cloze items are unique in that:
10
+ - They use partially-filled templates with specific slots left blank
11
+ - UI widgets are inferred from slot constraints at deployment time:
12
+ - Extensional constraint (finite set) → dropdown
13
+ - Intensional constraint (rules) → text input with validation
14
+ - No constraint → free text input
15
+ - Multiple slots can be unfilled in a single item
16
+
17
+ Integration Points
18
+ ------------------
19
+ - Active Learning: bead/active_learning/models/cloze.py
20
+ - Simulation: bead/simulation/strategies/cloze.py
21
+ - Deployment: bead/deployment/jspsych/ (dynamic widget generation)
22
+ - Resources: bead/resources/template.py (Template and Slot models)
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import random
28
+ import re
29
+ from collections import defaultdict
30
+ from collections.abc import Callable
31
+ from itertools import combinations
32
+ from typing import Any
33
+ from uuid import UUID, uuid4
34
+
35
+ from bead.items.item import Item, MetadataValue, UnfilledSlot
36
+
37
+
38
def create_cloze_item(
    template: Any,
    unfilled_slot_names: list[str],
    filled_slots: dict[str, str] | None = None,
    instructions: str | None = None,
    *,
    item_template_id: UUID | None = None,
    metadata: dict[str, MetadataValue] | None = None,
) -> Item:
    """Create a cloze item from a template with specific slots unfilled.

    The template string is rendered with every pre-filled slot replaced by
    its value and every unfilled slot replaced by ``"___"``. One
    ``UnfilledSlot`` is recorded per entry of ``unfilled_slot_names``,
    carrying its token position and the constraint IDs found on the slot.

    Parameters
    ----------
    template : Template
        Source template with slots. Must expose ``template_string``,
        ``slots`` and ``id``.
    unfilled_slot_names : list[str]
        Names of slots to leave unfilled (must exist in template.slots and
        appear as ``{name}`` placeholders in the template string).
    filled_slots : dict[str, str] | None
        Pre-filled slots (keys must be valid slot names, disjoint from unfilled).
    instructions : str | None
        Optional instructions for filling (e.g., "Fill in the verb"). Only
        stored when non-empty.
    item_template_id : UUID | None
        Template ID for the item. If None, generates new UUID.
    metadata : dict[str, MetadataValue] | None
        Additional metadata for item_metadata field. Keys given here
        override the automatically generated ones.

    Returns
    -------
    Item
        Cloze item with unfilled_slots populated.

    Raises
    ------
    ValueError
        If unfilled_slot_names not in template, if filled_slots not in template,
        if unfilled and filled overlap, if no unfilled slots, or if an
        unfilled slot has no placeholder in the template string.

    Examples
    --------
    >>> from bead.resources.template import Template, Slot
    >>> template = Template(
    ...     name="simple",
    ...     template_string="{det} {noun} {verb}.",
    ...     slots={
    ...         "det": Slot(name="det"),
    ...         "noun": Slot(name="noun"),
    ...         "verb": Slot(name="verb")
    ...     }
    ... )
    >>> item = create_cloze_item(
    ...     template,
    ...     unfilled_slot_names=["verb"],
    ...     filled_slots={"det": "The", "noun": "cat"}
    ... )
    >>> item.rendered_elements["text"]
    'The cat ___.'
    >>> len(item.unfilled_slots)
    1
    >>> item.unfilled_slots[0].slot_name
    'verb'
    >>> item.unfilled_slots[0].position
    2
    """
    if filled_slots is None:
        filled_slots = {}

    # Validate parameters
    _validate_cloze_parameters(template, unfilled_slot_names, filled_slots)

    # Render template with filled values and "___" for unfilled slots
    rendered_text = _render_template_for_cloze(
        template.template_string, filled_slots, unfilled_slot_names
    )

    # Calculate positions for unfilled slots
    positions = _calculate_positions(
        template.template_string, unfilled_slot_names, filled_slots
    )

    # A slot can be declared in template.slots without having a matching
    # placeholder in the template string. Report that as the documented
    # ValueError instead of letting a bare KeyError escape from the lookup.
    missing = [name for name in unfilled_slot_names if name not in positions]
    if missing:
        raise ValueError(
            f"Unfilled slots {missing} have no placeholder in template string "
            f"'{template.template_string}'. Every unfilled slot must appear "
            f"as a {{slot_name}} placeholder."
        )

    # One UnfilledSlot per requested name, in the order given by the caller
    unfilled_slots_list: list[UnfilledSlot] = [
        UnfilledSlot(
            slot_name=slot_name,
            position=positions[slot_name],
            constraint_ids=_extract_constraint_ids(template, slot_name),
        )
        for slot_name in unfilled_slot_names
    ]

    # Build rendered_elements
    rendered_elements: dict[str, str] = {"text": rendered_text}
    if instructions:
        rendered_elements["instructions"] = instructions

    # Build item_metadata; the copy keeps the caller's dict out of the item
    filled_slots_metadata: dict[str, MetadataValue] = dict(filled_slots)
    item_metadata: dict[str, MetadataValue] = {
        "template_id": str(template.id),
        "filled_slots": filled_slots_metadata,
        "n_unfilled_slots": len(unfilled_slot_names),
    }
    if metadata:
        item_metadata.update(metadata)

    if item_template_id is None:
        item_template_id = uuid4()

    return Item(
        item_template_id=item_template_id,
        rendered_elements=rendered_elements,
        unfilled_slots=unfilled_slots_list,
        item_metadata=item_metadata,
    )
153
+
154
+
155
def create_cloze_items_from_template(
    template: Any,
    n_unfilled: int = 1,
    strategy: str = "all_combinations",
    unfilled_combinations: list[list[str]] | None = None,
    instructions: str | None = None,
    *,
    item_template_id: UUID | None = None,
    metadata_fn: Callable[[list[str]], dict[str, MetadataValue]] | None = None,
) -> list[Item]:
    """Build one cloze item per chosen combination of unfilled slots.

    Parameters
    ----------
    template : Template
        Source template.
    n_unfilled : int
        How many slots each item leaves blank (default: 1). Must be at
        least 1 and strictly smaller than the number of template slots.
    strategy : str
        Which slot combinations to use:
        - 'all_combinations': every C(n_slots, n_unfilled) combination
        - 'specified': exactly the combinations in ``unfilled_combinations``
        - 'random': a single randomly sampled combination
    unfilled_combinations : list[list[str]] | None
        Slot-name combinations, required when strategy='specified'.
    instructions : str | None
        Instructions attached to every item.
    item_template_id : UUID | None
        Template ID shared by every item.
    metadata_fn : Callable[[list[str]], dict[str, MetadataValue]] | None
        Called with the unfilled slot names of each item to produce its
        extra metadata.

    Returns
    -------
    list[Item]
        Cloze items, one per combination, none of them pre-filled.

    Raises
    ------
    ValueError
        If n_unfilled is out of range, if the strategy is unknown, if
        strategy='specified' is used without unfilled_combinations, or if a
        combination has the wrong size or names an unknown slot.

    Examples
    --------
    >>> # Generate all single-slot cloze items
    >>> items = create_cloze_items_from_template(
    ...     template, n_unfilled=1, strategy='all_combinations'
    ... )
    >>> len(items)  # One for each slot
    3
    """
    all_slot_names = list(template.slots.keys())
    total_slots = len(all_slot_names)

    # Range checks on n_unfilled come first, before the strategy is inspected
    if n_unfilled < 1:
        raise ValueError(
            f"n_unfilled must be at least 1, got {n_unfilled}. "
            "Provide a positive number of slots to leave unfilled."
        )
    if not n_unfilled < total_slots:
        raise ValueError(
            f"n_unfilled ({n_unfilled}) must be less than total slots "
            f"({total_slots}). Cannot unfill all slots in a cloze item."
        )

    # Reject unknown strategies and incomplete 'specified' requests up front
    if strategy not in ("all_combinations", "specified", "random"):
        raise ValueError(
            f"Invalid strategy '{strategy}'. "
            "Must be one of ['random', 'all_combinations', 'specified']."
        )
    if strategy == "specified" and unfilled_combinations is None:
        raise ValueError(
            "strategy='specified' requires unfilled_combinations parameter. "
            "Provide a list of slot name combinations to unfill."
        )

    chosen: list[tuple[str, ...]]
    if strategy == "all_combinations":
        chosen = list(combinations(all_slot_names, n_unfilled))
    elif strategy == "specified":
        chosen = [tuple(names) for names in unfilled_combinations]
    else:
        # 'random' yields exactly one sampled combination
        chosen = [tuple(random.sample(all_slot_names, n_unfilled))]

    # Every combination must have the requested size and only known slots
    for candidate in chosen:
        size = len(candidate)
        if size != n_unfilled:
            raise ValueError(
                f"Each combination must have exactly {n_unfilled} slots, "
                f"but got {size}: {candidate}"
            )
        unknown = [name for name in candidate if name not in template.slots]
        if unknown:
            raise ValueError(
                f"Slot '{unknown[0]}' in combination not found in template. "
                f"Available slots: {list(template.slots.keys())}"
            )

    def _build(candidate: tuple[str, ...]) -> Item:
        # The same list object is handed to metadata_fn and to the item
        names = list(candidate)
        extra = metadata_fn(names) if metadata_fn else None
        return create_cloze_item(
            template=template,
            unfilled_slot_names=names,
            filled_slots=None,  # nothing is pre-filled
            instructions=instructions,
            item_template_id=item_template_id,
            metadata=extra,
        )

    return [_build(candidate) for candidate in chosen]
274
+
275
+
276
def create_simple_cloze_item(
    text: str,
    blank_positions: list[int],
    blank_labels: list[str] | None = None,
    instructions: str | None = None,
    *,
    item_template_id: UUID | None = None,
    metadata: dict[str, MetadataValue] | None = None,
) -> Item:
    """Create a cloze item from plain text (no template).

    The text is split on whitespace, the tokens at ``blank_positions`` are
    replaced with ``"___"`` and the tokens are re-joined with single spaces.
    This is a simplified helper for creating cloze items without the
    template infrastructure.

    Parameters
    ----------
    text : str
        Full text with no blanks.
    blank_positions : list[int]
        Word positions to blank (0-indexed, whitespace tokenization). Each
        position may appear only once.
    blank_labels : list[str] | None
        Optional labels for blanks (for slot_name field), paired with
        ``blank_positions`` by index. If None, uses generic labels like
        "blank_0", "blank_1".
    instructions : str | None
        Optional instructions. Only stored when non-empty.
    item_template_id : UUID | None
        Template ID for the item. If None, generates new UUID.
    metadata : dict[str, MetadataValue] | None
        Additional metadata. Keys given here override the generated ones.

    Returns
    -------
    Item
        Cloze item with text-based blanks.

    Raises
    ------
    ValueError
        If text is empty, if blank_positions is empty, out of range or
        contains duplicates, or if blank_labels length mismatch.

    Examples
    --------
    >>> item = create_simple_cloze_item(
    ...     text="The quick brown fox",
    ...     blank_positions=[1],  # "quick"
    ...     blank_labels=["adjective"]
    ... )
    >>> item.rendered_elements["text"]
    'The ___ brown fox'
    >>> item.unfilled_slots[0].slot_name
    'adjective'
    >>> item.unfilled_slots[0].position
    1
    """
    if not text or not text.strip():
        raise ValueError("text cannot be empty")

    if not blank_positions:
        raise ValueError(
            "blank_positions cannot be empty. "
            "Provide at least one position to blank out."
        )

    # Tokenize text by whitespace
    tokens = text.split()

    # Validate positions
    for pos in blank_positions:
        if pos < 0 or pos >= len(tokens):
            raise ValueError(
                f"blank_position {pos} is out of range. "
                f"Text has {len(tokens)} tokens (valid range: 0-{len(tokens) - 1})"
            )

    # A repeated position would create several slots for a single rendered
    # blank and inflate n_unfilled_slots, so reject it explicitly.
    seen: set[int] = set()
    duplicates: list[int] = []
    for pos in blank_positions:
        if pos in seen and pos not in duplicates:
            duplicates.append(pos)
        seen.add(pos)
    if duplicates:
        raise ValueError(
            f"blank_positions contains duplicate positions: {duplicates}. "
            "Each position may be blanked only once."
        )

    # Validate labels if provided
    if blank_labels is not None:
        if len(blank_labels) != len(blank_positions):
            raise ValueError(
                f"blank_labels length ({len(blank_labels)}) must match "
                f"blank_positions length ({len(blank_positions)})"
            )
    else:
        # Generate default labels
        blank_labels = [f"blank_{i}" for i in range(len(blank_positions))]

    # Create unfilled slots; plain-text blanks carry no constraints
    unfilled_slots_list: list[UnfilledSlot] = [
        UnfilledSlot(slot_name=label, position=pos, constraint_ids=[])
        for pos, label in zip(blank_positions, blank_labels, strict=True)
    ]

    # Replace tokens at blank positions with "___"
    blanked_tokens = tokens.copy()
    for pos in blank_positions:
        blanked_tokens[pos] = "___"
    rendered_text = " ".join(blanked_tokens)

    # Build rendered_elements
    rendered_elements: dict[str, str] = {"text": rendered_text}
    if instructions:
        rendered_elements["instructions"] = instructions

    # Build item_metadata; original_text keeps the unmodified input
    item_metadata: dict[str, MetadataValue] = {
        "n_unfilled_slots": len(blank_positions),
        "original_text": text,
    }
    if metadata:
        item_metadata.update(metadata)

    if item_template_id is None:
        item_template_id = uuid4()

    return Item(
        item_template_id=item_template_id,
        rendered_elements=rendered_elements,
        unfilled_slots=unfilled_slots_list,
        item_metadata=item_metadata,
    )
396
+
397
+
398
def create_cloze_items_from_groups(
    items: list[Item],
    group_by: Callable[[Item], Any],
    n_slots_to_unfill: int = 1,
    *,
    extract_text: Callable[[Item], str] | None = None,
    include_group_metadata: bool = True,
    item_template_id: UUID | None = None,
) -> list[Item]:
    """Create cloze items from grouped source items.

    Source items are bucketed by ``group_by`` and one text-based cloze item
    is created per source item by blanking out its first
    ``n_slots_to_unfill`` whitespace-separated words. The result is ordered
    group by group, with groups in order of first appearance. Source items
    whose text has fewer words than ``n_slots_to_unfill`` are skipped.

    Parameters
    ----------
    items : list[Item]
        Source items to group.
    group_by : Callable[[Item], Any]
        Grouping function. Its return value is used as a dictionary key, so
        it must be hashable.
    n_slots_to_unfill : int
        Number of leading words to blank out (must be at least 1).
    extract_text : Callable[[Item], str] | None
        Text extraction function. If None, tries the common
        rendered_elements keys "text", "sentence" and "content".
    include_group_metadata : bool
        Whether to include ``str(group_key)`` as "group_key" in metadata.
    item_template_id : UUID | None
        Template ID for created items.

    Returns
    -------
    list[Item]
        Cloze items from grouped source items.

    Raises
    ------
    ValueError
        If n_slots_to_unfill is less than 1, or if no text can be extracted
        from a source item.

    Examples
    --------
    >>> cloze_items = create_cloze_items_from_groups(
    ...     items=source_items,
    ...     group_by=lambda i: i.item_metadata.get("category"),
    ...     n_slots_to_unfill=1
    ... )  # doctest: +SKIP
    """
    # Fail fast on an invalid count instead of surfacing an unrelated
    # "blank_positions cannot be empty" error from the first processed item.
    if n_slots_to_unfill < 1:
        raise ValueError(
            f"n_slots_to_unfill must be at least 1, got {n_slots_to_unfill}. "
            "Provide a positive number of words to blank out."
        )

    # Group items
    groups: dict[Any, list[Item]] = defaultdict(list)
    for item in items:
        groups[group_by(item)].append(item)

    cloze_items: list[Item] = []

    for group_key, group_items in groups.items():
        for item in group_items:
            # Extract text
            if extract_text:
                text: str = extract_text(item)
            else:
                text = _extract_text_from_item(item)

            # Skip items that are too short to blank the requested words
            if n_slots_to_unfill > len(text.split()):
                continue

            # Build metadata
            item_metadata: dict[str, MetadataValue] = {
                "source_item_id": str(item.id),
            }
            if include_group_metadata:
                item_metadata["group_key"] = str(group_key)

            # Blank out the first n_slots_to_unfill words
            cloze_items.append(
                create_simple_cloze_item(
                    text=text,
                    blank_positions=list(range(n_slots_to_unfill)),
                    item_template_id=item_template_id,
                    metadata=item_metadata,
                )
            )

    return cloze_items
482
+
483
+
484
def create_filtered_cloze_items(
    templates: list[Any],
    n_slots_to_unfill: int = 1,
    *,
    template_filter: Callable[[Any], bool] | None = None,
    slot_filter: Callable[[str, Any], bool] | None = None,
    item_template_id: UUID | None = None,
) -> list[Item]:
    """Create cloze items with multi-level filtering.

    Templates are first narrowed by ``template_filter``. For each remaining
    template, one cloze item is created per combination of
    ``n_slots_to_unfill`` slots, drawn only from the slots accepted by
    ``slot_filter`` (or from all slots when no slot filter is given).

    A template is skipped, rather than aborting the whole batch, when it
    has fewer accepted slots than ``n_slots_to_unfill`` or when unfilling
    ``n_slots_to_unfill`` slots would leave no slot filled.

    Parameters
    ----------
    templates : list[Template]
        Source templates.
    n_slots_to_unfill : int
        Number of slots to unfill.
    template_filter : Callable[[Template], bool] | None
        Filter templates.
    slot_filter : Callable[[str, Slot], bool] | None
        Filter which slots can be unfilled (receives slot_name and Slot object).
    item_template_id : UUID | None
        Template ID for created items.

    Returns
    -------
    list[Item]
        Filtered cloze items.

    Raises
    ------
    ValueError
        Propagated from create_cloze_items_from_template, e.g. when
        n_slots_to_unfill is less than 1 and at least one template is
        processed.

    Examples
    --------
    >>> # Only unfill slots with constraints
    >>> cloze_items = create_filtered_cloze_items(
    ...     templates=all_templates,
    ...     n_slots_to_unfill=1,
    ...     template_filter=lambda t: len(t.slots) >= 3,
    ...     slot_filter=lambda name, slot: len(slot.constraints) > 0
    ... )  # doctest: +SKIP
    """
    # Filter templates
    filtered_templates = templates
    if template_filter:
        filtered_templates = [t for t in templates if template_filter(t)]

    cloze_items: list[Item] = []

    for template in filtered_templates:
        slot_names = list(template.slots.keys())

        # Filter slots if slot_filter provided
        available_slots = slot_names
        if slot_filter:
            available_slots = [
                name for name in slot_names if slot_filter(name, template.slots[name])
            ]

        # Skip if not enough eligible slots
        if len(available_slots) < n_slots_to_unfill:
            continue

        # create_cloze_items_from_template rejects requests that would blank
        # every slot; skip such templates so one small template cannot abort
        # the batch. Invalid counts (< 1) still reach the ValueError below.
        if n_slots_to_unfill >= 1 and n_slots_to_unfill >= len(slot_names):
            continue

        if slot_filter and n_slots_to_unfill >= 1:
            # Build only the combinations made of accepted slots instead of
            # creating every item and discarding the unwanted ones. Order
            # matches filtering the full combination list.
            strategy = "specified"
            wanted: list[list[str]] | None = [
                list(combo)
                for combo in combinations(available_slots, n_slots_to_unfill)
            ]
        else:
            strategy = "all_combinations"
            wanted = None

        cloze_items.extend(
            create_cloze_items_from_template(
                template=template,
                n_unfilled=n_slots_to_unfill,
                strategy=strategy,
                unfilled_combinations=wanted,
                item_template_id=item_template_id,
            )
        )

    return cloze_items
567
+
568
+
569
def _extract_text_from_item(item: Item) -> str:
    """Return the first usable text entry of an item's rendered_elements.

    The keys "text", "sentence" and "content" are probed in that order and
    the value of the first one present is returned.

    Parameters
    ----------
    item : Item
        Item whose rendered_elements are searched.

    Returns
    -------
    str
        Value stored under the first matching key.

    Raises
    ------
    ValueError
        If none of the probed keys exists in rendered_elements.
    """
    elements = item.rendered_elements
    present = [key for key in ("text", "sentence", "content") if key in elements]

    if not present:
        raise ValueError(
            f"Cannot extract text from item {item.id}. "
            "Expected one of ['text', 'sentence', 'content'] in rendered_elements, "
            f"but found keys: {list(elements.keys())}. "
            "Use the extract_text parameter to provide a custom extraction function."
        )

    # Probe order doubles as priority order
    return elements[present[0]]
600
+
601
+
602
+ # Helper functions
603
+
604
+
605
def _validate_cloze_parameters(
    template: Any, unfilled_slot_names: list[str], filled_slots: dict[str, str]
) -> None:
    """Check that the slot arguments of a cloze item are consistent.

    Checks run in this order: at least one unfilled slot is given, every
    unfilled slot exists in the template, every filled slot exists in the
    template, and no slot is both filled and unfilled.

    Parameters
    ----------
    template : Template
        Template whose ``slots`` mapping defines the valid slot names.
    unfilled_slot_names : list[str]
        Slot names requested to stay blank.
    filled_slots : dict[str, str]
        Slot names mapped to their fill values.

    Raises
    ------
    ValueError
        On the first failed check, with a descriptive message.
    """
    if not unfilled_slot_names:
        raise ValueError(
            "Must have at least 1 unfilled slot. "
            "Provide at least one slot name in unfilled_slot_names parameter."
        )

    # The first unknown unfilled slot (in caller order) is the one reported
    unknown_unfilled = [
        name for name in unfilled_slot_names if name not in template.slots
    ]
    if unknown_unfilled:
        raise ValueError(
            f"Unfilled slot '{unknown_unfilled[0]}' not found in template. "
            f"Available slots: {list(template.slots.keys())}"
        )

    # Same rule for pre-filled slots; nothing to check when none are given
    if filled_slots:
        unknown_filled = [
            name for name in filled_slots.keys() if name not in template.slots
        ]
        if unknown_filled:
            raise ValueError(
                f"Filled slot '{unknown_filled[0]}' not found in template. "
                f"Available slots: {list(template.slots.keys())}"
            )

    # A slot cannot be blank and filled at the same time
    both = set(unfilled_slot_names) & set(filled_slots.keys())
    if both:
        raise ValueError(
            "Slots cannot be both filled and unfilled. "
            f"Overlapping slots: {both}"
        )
646
+
647
+
648
def _render_template_for_cloze(
    template_string: str, filled_slots: dict[str, str], unfilled_slot_names: list[str]
) -> str:
    """Render template with filled values and '___' for unfilled slots.

    All placeholders are substituted in a single pass over the template
    string, so text inserted for one slot is never re-scanned. A fill value
    that happens to contain ``{other_slot}`` is therefore kept literally,
    and the result does not depend on the iteration order of
    ``filled_slots``. Placeholders for slots that are neither filled nor
    unfilled are left untouched.

    Parameters
    ----------
    template_string : str
        Template string with {slot_name} placeholders.
    filled_slots : dict[str, str]
        Mapping of slot names to fill values.
    unfilled_slot_names : list[str]
        Names of slots to leave unfilled (replaced with "___").

    Returns
    -------
    str
        Rendered template string.
    """
    replacements: dict[str, str] = {
        f"{{{slot_name}}}": value for slot_name, value in filled_slots.items()
    }
    # Unfilled slots take precedence if a name is (incorrectly) in both
    for slot_name in unfilled_slot_names:
        replacements[f"{{{slot_name}}}"] = "___"

    if not replacements:
        # An empty alternation would match everywhere; nothing to substitute
        return template_string

    # Longest placeholder first so alternation never stops at a shorter match
    ordered = sorted(replacements, key=len, reverse=True)
    placeholder_pattern = re.compile("|".join(re.escape(p) for p in ordered))

    return placeholder_pattern.sub(
        lambda match: replacements[match.group(0)], template_string
    )
678
+
679
+
680
def _calculate_positions(
    template_string: str, unfilled_slot_names: list[str], filled_slots: dict[str, str]
) -> dict[str, int]:
    """Calculate token positions for unfilled slots.

    The template is scanned left to right. Literal text between
    placeholders contributes one token per whitespace-separated piece, an
    unfilled slot contributes exactly one token (its "___" blank), and a
    filled slot contributes as many tokens as its fill value has
    whitespace-separated words. Placeholders that are neither filled nor
    unfilled count as one token.

    Literal text is counted separately from an adjacent placeholder even
    when no whitespace separates them (e.g. the "." in "{verb}.").

    Parameters
    ----------
    template_string : str
        Template string with {slot_name} placeholders.
    unfilled_slot_names : list[str]
        Names of slots that are unfilled.
    filled_slots : dict[str, str]
        Mapping of slot names to fill values.

    Returns
    -------
    dict[str, int]
        Mapping from slot_name to position (token index, 0-indexed). Slots
        without a placeholder in the template string are absent; if a
        placeholder repeats, the last occurrence is recorded.
    """
    unfilled = set(unfilled_slot_names)  # O(1) membership inside the loop
    slot_pattern = re.compile(r"\{(\w+)\}")

    positions: dict[str, int] = {}
    token_index = 0
    last_end = 0  # end offset of the previously processed placeholder

    for match in slot_pattern.finditer(template_string):
        slot_name = match.group(1)

        # Count literal tokens between the previous placeholder and this one
        token_index += len(template_string[last_end : match.start()].split())

        if slot_name in unfilled:
            # The blank itself occupies exactly one token
            positions[slot_name] = token_index
            token_index += 1
        elif slot_name in filled_slots:
            # A multi-word fill value shifts every later position accordingly
            token_index += len(filled_slots[slot_name].split())
        else:
            # Placeholder stays in the text verbatim as a single token
            token_index += 1

        last_end = match.end()

    return positions
726
+
727
+
728
def _extract_constraint_ids(template: Any, slot_name: str) -> list[UUID]:
    """Collect the IDs of the constraints attached to one template slot.

    Parameters
    ----------
    template : Template
        Template whose ``slots`` mapping is consulted.
    slot_name : str
        Slot to read constraints from.

    Returns
    -------
    list[UUID]
        The ``id`` of every constraint that has one, in constraint order.
        Empty when the slot is unknown, has no ``constraints`` attribute,
        or its ``constraints`` is None.
    """
    slots = template.slots
    if slot_name in slots:
        # A missing attribute and an explicit None are treated alike
        constraints = getattr(slots[slot_name], "constraints", None)
        if constraints is not None:
            # Constraint-like objects without an id are silently ignored
            return [entry.id for entry in constraints if hasattr(entry, "id")]

    return []