bead 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
bead/items/cloze.py
ADDED
|
@@ -0,0 +1,757 @@
|
|
|
1
|
+
"""Utilities for creating cloze experimental items.
|
|
2
|
+
|
|
3
|
+
This module provides language-agnostic utilities for creating cloze
|
|
4
|
+
items where participants fill in missing words/phrases in partially-filled
|
|
5
|
+
templates.
|
|
6
|
+
|
|
7
|
+
**SPECIAL**: This is the ONLY task type that uses the Item.unfilled_slots field.
|
|
8
|
+
|
|
9
|
+
Cloze items are unique in that:
|
|
10
|
+
- They use partially-filled templates with specific slots left blank
|
|
11
|
+
- UI widgets are inferred from slot constraints at deployment time:
|
|
12
|
+
- Extensional constraint (finite set) → dropdown
|
|
13
|
+
- Intensional constraint (rules) → text input with validation
|
|
14
|
+
- No constraint → free text input
|
|
15
|
+
- Multiple slots can be unfilled in a single item
|
|
16
|
+
|
|
17
|
+
Integration Points
|
|
18
|
+
------------------
|
|
19
|
+
- Active Learning: bead/active_learning/models/cloze.py
|
|
20
|
+
- Simulation: bead/simulation/strategies/cloze.py
|
|
21
|
+
- Deployment: bead/deployment/jspsych/ (dynamic widget generation)
|
|
22
|
+
- Resources: bead/resources/template.py (Template and Slot models)
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import random
|
|
28
|
+
import re
|
|
29
|
+
from collections import defaultdict
|
|
30
|
+
from collections.abc import Callable
|
|
31
|
+
from itertools import combinations
|
|
32
|
+
from typing import Any
|
|
33
|
+
from uuid import UUID, uuid4
|
|
34
|
+
|
|
35
|
+
from bead.items.item import Item, MetadataValue, UnfilledSlot
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def create_cloze_item(
    template: Any,
    unfilled_slot_names: list[str],
    filled_slots: dict[str, str] | None = None,
    instructions: str | None = None,
    *,
    item_template_id: UUID | None = None,
    metadata: dict[str, MetadataValue] | None = None,
) -> Item:
    """Build a single cloze item by leaving chosen template slots blank.

    The template string is rendered with the supplied filler values and a
    "___" placeholder for every unfilled slot. Each unfilled slot is recorded
    on the item together with its position and the constraint IDs taken from
    the template.

    Parameters
    ----------
    template : Template
        Source template with slots.
    unfilled_slot_names : list[str]
        Names of the slots to leave blank (must exist in template.slots).
    filled_slots : dict[str, str] | None
        Values for pre-filled slots (keys must be valid slot names and
        disjoint from the unfilled ones). None is treated as an empty dict.
    instructions : str | None
        Optional instructions for filling (e.g., "Fill in the verb"). Only
        stored in rendered_elements when non-empty.
    item_template_id : UUID | None
        Template ID for the item. If None, a new UUID is generated.
    metadata : dict[str, MetadataValue] | None
        Extra entries merged into item_metadata; they override the default
        keys ("template_id", "filled_slots", "n_unfilled_slots") on clash.

    Returns
    -------
    Item
        Cloze item with unfilled_slots populated.

    Raises
    ------
    ValueError
        If unfilled_slot_names not in template, if filled_slots not in template,
        if unfilled and filled overlap, if no unfilled slots, or if validation
        fails.

    Examples
    --------
    >>> from bead.resources.template import Template, Slot
    >>> template = Template(
    ...     name="simple",
    ...     template_string="{det} {noun} {verb}.",
    ...     slots={
    ...         "det": Slot(name="det"),
    ...         "noun": Slot(name="noun"),
    ...         "verb": Slot(name="verb")
    ...     }
    ... )
    >>> item = create_cloze_item(
    ...     template,
    ...     unfilled_slot_names=["verb"],
    ...     filled_slots={"det": "The", "noun": "cat"}
    ... )
    >>> item.rendered_elements["text"]
    'The cat ___.'
    >>> len(item.unfilled_slots)
    1
    >>> item.unfilled_slots[0].slot_name
    'verb'
    >>> item.unfilled_slots[0].position
    2
    """
    fillers: dict[str, str] = {} if filled_slots is None else filled_slots

    # Reject inconsistent slot specifications before doing any rendering.
    _validate_cloze_parameters(template, unfilled_slot_names, fillers)

    template_string = template.template_string
    cloze_text = _render_template_for_cloze(
        template_string, fillers, unfilled_slot_names
    )
    slot_positions = _calculate_positions(
        template_string, unfilled_slot_names, fillers
    )

    # One UnfilledSlot per blank, in the order the caller listed them.
    blanks: list[UnfilledSlot] = [
        UnfilledSlot(
            slot_name=name,
            position=slot_positions[name],
            constraint_ids=_extract_constraint_ids(template, name),
        )
        for name in unfilled_slot_names
    ]

    elements: dict[str, str] = {"text": cloze_text}
    if instructions:
        elements["instructions"] = instructions

    # Default metadata first so caller-supplied entries take precedence.
    combined_metadata: dict[str, MetadataValue] = {
        "template_id": str(template.id),
        "filled_slots": dict(fillers),
        "n_unfilled_slots": len(unfilled_slot_names),
    }
    if metadata:
        combined_metadata.update(metadata)

    return Item(
        item_template_id=uuid4() if item_template_id is None else item_template_id,
        rendered_elements=elements,
        unfilled_slots=blanks,
        item_metadata=combined_metadata,
    )
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def create_cloze_items_from_template(
    template: Any,
    n_unfilled: int = 1,
    strategy: str = "all_combinations",
    unfilled_combinations: list[list[str]] | None = None,
    instructions: str | None = None,
    *,
    item_template_id: UUID | None = None,
    metadata_fn: Callable[[list[str]], dict[str, MetadataValue]] | None = None,
) -> list[Item]:
    """Produce several cloze items from one template by varying the blanks.

    Every item is created without pre-filled slots; only the set of slots
    marked as unfilled differs between items.

    Parameters
    ----------
    template : Template
        Source template.
    n_unfilled : int
        Number of slots to leave unfilled per item (default: 1). Must be at
        least 1 and strictly smaller than the number of template slots.
    strategy : str
        How to choose unfilled slots:
        - 'random': a single randomly sampled combination
        - 'all_combinations': all C(n_slots, n_unfilled) combinations
        - 'specified': the combinations given in unfilled_combinations
    unfilled_combinations : list[list[str]] | None
        For strategy='specified', list of slot name combinations to unfill.
        Each combination must contain exactly n_unfilled slot names.
    instructions : str | None
        Instructions for all items.
    item_template_id : UUID | None
        Template ID for all items.
    metadata_fn : Callable[[list[str]], dict[str, MetadataValue]] | None
        Called with the unfilled slot names of each item to build its
        metadata.

    Returns
    -------
    list[Item]
        Cloze items with varying unfilled slots.

    Raises
    ------
    ValueError
        If n_unfilled invalid, if the strategy is unknown, if
        strategy='specified' without unfilled_combinations, or if any
        combination has the wrong size or contains invalid slots.

    Examples
    --------
    >>> # Generate all single-slot cloze items
    >>> items = create_cloze_items_from_template(
    ...     template, n_unfilled=1, strategy='all_combinations'
    ... )
    >>> len(items)  # One for each slot
    3
    """
    all_slot_names = list(template.slots.keys())
    total_slots = len(all_slot_names)

    # Bounds on n_unfilled are checked before the strategy is even looked at.
    if n_unfilled < 1:
        raise ValueError(
            f"n_unfilled must be at least 1, got {n_unfilled}. "
            f"Provide a positive number of slots to leave unfilled."
        )
    if n_unfilled >= total_slots:
        raise ValueError(
            f"n_unfilled ({n_unfilled}) must be less than total slots "
            f"({total_slots}). Cannot unfill all slots in a cloze item."
        )

    if strategy not in ("all_combinations", "specified", "random"):
        raise ValueError(
            f"Invalid strategy '{strategy}'. "
            f"Must be one of ['random', 'all_combinations', 'specified']."
        )
    if strategy == "specified" and unfilled_combinations is None:
        raise ValueError(
            "strategy='specified' requires unfilled_combinations parameter. "
            "Provide a list of slot name combinations to unfill."
        )

    if strategy == "all_combinations":
        selections = list(combinations(all_slot_names, n_unfilled))
    elif strategy == "specified":
        selections = [tuple(names) for names in unfilled_combinations]
    else:
        # 'random': one sampled combination
        selections = [tuple(random.sample(all_slot_names, n_unfilled))]

    # Check size first, then slot names, one combination at a time.
    for selection in selections:
        selection_size = len(selection)
        if selection_size != n_unfilled:
            raise ValueError(
                f"Each combination must have exactly {n_unfilled} slots, "
                f"but got {selection_size}: {selection}"
            )
        unknown = [name for name in selection if name not in template.slots]
        if unknown:
            raise ValueError(
                f"Slot '{unknown[0]}' in combination not found in template. "
                f"Available slots: {list(template.slots.keys())}"
            )

    created: list[Item] = []
    for selection in selections:
        names = list(selection)
        created.append(
            create_cloze_item(
                template=template,
                unfilled_slot_names=names,
                filled_slots=None,  # no slot is pre-filled
                instructions=instructions,
                item_template_id=item_template_id,
                metadata=metadata_fn(names) if metadata_fn else None,
            )
        )
    return created
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def create_simple_cloze_item(
    text: str,
    blank_positions: list[int],
    blank_labels: list[str] | None = None,
    instructions: str | None = None,
    *,
    item_template_id: UUID | None = None,
    metadata: dict[str, MetadataValue] | None = None,
) -> Item:
    """Create a cloze item from plain text (no template).

    Replaces words at specified positions with blanks. This is a simplified
    helper for creating cloze items without the template infrastructure.
    The text is tokenized on whitespace and re-joined with single spaces, so
    runs of whitespace in the original text are not preserved in the
    rendered text (the untouched original is kept in item_metadata under
    "original_text").

    Parameters
    ----------
    text : str
        Full text with no blanks.
    blank_positions : list[int]
        Word positions to blank (0-indexed). Positions must be unique.
    blank_labels : list[str] | None
        Optional labels for blanks (for slot_name field). If None, uses
        generic labels like "blank_0", "blank_1".
    instructions : str | None
        Optional instructions.
    item_template_id : UUID | None
        Template ID for the item. If None, a new UUID is generated.
    metadata : dict[str, MetadataValue] | None
        Additional metadata, merged over the default entries.

    Returns
    -------
    Item
        Cloze item with text-based blanks.

    Raises
    ------
    ValueError
        If text is empty, if blank_positions is empty, out of range or
        contains duplicates, or if blank_labels length mismatch.

    Examples
    --------
    >>> item = create_simple_cloze_item(
    ...     text="The quick brown fox",
    ...     blank_positions=[1],  # "quick"
    ...     blank_labels=["adjective"]
    ... )
    >>> item.rendered_elements["text"]
    'The ___ brown fox'
    >>> item.unfilled_slots[0].slot_name
    'adjective'
    >>> item.unfilled_slots[0].position
    1
    """
    if not text or not text.strip():
        raise ValueError("text cannot be empty")

    if not blank_positions:
        raise ValueError(
            "blank_positions cannot be empty. "
            "Provide at least one position to blank out."
        )

    # Tokenize text by whitespace
    tokens = text.split()
    n_tokens = len(tokens)

    # Validate positions: each must be in range and used only once. A
    # repeated position would yield several UnfilledSlot entries for a single
    # rendered blank and an n_unfilled_slots count that overstates the blanks.
    seen_positions: set[int] = set()
    for pos in blank_positions:
        if pos < 0 or pos >= n_tokens:
            raise ValueError(
                f"blank_position {pos} is out of range. "
                f"Text has {n_tokens} tokens (valid range: 0-{n_tokens - 1})"
            )
        if pos in seen_positions:
            raise ValueError(
                f"blank_position {pos} appears more than once. "
                f"Each position in blank_positions must be unique."
            )
        seen_positions.add(pos)

    # Validate labels if provided, otherwise generate default ones
    if blank_labels is None:
        blank_labels = [f"blank_{i}" for i in range(len(blank_positions))]
    elif len(blank_labels) != len(blank_positions):
        raise ValueError(
            f"blank_labels length ({len(blank_labels)}) must match "
            f"blank_positions length ({len(blank_positions)})"
        )

    # Create unfilled slots (plain-text blanks carry no constraints)
    unfilled_slots_list: list[UnfilledSlot] = [
        UnfilledSlot(slot_name=label, position=pos, constraint_ids=[])
        for pos, label in zip(blank_positions, blank_labels, strict=True)
    ]

    # Replace tokens at blank positions with "___"
    rendered_text = " ".join(
        "___" if index in seen_positions else token
        for index, token in enumerate(tokens)
    )

    # Build rendered_elements
    rendered_elements: dict[str, str] = {"text": rendered_text}
    if instructions:
        rendered_elements["instructions"] = instructions

    # Build item_metadata; caller-supplied entries override the defaults
    item_metadata: dict[str, MetadataValue] = {
        "n_unfilled_slots": len(blank_positions),
        "original_text": text,
    }
    if metadata:
        item_metadata.update(metadata)

    if item_template_id is None:
        item_template_id = uuid4()

    return Item(
        item_template_id=item_template_id,
        rendered_elements=rendered_elements,
        unfilled_slots=unfilled_slots_list,
        item_metadata=item_metadata,
    )
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def create_cloze_items_from_groups(
    items: list[Item],
    group_by: Callable[[Item], Any],
    n_slots_to_unfill: int = 1,
    *,
    extract_text: Callable[[Item], str] | None = None,
    include_group_metadata: bool = True,
    item_template_id: UUID | None = None,
) -> list[Item]:
    """Create cloze items from grouped source items.

    Groups the source items and creates one simple text-based cloze item per
    source item by blanking out its first ``n_slots_to_unfill``
    whitespace-separated words. Source items whose text has fewer words than
    ``n_slots_to_unfill`` are skipped. Output is ordered group by group, in
    order of each group's first appearance in ``items``.

    Note that this function always uses the text-based path
    (``create_simple_cloze_item``); template metadata on the source items is
    not consulted.

    Parameters
    ----------
    items : list[Item]
        Source items to group.
    group_by : Callable[[Item], Any]
        Grouping function. Its return value must be hashable.
    n_slots_to_unfill : int
        Number of leading words to blank out (must be at least 1).
    extract_text : Callable[[Item], str] | None
        Text extraction function. If None, tries common keys.
    include_group_metadata : bool
        Whether to include group_key (as a string) in metadata.
    item_template_id : UUID | None
        Template ID for created items.

    Returns
    -------
    list[Item]
        Cloze items from grouped source items.

    Raises
    ------
    ValueError
        If n_slots_to_unfill is less than 1, or if text cannot be extracted
        from a source item.

    Examples
    --------
    >>> cloze_items = create_cloze_items_from_groups(
    ...     items=source_items,
    ...     group_by=lambda i: i.item_metadata.get("category"),
    ...     n_slots_to_unfill=1
    ... )  # doctest: +SKIP
    """
    # Fail fast with a message about the actual problem; otherwise a
    # non-positive count only surfaces later as an unrelated complaint about
    # empty blank_positions or empty text.
    if n_slots_to_unfill < 1:
        raise ValueError(
            f"n_slots_to_unfill must be at least 1, got {n_slots_to_unfill}. "
            f"Provide a positive number of words to blank out."
        )

    # Group items
    groups: dict[Any, list[Item]] = defaultdict(list)
    for item in items:
        groups[group_by(item)].append(item)

    cloze_items: list[Item] = []

    for group_key, group_items in groups.items():
        for item in group_items:
            # Extract text (custom extractor wins over the default key lookup)
            if extract_text:
                text: str = extract_text(item)
            else:
                text = _extract_text_from_item(item)

            # Skip items that are too short to blank the requested word count
            if n_slots_to_unfill > len(text.split()):
                continue

            # Build metadata linking the cloze item back to its source
            item_metadata: dict[str, MetadataValue] = {
                "source_item_id": str(item.id),
            }
            if include_group_metadata:
                item_metadata["group_key"] = str(group_key)

            # Blank out the first n_slots_to_unfill words
            cloze_items.append(
                create_simple_cloze_item(
                    text=text,
                    blank_positions=list(range(n_slots_to_unfill)),
                    item_template_id=item_template_id,
                    metadata=item_metadata,
                )
            )

    return cloze_items
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
def create_filtered_cloze_items(
    templates: list[Any],
    n_slots_to_unfill: int = 1,
    *,
    template_filter: Callable[[Any], bool] | None = None,
    slot_filter: Callable[[str, Any], bool] | None = None,
    item_template_id: UUID | None = None,
) -> list[Item]:
    """Create cloze items with multi-level filtering.

    Filters templates and/or slots before creating cloze items. For every
    remaining template, one item is created per combination of
    ``n_slots_to_unfill`` eligible slots. Templates that cannot produce any
    item are skipped rather than aborting the whole batch: this covers
    templates with too few eligible slots and templates whose total slot
    count does not exceed ``n_slots_to_unfill`` (a cloze item may not unfill
    all of its slots).

    Parameters
    ----------
    templates : list[Template]
        Source templates.
    n_slots_to_unfill : int
        Number of slots to unfill (must be at least 1).
    template_filter : Callable[[Template], bool] | None
        Filter templates.
    slot_filter : Callable[[str, Slot], bool] | None
        Filter which slots can be unfilled (receives slot_name and Slot object).
    item_template_id : UUID | None
        Template ID for created items.

    Returns
    -------
    list[Item]
        Filtered cloze items.

    Raises
    ------
    ValueError
        If n_slots_to_unfill is less than 1.

    Examples
    --------
    >>> # Only unfill slots with constraints
    >>> cloze_items = create_filtered_cloze_items(
    ...     templates=all_templates,
    ...     n_slots_to_unfill=1,
    ...     template_filter=lambda t: len(t.slots) >= 3,
    ...     slot_filter=lambda name, slot: len(slot.constraints) > 0
    ... )  # doctest: +SKIP
    """
    if n_slots_to_unfill < 1:
        raise ValueError(
            f"n_slots_to_unfill must be at least 1, got {n_slots_to_unfill}. "
            f"Provide a positive number of slots to leave unfilled."
        )

    # Filter templates
    filtered_templates = templates
    if template_filter:
        filtered_templates = [t for t in templates if template_filter(t)]

    cloze_items: list[Item] = []

    for template in filtered_templates:
        slot_names = list(template.slots.keys())

        # create_cloze_items_from_template rejects n_unfilled >= total slots;
        # skip such templates so one small template does not abort the batch.
        if len(slot_names) <= n_slots_to_unfill:
            continue

        if not slot_filter:
            cloze_items.extend(
                create_cloze_items_from_template(
                    template=template,
                    n_unfilled=n_slots_to_unfill,
                    strategy="all_combinations",
                    item_template_id=item_template_id,
                )
            )
            continue

        # Filter slots; template order is preserved.
        eligible_slots = [
            name for name in slot_names if slot_filter(name, template.slots[name])
        ]

        # Skip if not enough eligible slots
        if len(eligible_slots) < n_slots_to_unfill:
            continue

        # Build only the combinations drawn from eligible slots instead of
        # creating every combination and discarding the unwanted items. The
        # resulting items and their order are the same as filtering afterwards.
        cloze_items.extend(
            create_cloze_items_from_template(
                template=template,
                n_unfilled=n_slots_to_unfill,
                strategy="specified",
                unfilled_combinations=[
                    list(combo)
                    for combo in combinations(eligible_slots, n_slots_to_unfill)
                ],
                item_template_id=item_template_id,
            )
        )

    return cloze_items
|
|
567
|
+
|
|
568
|
+
|
|
569
|
+
def _extract_text_from_item(item: Item) -> str:
|
|
570
|
+
"""Extract text from item's rendered_elements.
|
|
571
|
+
|
|
572
|
+
Tries common keys: "text", "sentence", "content".
|
|
573
|
+
Raises error if no suitable text found.
|
|
574
|
+
|
|
575
|
+
Parameters
|
|
576
|
+
----------
|
|
577
|
+
item : Item
|
|
578
|
+
Item to extract text from.
|
|
579
|
+
|
|
580
|
+
Returns
|
|
581
|
+
-------
|
|
582
|
+
str
|
|
583
|
+
Extracted text.
|
|
584
|
+
|
|
585
|
+
Raises
|
|
586
|
+
------
|
|
587
|
+
ValueError
|
|
588
|
+
If no suitable text key found in rendered_elements.
|
|
589
|
+
"""
|
|
590
|
+
for key in ["text", "sentence", "content"]:
|
|
591
|
+
if key in item.rendered_elements:
|
|
592
|
+
return item.rendered_elements[key]
|
|
593
|
+
|
|
594
|
+
raise ValueError(
|
|
595
|
+
f"Cannot extract text from item {item.id}. "
|
|
596
|
+
f"Expected one of ['text', 'sentence', 'content'] in rendered_elements, "
|
|
597
|
+
f"but found keys: {list(item.rendered_elements.keys())}. "
|
|
598
|
+
f"Use the extract_text parameter to provide a custom extraction function."
|
|
599
|
+
)
|
|
600
|
+
|
|
601
|
+
|
|
602
|
+
# Helper functions
|
|
603
|
+
|
|
604
|
+
|
|
605
|
+
def _validate_cloze_parameters(
|
|
606
|
+
template: Any, unfilled_slot_names: list[str], filled_slots: dict[str, str]
|
|
607
|
+
) -> None:
|
|
608
|
+
"""Validate cloze item parameters.
|
|
609
|
+
|
|
610
|
+
Raises
|
|
611
|
+
------
|
|
612
|
+
ValueError
|
|
613
|
+
If validation fails with descriptive message.
|
|
614
|
+
"""
|
|
615
|
+
# Check unfilled_slot_names not empty
|
|
616
|
+
if not unfilled_slot_names:
|
|
617
|
+
raise ValueError(
|
|
618
|
+
"Must have at least 1 unfilled slot. "
|
|
619
|
+
"Provide at least one slot name in unfilled_slot_names parameter."
|
|
620
|
+
)
|
|
621
|
+
|
|
622
|
+
# Check all unfilled slots exist in template
|
|
623
|
+
for slot_name in unfilled_slot_names:
|
|
624
|
+
if slot_name not in template.slots:
|
|
625
|
+
raise ValueError(
|
|
626
|
+
f"Unfilled slot '{slot_name}' not found in template. "
|
|
627
|
+
f"Available slots: {list(template.slots.keys())}"
|
|
628
|
+
)
|
|
629
|
+
|
|
630
|
+
# Check filled_slots if provided
|
|
631
|
+
if filled_slots:
|
|
632
|
+
for slot_name in filled_slots.keys():
|
|
633
|
+
if slot_name not in template.slots:
|
|
634
|
+
raise ValueError(
|
|
635
|
+
f"Filled slot '{slot_name}' not found in template. "
|
|
636
|
+
f"Available slots: {list(template.slots.keys())}"
|
|
637
|
+
)
|
|
638
|
+
|
|
639
|
+
# Check no overlap
|
|
640
|
+
overlap = set(unfilled_slot_names) & set(filled_slots.keys())
|
|
641
|
+
if overlap:
|
|
642
|
+
raise ValueError(
|
|
643
|
+
f"Slots cannot be both filled and unfilled. "
|
|
644
|
+
f"Overlapping slots: {overlap}"
|
|
645
|
+
)
|
|
646
|
+
|
|
647
|
+
|
|
648
|
+
def _render_template_for_cloze(
|
|
649
|
+
template_string: str, filled_slots: dict[str, str], unfilled_slot_names: list[str]
|
|
650
|
+
) -> str:
|
|
651
|
+
"""Render template with filled values and '___' for unfilled slots.
|
|
652
|
+
|
|
653
|
+
Parameters
|
|
654
|
+
----------
|
|
655
|
+
template_string : str
|
|
656
|
+
Template string with {slot_name} placeholders.
|
|
657
|
+
filled_slots : dict[str, str]
|
|
658
|
+
Mapping of slot names to fill values.
|
|
659
|
+
unfilled_slot_names : list[str]
|
|
660
|
+
Names of slots to leave unfilled (replaced with "___").
|
|
661
|
+
|
|
662
|
+
Returns
|
|
663
|
+
-------
|
|
664
|
+
str
|
|
665
|
+
Rendered template string.
|
|
666
|
+
"""
|
|
667
|
+
result = template_string
|
|
668
|
+
|
|
669
|
+
# Replace unfilled slots with "___"
|
|
670
|
+
for slot_name in unfilled_slot_names:
|
|
671
|
+
result = result.replace(f"{{{slot_name}}}", "___")
|
|
672
|
+
|
|
673
|
+
# Replace filled slots with their values
|
|
674
|
+
for slot_name, value in filled_slots.items():
|
|
675
|
+
result = result.replace(f"{{{slot_name}}}", value)
|
|
676
|
+
|
|
677
|
+
return result
|
|
678
|
+
|
|
679
|
+
|
|
680
|
+
def _calculate_positions(
|
|
681
|
+
template_string: str, unfilled_slot_names: list[str], filled_slots: dict[str, str]
|
|
682
|
+
) -> dict[str, int]:
|
|
683
|
+
"""Calculate token positions for unfilled slots.
|
|
684
|
+
|
|
685
|
+
Parameters
|
|
686
|
+
----------
|
|
687
|
+
template_string : str
|
|
688
|
+
Template string with {slot_name} placeholders.
|
|
689
|
+
unfilled_slot_names : list[str]
|
|
690
|
+
Names of slots that are unfilled.
|
|
691
|
+
filled_slots : dict[str, str]
|
|
692
|
+
Mapping of slot names to fill values.
|
|
693
|
+
|
|
694
|
+
Returns
|
|
695
|
+
-------
|
|
696
|
+
dict[str, int]
|
|
697
|
+
Mapping from slot_name to position (token index, 0-indexed).
|
|
698
|
+
"""
|
|
699
|
+
# Extract all slot placeholders in order
|
|
700
|
+
slot_pattern = re.compile(r"\{(\w+)\}")
|
|
701
|
+
slot_matches = slot_pattern.finditer(template_string)
|
|
702
|
+
|
|
703
|
+
positions: dict[str, int] = {}
|
|
704
|
+
token_index = 0
|
|
705
|
+
|
|
706
|
+
# Track position in template string
|
|
707
|
+
last_end = 0
|
|
708
|
+
|
|
709
|
+
for match in slot_matches:
|
|
710
|
+
slot_name = match.group(1)
|
|
711
|
+
|
|
712
|
+
# Count tokens before this slot
|
|
713
|
+
text_before = template_string[last_end : match.start()]
|
|
714
|
+
# Split by whitespace and count non-empty tokens
|
|
715
|
+
tokens_before = [t for t in text_before.split() if t]
|
|
716
|
+
token_index += len(tokens_before)
|
|
717
|
+
|
|
718
|
+
# This slot becomes one token (either filled value or "___")
|
|
719
|
+
if slot_name in unfilled_slot_names:
|
|
720
|
+
positions[slot_name] = token_index
|
|
721
|
+
|
|
722
|
+
token_index += 1
|
|
723
|
+
last_end = match.end()
|
|
724
|
+
|
|
725
|
+
return positions
|
|
726
|
+
|
|
727
|
+
|
|
728
|
+
def _extract_constraint_ids(template: Any, slot_name: str) -> list[UUID]:
|
|
729
|
+
"""Extract constraint UUIDs from a template slot.
|
|
730
|
+
|
|
731
|
+
Parameters
|
|
732
|
+
----------
|
|
733
|
+
template : Template
|
|
734
|
+
Source template.
|
|
735
|
+
slot_name : str
|
|
736
|
+
Name of slot to extract constraints from.
|
|
737
|
+
|
|
738
|
+
Returns
|
|
739
|
+
-------
|
|
740
|
+
list[UUID]
|
|
741
|
+
Constraint UUIDs for this slot.
|
|
742
|
+
"""
|
|
743
|
+
if slot_name not in template.slots:
|
|
744
|
+
return []
|
|
745
|
+
|
|
746
|
+
slot = template.slots[slot_name]
|
|
747
|
+
|
|
748
|
+
if not hasattr(slot, "constraints") or slot.constraints is None:
|
|
749
|
+
return []
|
|
750
|
+
|
|
751
|
+
# Extract UUIDs from Constraint objects
|
|
752
|
+
constraint_ids: list[UUID] = []
|
|
753
|
+
for constraint in slot.constraints:
|
|
754
|
+
if hasattr(constraint, "id"):
|
|
755
|
+
constraint_ids.append(constraint.id)
|
|
756
|
+
|
|
757
|
+
return constraint_ids
|