bead 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,621 @@
|
|
|
1
|
+
"""Utilities for creating multi-select experimental items.
|
|
2
|
+
|
|
3
|
+
This module provides language-agnostic utilities for creating multi-select
|
|
4
|
+
items where participants select one or more options from a set (checkboxes).
|
|
5
|
+
|
|
6
|
+
Integration Points
|
|
7
|
+
------------------
|
|
8
|
+
- Active Learning: bead/active_learning/models/multi_select.py
|
|
9
|
+
- Simulation: bead/simulation/strategies/multi_select.py
|
|
10
|
+
- Deployment: bead/deployment/jspsych/ (checkbox plugin)
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from collections import defaultdict
|
|
16
|
+
from collections.abc import Callable
|
|
17
|
+
from itertools import combinations, product
|
|
18
|
+
from typing import Any
|
|
19
|
+
from uuid import UUID, uuid4
|
|
20
|
+
|
|
21
|
+
from bead.items.item import Item, MetadataValue
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def create_multi_select_item(
    *options: str,
    min_selections: int = 1,
    max_selections: int | None = None,
    item_template_id: UUID | None = None,
    metadata: dict[str, MetadataValue] | None = None,
) -> Item:
    """Build a single multi-select (checkbox) item from text options.

    Parameters
    ----------
    *options : str
        Option texts, kept in the order given (2 or more required).
    min_selections : int
        Smallest number of options that must be selected (default: 1).
    max_selections : int | None
        Largest number of options that may be selected. When None, the
        number of options is used, so every option may be selected.
    item_template_id : UUID | None
        Template ID for the item. A new UUID is generated when None.
    metadata : dict[str, MetadataValue] | None
        Extra entries for the ``item_metadata`` field. They are applied on
        top of the selection bounds, so a ``"min_selections"`` or
        ``"max_selections"`` entry here replaces the validated value.

    Returns
    -------
    Item
        Multi-select item with the texts in its ``options`` field and the
        selection bounds recorded in ``item_metadata``.

    Raises
    ------
    ValueError
        If fewer than 2 options are provided, if ``min_selections`` is less
        than 1, if ``min_selections`` is greater than ``max_selections``, or
        if ``max_selections`` is greater than the number of options.

    Examples
    --------
    >>> item = create_multi_select_item(
    ...     "She walks.",
    ...     "She walk.",
    ...     "They walks.",
    ...     "They walk.",
    ...     min_selections=1,
    ...     max_selections=4,
    ...     metadata={"task": "select_grammatical"}
    ... )
    >>> item.options[0]
    'She walks.'
    >>> item.item_metadata["min_selections"]
    1
    >>> item.item_metadata["max_selections"]
    4

    >>> # Omitting max_selections allows every option to be selected
    >>> item = create_multi_select_item(
    ...     "Option A",
    ...     "Option B",
    ...     "Option C"
    ... )
    >>> item.item_metadata["max_selections"]
    3
    """
    option_count = len(options)
    if option_count < 2:
        raise ValueError("At least 2 options required for multi-select item")

    upper_bound = option_count if max_selections is None else max_selections

    # The checks run in a fixed order so that the first violated rule is the
    # one reported.
    if min_selections < 1:
        raise ValueError("min_selections must be at least 1")
    if min_selections > upper_bound:
        raise ValueError("min_selections cannot be greater than max_selections")
    if upper_bound > option_count:
        raise ValueError(
            f"max_selections ({upper_bound}) cannot exceed "
            f"number of options ({option_count})"
        )

    # Bounds are written first; caller-supplied entries are layered on top.
    item_metadata: dict[str, MetadataValue] = {
        "min_selections": min_selections,
        "max_selections": upper_bound,
    }
    item_metadata.update(metadata or {})

    return Item(
        item_template_id=uuid4() if item_template_id is None else item_template_id,
        options=list(options),
        item_metadata=item_metadata,
    )
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def create_multi_select_items_from_groups(
    items: list[Item],
    group_by: Callable[[Item], Any],
    n_options: int | None = None,
    min_selections: int = 1,
    max_selections: int | None = None,
    *,
    extract_text: Callable[[Item], str] | None = None,
    include_group_metadata: bool = True,
    item_template_id: UUID | None = None,
) -> list[Item]:
    """Create multi-select items by grouping source items.

    Groups items by a property, then creates multi-select items from each
    group's items as options.

    Parameters
    ----------
    items : list[Item]
        Source items to group and combine.
    group_by : Callable[[Item], Any]
        Function to extract grouping key from items.
    n_options : int | None
        Number of options per multi-select item. If None, uses all items in
        each group. If smaller than a group, one multi-select item is created
        for every ``n_options``-sized combination of that group's items.
    min_selections : int
        Minimum number of selections required (default: 1).
    max_selections : int | None
        Maximum number of selections allowed. If None, defaults to the number
        of options in each created item.
    extract_text : Callable[[Item], str] | None
        Function to extract text from item. If None, tries common keys
        ("text", "sentence", "content") from rendered_elements.
    include_group_metadata : bool
        Whether to include group key in item metadata.
    item_template_id : UUID | None
        Template ID for all created items. If None, generates one per item.

    Returns
    -------
    list[Item]
        Multi-select items created from groupings. Each item's metadata
        records the source item IDs under ``source_item_<i>_id`` keys.

    Raises
    ------
    ValueError
        If a group has fewer items than ``n_options``, if a group has fewer
        than 2 items, or if the selection bounds are rejected when the
        multi-select item is built.

    Examples
    --------
    Create multi-select items grouped by verb (select all acceptable frames):
    >>> items = [
    ...     Item(
    ...         item_template_id=uuid4(),
    ...         rendered_elements={"text": "She walks."},
    ...         item_metadata={"verb": "walk", "frame": "intransitive"}
    ...     ),
    ...     Item(
    ...         item_template_id=uuid4(),
    ...         rendered_elements={"text": "She walks the dog."},
    ...         item_metadata={"verb": "walk", "frame": "transitive"}
    ...     ),
    ...     Item(
    ...         item_template_id=uuid4(),
    ...         rendered_elements={"text": "She walks to school."},
    ...         item_metadata={"verb": "walk", "frame": "intransitive_pp"}
    ...     )
    ... ]
    >>> ms_items = create_multi_select_items_from_groups(
    ...     items,
    ...     group_by=lambda item: item.item_metadata["verb"],
    ...     min_selections=1,
    ...     max_selections=3
    ... )
    >>> len(ms_items)
    1
    >>> len(ms_items[0].options)
    3
    """

    def text_of(source: Item) -> str:
        # Prefer the caller's extractor; otherwise fall back to the module's
        # default lookup in rendered_elements.
        if extract_text:
            return extract_text(source)
        return _extract_text_from_item(source)

    # Group items
    groups: dict[Any, list[Item]] = defaultdict(list)
    for item in items:
        groups[group_by(item)].append(item)

    # Create multi-select items from each group
    ms_items: list[Item] = []

    for group_key, group_items in groups.items():
        group_size = len(group_items)

        # Validate n_options
        if n_options is not None and n_options > group_size:
            raise ValueError(
                f"Group '{group_key}' has only {group_size} item(s), "
                f"but n_options={n_options} was requested. "
                f"Cannot create {n_options}-option items from fewer items."
            )

        # Validate group size here so the error names the offending group,
        # instead of surfacing later as a generic "At least 2 options" error.
        if group_size < 2:
            raise ValueError(
                f"Group '{group_key}' has only {group_size} item(s). "
                f"Multi-select requires at least 2 items."
            )

        # If n_options specified, create combinations
        if n_options is not None and n_options < group_size:
            item_combos = combinations(group_items, n_options)
        else:
            # Use all items in group as single combination
            item_combos = [tuple(group_items)]

        for combo in item_combos:
            # Extract text from each item
            texts = [text_of(source) for source in combo]

            # Build metadata
            metadata: dict[str, MetadataValue] = {}
            if include_group_metadata:
                metadata["group_key"] = str(group_key)

            # Include source item IDs
            for index, source in enumerate(combo):
                metadata[f"source_item_{index}_id"] = str(source.id)

            ms_items.append(
                create_multi_select_item(
                    *texts,
                    min_selections=min_selections,
                    max_selections=max_selections,
                    item_template_id=item_template_id,
                    metadata=metadata,
                )
            )

    return ms_items
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def create_multi_select_items_with_foils(
    correct_items: list[Item],
    foil_items: list[Item],
    n_correct: int = 2,
    n_foils: int = 2,
    *,
    extract_text: Callable[[Item], str] | None = None,
    item_template_id: UUID | None = None,
    metadata_fn: (
        Callable[[list[Item], list[Item]], dict[str, MetadataValue]] | None
    ) = None,
) -> list[Item]:
    """Pair combinations of correct items with combinations of foils.

    Intended for tasks such as "Select all grammatical sentences", where
    some options should be selected and the rest are distractors. One
    multi-select item is produced for every pairing of an ``n_correct``-sized
    combination of correct items with an ``n_foils``-sized combination of
    foils. Within each item the correct texts come first, followed by the
    foil texts.

    Parameters
    ----------
    correct_items : list[Item]
        Items that should be selected.
    foil_items : list[Item]
        Distractor items that should not be selected.
    n_correct : int
        How many correct items go into each multi-select item (default: 2).
    n_foils : int
        How many foils go into each multi-select item (default: 2).
    extract_text : Callable[[Item], str] | None
        Function to extract text from items. When None, the module's default
        extraction from rendered_elements is used.
    item_template_id : UUID | None
        Template ID passed on for every created item.
    metadata_fn : Callable[[list[Item], list[Item]], dict[str, MetadataValue]] | None
        Function producing metadata from (correct_items_used,
        foil_items_used). When None, the metadata lists the source IDs of
        both sets together with ``n_correct`` and ``n_foils``.

    Returns
    -------
    list[Item]
        Multi-select items with ``min_selections`` of 1 and
        ``max_selections`` equal to the number of options. The list is empty
        when either pool has fewer items than requested, because no
        combination of that size exists.

    Examples
    --------
    >>> grammatical = [
    ...     Item(uuid4(), rendered_elements={"text": "She walks."},
    ...          item_metadata={"grammatical": True}),
    ...     Item(uuid4(), rendered_elements={"text": "They walk."},
    ...          item_metadata={"grammatical": True})
    ... ]
    >>> ungrammatical = [
    ...     Item(uuid4(), rendered_elements={"text": "She walk."},
    ...          item_metadata={"grammatical": False}),
    ...     Item(uuid4(), rendered_elements={"text": "They walks."},
    ...          item_metadata={"grammatical": False})
    ... ]
    >>> ms_items = create_multi_select_items_with_foils(
    ...     grammatical,
    ...     ungrammatical,
    ...     n_correct=2,
    ...     n_foils=2
    ... )
    >>> len(ms_items)
    1
    >>> ms_items[0].item_metadata["min_selections"]
    1
    >>> ms_items[0].item_metadata["max_selections"]
    4
    """

    def text_of(source: Item) -> str:
        # Caller-supplied extractor wins; otherwise use the default lookup.
        if extract_text:
            return extract_text(source)
        return _extract_text_from_item(source)

    # Every way of drawing the requested number from each pool.
    correct_pool = list(combinations(correct_items, n_correct))
    foil_pool = list(combinations(foil_items, n_foils))

    results: list[Item] = []

    for picked_correct, picked_foils in product(correct_pool, foil_pool):
        # Correct texts first, foil texts after.
        option_texts = [text_of(source) for source in (*picked_correct, *picked_foils)]

        metadata: dict[str, MetadataValue]
        if metadata_fn:
            metadata = metadata_fn(list(picked_correct), list(picked_foils))
        else:
            metadata = {
                "correct_item_ids": [str(source.id) for source in picked_correct],
                "foil_item_ids": [str(source.id) for source in picked_foils],
                "n_correct": n_correct,
                "n_foils": n_foils,
            }

        # At least one option must be selected; all of them may be.
        results.append(
            create_multi_select_item(
                *option_texts,
                min_selections=1,
                max_selections=len(option_texts),
                item_template_id=item_template_id,
                metadata=metadata,
            )
        )

    return results
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def create_multi_select_items_cross_product(
    group1_items: list[Item],
    group2_items: list[Item],
    n_from_group1: int = 1,
    n_from_group2: int = 1,
    min_selections: int = 1,
    max_selections: int | None = None,
    *,
    extract_text: Callable[[Item], str] | None = None,
    item_template_id: UUID | None = None,
    metadata_fn: (
        Callable[[list[Item], list[Item]], dict[str, MetadataValue]] | None
    ) = None,
) -> list[Item]:
    """Build multi-select items from the cross-product of two groups.

    Every ``n_from_group1``-sized combination of group1 is paired with every
    ``n_from_group2``-sized combination of group2. Each pairing becomes one
    multi-select item with (n_from_group1 + n_from_group2) options, group1
    texts first.

    Parameters
    ----------
    group1_items : list[Item]
        Items in the first group.
    group2_items : list[Item]
        Items in the second group.
    n_from_group1 : int
        Number of group1 items used per combination (default: 1).
    n_from_group2 : int
        Number of group2 items used per combination (default: 1).
    min_selections : int
        Minimum number of selections required (default: 1).
    max_selections : int | None
        Maximum number of selections allowed. If None, defaults to the total
        number of options.
    extract_text : Callable[[Item], str] | None
        Function to extract text from items. When None, the module's default
        extraction from rendered_elements is used.
    item_template_id : UUID | None
        Template ID passed on for every created item.
    metadata_fn : Callable[[list[Item], list[Item]], dict[str, MetadataValue]] | None
        Function producing metadata from (group1_items_used,
        group2_items_used). When None, the metadata lists the source IDs
        taken from each group.

    Returns
    -------
    list[Item]
        Multi-select items from the cross-product. The list is empty when
        either group has fewer items than requested from it.

    Examples
    --------
    >>> active = [Item(uuid4(), rendered_elements={"text": "She walks."})]
    >>> passive = [Item(uuid4(), rendered_elements={"text": "She is walked."})]
    >>> ms_items = create_multi_select_items_cross_product(
    ...     active, passive,
    ...     n_from_group1=1,
    ...     n_from_group2=1,
    ...     min_selections=1,
    ...     max_selections=2
    ... )
    >>> len(ms_items)
    1
    """

    def text_of(source: Item) -> str:
        # Caller-supplied extractor wins; otherwise use the default lookup.
        if extract_text:
            return extract_text(source)
        return _extract_text_from_item(source)

    # Every way of drawing the requested number from each group.
    first_pool = list(combinations(group1_items, n_from_group1))
    second_pool = list(combinations(group2_items, n_from_group2))

    results: list[Item] = []

    for from_first, from_second in product(first_pool, second_pool):
        # Group1 texts first, group2 texts after.
        option_texts = [text_of(source) for source in (*from_first, *from_second)]

        metadata: dict[str, MetadataValue]
        if metadata_fn:
            metadata = metadata_fn(list(from_first), list(from_second))
        else:
            metadata = {
                "source_group1_ids": [str(source.id) for source in from_first],
                "source_group2_ids": [str(source.id) for source in from_second],
            }

        results.append(
            create_multi_select_item(
                *option_texts,
                min_selections=min_selections,
                max_selections=max_selections,
                item_template_id=item_template_id,
                metadata=metadata,
            )
        )

    return results
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
def create_filtered_multi_select_items(
    items: list[Item],
    group_by: Callable[[Item], Any],
    n_options: int | None = None,
    min_selections: int = 1,
    max_selections: int | None = None,
    *,
    item_filter: Callable[[Item], bool] | None = None,
    group_filter: Callable[[Any, list[Item]], bool] | None = None,
    combination_filter: Callable[[tuple[Item, ...]], bool] | None = None,
    extract_text: Callable[[Item], str] | None = None,
    item_template_id: UUID | None = None,
) -> list[Item]:
    """Create grouped multi-select items with filtering at three levels.

    Filters are applied in this order: individual items, then whole groups,
    then individual combinations within each surviving group.

    Parameters
    ----------
    items : list[Item]
        Source items.
    group_by : Callable[[Item], Any]
        Function returning the grouping key for an item.
    n_options : int | None
        Number of options per item. If None, uses all items in each group.
    min_selections : int
        Minimum number of selections required.
    max_selections : int | None
        Maximum number of selections allowed.
    item_filter : Callable[[Item], bool] | None
        Keeps only items for which it returns true; applied before grouping.
    group_filter : Callable[[Any, list[Item]], bool] | None
        Keeps only groups for which it returns true (receives group_key and
        group_items).
    combination_filter : Callable[[tuple[Item, ...]], bool] | None
        Keeps only combinations for which it returns true.
    extract_text : Callable[[Item], str] | None
        Text extraction function. When None, the module's default extraction
        from rendered_elements is used.
    item_template_id : UUID | None
        Template ID for created items.

    Returns
    -------
    list[Item]
        Filtered multi-select items. Each item's metadata holds the group key
        and the list of source item IDs.

    Raises
    ------
    ValueError
        If a surviving group has fewer than 2 items, or fewer items than
        ``n_options``.

    Examples
    --------
    >>> ms_items = create_filtered_multi_select_items(
    ...     items,
    ...     group_by=lambda i: i.item_metadata["verb"],
    ...     n_options=3,
    ...     item_filter=lambda i: i.item_metadata.get("valid", True),
    ...     group_filter=lambda key, items: len(items) >= 3,
    ...     min_selections=1,
    ...     max_selections=3
    ... )  # doctest: +SKIP
    """

    def text_of(source: Item) -> str:
        # Caller-supplied extractor wins; otherwise use the default lookup.
        if extract_text:
            return extract_text(source)
        return _extract_text_from_item(source)

    # Level 1: item filter
    if item_filter:
        candidates = [source for source in items if item_filter(source)]
    else:
        candidates = items

    buckets: dict[Any, list[Item]] = defaultdict(list)
    for source in candidates:
        buckets[group_by(source)].append(source)

    # Level 2: group filter, evaluated for every group before any validation
    if group_filter:
        buckets = {
            key: members for key, members in buckets.items() if group_filter(key, members)
        }

    results: list[Item] = []
    for group_key, group_items in buckets.items():
        group_size = len(group_items)

        if group_size < 2:
            raise ValueError(
                f"Group '{group_key}' has only {group_size} item(s) "
                f"after filtering. Multi-select requires at least 2 items. "
                f"Use group_filter to exclude small groups."
            )

        if n_options is not None and n_options > group_size:
            raise ValueError(
                f"Group '{group_key}' has only {group_size} item(s), "
                f"but n_options={n_options} was requested. "
                f"Cannot create {n_options}-option items from fewer items."
            )

        # Sub-select only when n_options is strictly smaller than the group;
        # otherwise the whole group forms the single combination.
        if n_options is None or n_options >= group_size:
            item_combos = [tuple(group_items)]
        else:
            item_combos = combinations(group_items, n_options)

        for combo in item_combos:
            # Level 3: combination filter
            if combination_filter and not combination_filter(combo):
                continue

            option_texts = [text_of(source) for source in combo]

            metadata: dict[str, MetadataValue] = {
                "group_key": str(group_key),
                "source_item_ids": [str(source.id) for source in combo],
            }

            results.append(
                create_multi_select_item(
                    *option_texts,
                    min_selections=min_selections,
                    max_selections=max_selections,
                    item_template_id=item_template_id,
                    metadata=metadata,
                )
            )

    return results
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
def _extract_text_from_item(item: Item) -> str:
|
|
592
|
+
"""Extract text from item's rendered_elements.
|
|
593
|
+
|
|
594
|
+
Tries common keys: "text", "sentence", "content".
|
|
595
|
+
Raises error if no suitable text found.
|
|
596
|
+
|
|
597
|
+
Parameters
|
|
598
|
+
----------
|
|
599
|
+
item : Item
|
|
600
|
+
Item to extract text from.
|
|
601
|
+
|
|
602
|
+
Returns
|
|
603
|
+
-------
|
|
604
|
+
str
|
|
605
|
+
Extracted text.
|
|
606
|
+
|
|
607
|
+
Raises
|
|
608
|
+
------
|
|
609
|
+
ValueError
|
|
610
|
+
If no suitable text key found in rendered_elements.
|
|
611
|
+
"""
|
|
612
|
+
for key in ["text", "sentence", "content"]:
|
|
613
|
+
if key in item.rendered_elements:
|
|
614
|
+
return item.rendered_elements[key]
|
|
615
|
+
|
|
616
|
+
raise ValueError(
|
|
617
|
+
f"Cannot extract text from item {item.id}. "
|
|
618
|
+
f"Expected one of ['text', 'sentence', 'content'] in rendered_elements, "
|
|
619
|
+
f"but found keys: {list(item.rendered_elements.keys())}. "
|
|
620
|
+
f"Use the extract_text parameter to provide a custom extraction function."
|
|
621
|
+
)
|