bead 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
bead/items/binary.py
ADDED
|
@@ -0,0 +1,515 @@
|
|
|
1
|
+
"""Utilities for creating binary experimental items.
|
|
2
|
+
|
|
3
|
+
This module provides language-agnostic utilities for creating binary items
|
|
4
|
+
where participants make yes/no or true/false judgments about a single stimulus.
|
|
5
|
+
|
|
6
|
+
IMPORTANT: Binary tasks are semantically distinct from 2AFC tasks:
|
|
7
|
+
- Binary: Absolute judgment about single stimulus ("Is this grammatical?")
|
|
8
|
+
- 2AFC: Relative choice between two stimuli ("Which is more natural?")
|
|
9
|
+
|
|
10
|
+
Integration Points
|
|
11
|
+
------------------
|
|
12
|
+
- Active Learning: bead/active_learning/models/binary.py
|
|
13
|
+
- Simulation: bead/simulation/strategies/binary.py
|
|
14
|
+
- Deployment: bead/deployment/jspsych/ (binary button plugin)
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from collections import defaultdict
|
|
20
|
+
from collections.abc import Callable, Hashable
|
|
21
|
+
from itertools import product
|
|
22
|
+
from uuid import UUID, uuid4
|
|
23
|
+
|
|
24
|
+
from bead.items.item import Item, MetadataValue
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def create_binary_item(
    text: str,
    prompt: str = "Yes/No?",
    binary_options: tuple[str, str] = ("yes", "no"),
    item_template_id: UUID | None = None,
    metadata: dict[str, MetadataValue] | None = None,
) -> Item:
    """Build one binary (two-option) judgment item around a single stimulus.

    Parameters
    ----------
    text : str
        Stimulus shown to the participant. Must contain at least one
        non-whitespace character.
    prompt : str
        Question asked about the stimulus (default: "Yes/No?").
    binary_options : tuple[str, str]
        The two response labels (default: ("yes", "no")), e.g.
        ("true", "false") or ("acceptable", "unacceptable").
    item_template_id : UUID | None
        Template ID stored on the item. A fresh ``uuid4()`` is used when None.
    metadata : dict[str, MetadataValue] | None
        Extra entries for ``item_metadata``. They are merged in *after* the
        ``"binary_options"`` entry, so a ``"binary_options"`` key supplied
        here replaces the value derived from the ``binary_options`` argument.

    Returns
    -------
    Item
        Item constructed with ``rendered_elements`` holding ``"text"`` and
        ``"prompt"``, and ``item_metadata`` holding ``"binary_options"`` (as
        a list) plus any caller-supplied metadata.

    Raises
    ------
    ValueError
        If ``text`` is empty or whitespace-only, or if ``binary_options``
        does not contain exactly 2 values.

    Examples
    --------
    >>> item = create_binary_item(
    ...     "The cat sat on the mat.",
    ...     prompt="Is this sentence grammatical?",
    ...     metadata={"judgment": "grammaticality"}
    ... )
    >>> item.rendered_elements["text"]
    'The cat sat on the mat.'
    >>> item.item_metadata["binary_options"]
    ['yes', 'no']
    """
    # Validate before generating any ID so invalid calls fail fast.
    stimulus_is_blank = not text or not text.strip()
    if stimulus_is_blank:
        raise ValueError("text cannot be empty")

    if len(binary_options) != 2:
        raise ValueError("binary_options must contain exactly 2 values")

    template_id = uuid4() if item_template_id is None else item_template_id

    # Options are stored as a list; caller metadata is layered on top.
    combined_metadata: dict[str, MetadataValue] = {
        "binary_options": list(binary_options),
    }
    if metadata:
        combined_metadata.update(metadata)

    return Item(
        item_template_id=template_id,
        rendered_elements={"text": text, "prompt": prompt},
        item_metadata=combined_metadata,
    )
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def create_binary_items_from_texts(
    texts: list[str],
    prompt: str,
    binary_options: tuple[str, str] = ("yes", "no"),
    *,
    item_template_id: UUID | None = None,
    metadata_fn: Callable[[str], dict[str, MetadataValue]] | None = None,
) -> list[Item]:
    """Turn each text into a binary item that shares one prompt.

    Parameters
    ----------
    texts : list[str]
        Stimulus texts; one item is produced per entry, in order.
    prompt : str
        Question/prompt attached to every item.
    binary_options : tuple[str, str]
        The two response options (default: ("yes", "no")).
    item_template_id : UUID | None
        Template ID passed to every item. When None, ``create_binary_item``
        generates a separate ID for each item.
    metadata_fn : Callable[[str], dict[str, MetadataValue]] | None
        Optional callback that receives a text and returns the metadata to
        attach to its item. Without it, an empty dict is passed.

    Returns
    -------
    list[Item]
        One binary item per input text.

    Examples
    --------
    >>> items = create_binary_items_from_texts(
    ...     ["She walks.", "She walk."],
    ...     prompt="Is this sentence grammatical?",
    ... )
    >>> len(items)
    2
    >>> items[0].rendered_elements["text"]
    'She walks.'
    """
    return [
        create_binary_item(
            text=stimulus,
            prompt=prompt,
            binary_options=binary_options,
            item_template_id=item_template_id,
            metadata=metadata_fn(stimulus) if metadata_fn else {},
        )
        for stimulus in texts
    ]
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def create_binary_items_with_context(
|
|
177
|
+
contexts: list[str],
|
|
178
|
+
targets: list[str],
|
|
179
|
+
prompt: str,
|
|
180
|
+
binary_options: tuple[str, str] = ("yes", "no"),
|
|
181
|
+
*,
|
|
182
|
+
context_label: str = "Context",
|
|
183
|
+
target_label: str = "Statement",
|
|
184
|
+
item_template_id: UUID | None = None,
|
|
185
|
+
metadata_fn: (Callable[[str, str], dict[str, MetadataValue]] | None) = None,
|
|
186
|
+
) -> list[Item]:
|
|
187
|
+
"""Create binary items with context + target structure.
|
|
188
|
+
|
|
189
|
+
Useful for judgments like "Given context X, is statement Y true?".
|
|
190
|
+
|
|
191
|
+
Parameters
|
|
192
|
+
----------
|
|
193
|
+
contexts : list[str]
|
|
194
|
+
Context texts (same length as targets).
|
|
195
|
+
targets : list[str]
|
|
196
|
+
Target texts to judge given context.
|
|
197
|
+
prompt : str
|
|
198
|
+
The question/prompt for the judgment.
|
|
199
|
+
binary_options : tuple[str, str]
|
|
200
|
+
The two response options (default: ("yes", "no")).
|
|
201
|
+
context_label : str
|
|
202
|
+
Label for context in rendered text (default: "Context").
|
|
203
|
+
target_label : str
|
|
204
|
+
Label for target in rendered text (default: "Statement").
|
|
205
|
+
item_template_id : UUID | None
|
|
206
|
+
Template ID for all created items. If None, generates one per item.
|
|
207
|
+
metadata_fn : Callable[[str, str], dict[str, MetadataValue]] | None
|
|
208
|
+
Function to generate metadata from (context, target).
|
|
209
|
+
|
|
210
|
+
Returns
|
|
211
|
+
-------
|
|
212
|
+
list[Item]
|
|
213
|
+
Binary items with context + target structure.
|
|
214
|
+
|
|
215
|
+
Raises
|
|
216
|
+
------
|
|
217
|
+
ValueError
|
|
218
|
+
If contexts and targets have different lengths.
|
|
219
|
+
|
|
220
|
+
Examples
|
|
221
|
+
--------
|
|
222
|
+
>>> contexts = ["The dog barked loudly."]
|
|
223
|
+
>>> targets = ["The dog made a sound."]
|
|
224
|
+
>>> items = create_binary_items_with_context(
|
|
225
|
+
... contexts,
|
|
226
|
+
... targets,
|
|
227
|
+
... prompt="Is the statement true given the context?",
|
|
228
|
+
... binary_options=("true", "false")
|
|
229
|
+
... )
|
|
230
|
+
>>> len(items)
|
|
231
|
+
1
|
|
232
|
+
>>> "Context:" in items[0].rendered_elements["text"]
|
|
233
|
+
True
|
|
234
|
+
"""
|
|
235
|
+
if len(contexts) != len(targets):
|
|
236
|
+
raise ValueError("contexts and targets must have same length")
|
|
237
|
+
|
|
238
|
+
binary_items: list[Item] = []
|
|
239
|
+
|
|
240
|
+
for context, target in zip(contexts, targets, strict=True):
|
|
241
|
+
# Combine context and target into single text
|
|
242
|
+
combined_text = f"{context_label}: {context}\n{target_label}: {target}"
|
|
243
|
+
|
|
244
|
+
metadata: dict[str, MetadataValue] = {
|
|
245
|
+
"context": context,
|
|
246
|
+
"target": target,
|
|
247
|
+
}
|
|
248
|
+
if metadata_fn:
|
|
249
|
+
metadata.update(metadata_fn(context, target))
|
|
250
|
+
|
|
251
|
+
item = create_binary_item(
|
|
252
|
+
text=combined_text,
|
|
253
|
+
prompt=prompt,
|
|
254
|
+
binary_options=binary_options,
|
|
255
|
+
item_template_id=item_template_id,
|
|
256
|
+
metadata=metadata,
|
|
257
|
+
)
|
|
258
|
+
binary_items.append(item)
|
|
259
|
+
|
|
260
|
+
return binary_items
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def create_binary_items_from_groups(
    items: list[Item],
    group_by: Callable[[Item], Hashable],
    prompt: str,
    binary_options: tuple[str, str] = ("yes", "no"),
    *,
    extract_text: Callable[[Item], str] | None = None,
    include_group_metadata: bool = True,
    item_template_id: UUID | None = None,
) -> list[Item]:
    """Derive one binary item per source item, organised by group.

    Source items are bucketed by ``group_by``. The output lists groups in the
    order their key first appears, and keeps input order within each group.
    Every derived item records the source item's ID, and optionally its
    group key, in its metadata.

    Parameters
    ----------
    items : list[Item]
        Source items to convert.
    group_by : Callable[[Item], Hashable]
        Returns the grouping key for a source item.
    prompt : str
        Question/prompt attached to every derived item.
    binary_options : tuple[str, str]
        The two response options (default: ("yes", "no")).
    extract_text : Callable[[Item], str] | None
        Custom text extractor. When None, ``_extract_text_from_item`` is used.
    include_group_metadata : bool
        When True, ``str(group key)`` is stored under ``"group_key"``.
    item_template_id : UUID | None
        Template ID passed to every item. When None, ``create_binary_item``
        generates a separate ID for each item.

    Returns
    -------
    list[Item]
        Binary items derived from the source items.

    Examples
    --------
    >>> binary_items = create_binary_items_from_groups(
    ...     source_items,
    ...     group_by=lambda i: i.item_metadata["verb"],
    ...     prompt="Is this sentence grammatical?"
    ... )  # doctest: +SKIP
    """
    buckets: dict[Hashable, list[Item]] = defaultdict(list)
    for source in items:
        buckets[group_by(source)].append(source)

    # Flatten to (key, item) pairs in group-major order.
    keyed_sources = [
        (key, member) for key, members in buckets.items() for member in members
    ]

    derived: list[Item] = []
    for key, source in keyed_sources:
        stimulus: str = (
            extract_text(source) if extract_text else _extract_text_from_item(source)
        )

        provenance: dict[str, MetadataValue] = {"source_item_id": str(source.id)}
        if include_group_metadata:
            provenance["group_key"] = str(key)

        derived.append(
            create_binary_item(
                text=stimulus,
                prompt=prompt,
                binary_options=binary_options,
                item_template_id=item_template_id,
                metadata=provenance,
            )
        )

    return derived
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def create_binary_items_cross_product(
    texts: list[str],
    prompts: list[str],
    binary_options: tuple[str, str] = ("yes", "no"),
    *,
    item_template_id: UUID | None = None,
    metadata_fn: (Callable[[str, str], dict[str, MetadataValue]] | None) = None,
) -> list[Item]:
    """Create a binary item for every (text, prompt) combination.

    Handy when several different questions should be asked about each
    stimulus. Combinations are produced text-major: all prompts for the
    first text, then all prompts for the second, and so on.

    Parameters
    ----------
    texts : list[str]
        Stimulus texts.
    prompts : list[str]
        Prompts to apply to each text.
    binary_options : tuple[str, str]
        The two response options (default: ("yes", "no")).
    item_template_id : UUID | None
        Template ID passed to every item. When None, ``create_binary_item``
        generates a separate ID for each item.
    metadata_fn : Callable[[str, str], dict[str, MetadataValue]] | None
        Optional callback taking (text, prompt) and returning the metadata
        for that item. Without it, an empty dict is passed.

    Returns
    -------
    list[Item]
        ``len(texts) * len(prompts)`` binary items.

    Examples
    --------
    >>> texts = ["The cat sat.", "The dog ran."]
    >>> prompts = ["Is this grammatical?", "Is this natural?"]
    >>> items = create_binary_items_cross_product(texts, prompts)
    >>> len(items)
    4
    """
    return [
        create_binary_item(
            text=stimulus,
            prompt=question,
            binary_options=binary_options,
            item_template_id=item_template_id,
            metadata=metadata_fn(stimulus, question) if metadata_fn else {},
        )
        for stimulus, question in product(texts, prompts)
    ]
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def create_filtered_binary_items(
    items: list[Item],
    prompt: str,
    binary_options: tuple[str, str] = ("yes", "no"),
    *,
    item_filter: Callable[[Item], bool] | None = None,
    extract_text: Callable[[Item], str] | None = None,
    item_template_id: UUID | None = None,
) -> list[Item]:
    """Create binary items from the source items that pass a filter.

    Parameters
    ----------
    items : list[Item]
        Source items.
    prompt : str
        Question/prompt attached to every derived item.
    binary_options : tuple[str, str]
        The two response options (default: ("yes", "no")).
    item_filter : Callable[[Item], bool] | None
        Predicate deciding which source items are kept. When None, every
        source item is used.
    extract_text : Callable[[Item], str] | None
        Custom text extractor. When None, ``_extract_text_from_item`` is used.
    item_template_id : UUID | None
        Template ID passed to every item. When None, ``create_binary_item``
        generates a separate ID for each item.

    Returns
    -------
    list[Item]
        One binary item per retained source item, in input order. Each
        carries the source item's ID under ``"source_item_id"``.

    Examples
    --------
    >>> binary_items = create_filtered_binary_items(
    ...     items,
    ...     prompt="Is this grammatical?",
    ...     item_filter=lambda i: i.item_metadata.get("valid", True)
    ... )  # doctest: +SKIP
    """
    selected = (
        [candidate for candidate in items if item_filter(candidate)]
        if item_filter
        else items
    )

    derived: list[Item] = []
    for source in selected:
        stimulus: str = (
            extract_text(source) if extract_text else _extract_text_from_item(source)
        )
        provenance: dict[str, MetadataValue] = {"source_item_id": str(source.id)}
        derived.append(
            create_binary_item(
                text=stimulus,
                prompt=prompt,
                binary_options=binary_options,
                item_template_id=item_template_id,
                metadata=provenance,
            )
        )

    return derived
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
def _extract_text_from_item(item: Item) -> str:
    """Return the stimulus text stored in an item's rendered_elements.

    The keys "text", "sentence" and "content" are checked in that order and
    the value under the first one present is returned.

    Parameters
    ----------
    item : Item
        Item to extract text from.

    Returns
    -------
    str
        Value stored under the first matching key.

    Raises
    ------
    ValueError
        If none of the candidate keys is present in rendered_elements.
    """
    elements = item.rendered_elements
    matched_key = next(
        (name for name in ("text", "sentence", "content") if name in elements),
        None,
    )
    if matched_key is not None:
        return elements[matched_key]

    raise ValueError(
        f"Cannot extract text from item {item.id}. "
        f"Expected one of ['text', 'sentence', 'content'] in rendered_elements, "
        f"but found keys: {list(item.rendered_elements.keys())}. "
        f"Use the extract_text parameter to provide a custom extraction function."
    )
|