bead 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
bead/items/free_text.py
ADDED
|
@@ -0,0 +1,681 @@
|
|
|
1
|
+
"""Utilities for creating free text experimental items.
|
|
2
|
+
|
|
3
|
+
This module provides language-agnostic utilities for creating free text
|
|
4
|
+
items where participants provide open-ended text responses (e.g., paraphrasing,
|
|
5
|
+
question answering, cloze completion).
|
|
6
|
+
|
|
7
|
+
Integration Points
|
|
8
|
+
------------------
|
|
9
|
+
- Active Learning: bead/active_learning/models/free_text.py
|
|
10
|
+
- Simulation: bead/simulation/strategies/free_text.py
|
|
11
|
+
- Deployment: bead/deployment/jspsych/ (text input or textarea)
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from collections import defaultdict
|
|
17
|
+
from collections.abc import Callable
|
|
18
|
+
from itertools import product
|
|
19
|
+
from typing import Any
|
|
20
|
+
from uuid import UUID, uuid4
|
|
21
|
+
|
|
22
|
+
from bead.items.item import Item, MetadataValue
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def create_free_text_item(
    text: str,
    prompt: str,
    max_length: int | None = None,
    validation_pattern: str | None = None,
    min_length: int | None = None,
    multiline: bool = False,
    item_template_id: UUID | None = None,
    metadata: dict[str, MetadataValue] | None = None,
) -> Item:
    """Create a free text (open-ended) item.

    Parameters
    ----------
    text : str
        The stimulus text or context.
    prompt : str
        The question/instruction for what to enter (required).
    max_length : int | None
        Maximum character limit. None means unlimited.
    validation_pattern : str | None
        Optional regex pattern for validation (validated at deployment).
        The pattern is stored as-is; it is not compiled or checked here.
    min_length : int | None
        Minimum characters required. None means no minimum.
    multiline : bool
        True for textarea (multiline), False for single-line input (default).
    item_template_id : UUID | None
        Template ID for the item. If None, generates new UUID.
    metadata : dict[str, MetadataValue] | None
        Additional metadata for item_metadata field. Applied last, so its
        entries override the generated ``multiline``, ``max_length``,
        ``min_length`` and ``validation_pattern`` keys on collision.

    Returns
    -------
    Item
        Free text item with text and prompt in rendered_elements.

    Raises
    ------
    ValueError
        If text or prompt is empty or whitespace-only, if min_length or
        max_length is negative, or if min_length > max_length.

    Examples
    --------
    >>> item = create_free_text_item(
    ...     text="The dog chased the cat.",
    ...     prompt="Who chased whom?",
    ...     max_length=100
    ... )
    >>> item.rendered_elements["text"]
    'The dog chased the cat.'
    >>> item.rendered_elements["prompt"]
    'Who chased whom?'
    >>> item.item_metadata["max_length"]
    100

    >>> # Multiline paraphrase task
    >>> item = create_free_text_item(
    ...     text="The quick brown fox jumps over the lazy dog.",
    ...     prompt="Rewrite this sentence in your own words:",
    ...     multiline=True,
    ...     max_length=200
    ... )
    >>> item.item_metadata["multiline"]
    True
    """
    if not text or not text.strip():
        raise ValueError("text cannot be empty")

    if not prompt or not prompt.strip():
        raise ValueError("prompt is required for free text items")

    # A negative character count can never be satisfied by a response, so
    # reject it here instead of storing an unusable constraint on the item.
    length_bounds = (("min_length", min_length), ("max_length", max_length))
    for bound_name, bound in length_bounds:
        if bound is not None and bound < 0:
            raise ValueError(f"{bound_name} ({bound}) cannot be negative")

    # The two bounds only conflict when both are given.
    if (
        min_length is not None
        and max_length is not None
        and min_length > max_length
    ):
        raise ValueError(
            f"min_length ({min_length}) cannot be greater than "
            f"max_length ({max_length})"
        )

    if item_template_id is None:
        item_template_id = uuid4()

    rendered_elements: dict[str, str] = {
        "text": text,
        "prompt": prompt,
    }

    # "multiline" is always recorded; the optional constraints are recorded
    # only when the caller supplied them.
    item_metadata: dict[str, MetadataValue] = {
        "multiline": multiline,
    }

    if max_length is not None:
        item_metadata["max_length"] = max_length

    if min_length is not None:
        item_metadata["min_length"] = min_length

    if validation_pattern is not None:
        item_metadata["validation_pattern"] = validation_pattern

    # Caller metadata goes in last so it wins over the generated keys.
    if metadata:
        item_metadata.update(metadata)

    return Item(
        item_template_id=item_template_id,
        rendered_elements=rendered_elements,
        item_metadata=item_metadata,
    )
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def create_free_text_items_from_texts(
    texts: list[str],
    prompt: str,
    max_length: int | None = None,
    validation_pattern: str | None = None,
    min_length: int | None = None,
    multiline: bool = False,
    *,
    item_template_id: UUID | None = None,
    metadata_fn: Callable[[str], dict[str, MetadataValue]] | None = None,
) -> list[Item]:
    """Create free text items from a list of texts with the same prompt.

    Parameters
    ----------
    texts : list[str]
        List of stimulus texts.
    prompt : str
        The question/instruction for all items (required).
    max_length : int | None
        Maximum character limit for all items.
    validation_pattern : str | None
        Optional regex pattern for validation.
    min_length : int | None
        Minimum characters required.
    multiline : bool
        True for textarea, False for single-line input.
    item_template_id : UUID | None
        Template ID for all created items. If None, generates one per item.
    metadata_fn : Callable[[str], dict[str, MetadataValue]] | None
        Function to generate metadata from each text. Called once per text
        whenever it is not None.

    Returns
    -------
    list[Item]
        Free text items for each text, in the order of ``texts``.

    Raises
    ------
    ValueError
        Propagated from create_free_text_item when a text, the prompt, or
        the length constraints are invalid.

    Examples
    --------
    >>> texts = ["Sentence 1", "Sentence 2", "Sentence 3"]
    >>> items = create_free_text_items_from_texts(
    ...     texts,
    ...     prompt="Paraphrase this:",
    ...     multiline=True,
    ...     max_length=200,
    ...     metadata_fn=lambda t: {"original_length": len(t)}
    ... )
    >>> len(items)
    3
    >>> items[0].item_metadata["original_length"]
    10
    """
    # Compare against None rather than relying on truthiness: a callable
    # object that happens to be falsy must still be used.
    return [
        create_free_text_item(
            text=text,
            prompt=prompt,
            max_length=max_length,
            validation_pattern=validation_pattern,
            min_length=min_length,
            multiline=multiline,
            item_template_id=item_template_id,
            metadata=metadata_fn(text) if metadata_fn is not None else {},
        )
        for text in texts
    ]
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def create_free_text_items_with_context(
|
|
211
|
+
contexts: list[str],
|
|
212
|
+
prompts: list[str],
|
|
213
|
+
max_length: int | None = None,
|
|
214
|
+
validation_pattern: str | None = None,
|
|
215
|
+
min_length: int | None = None,
|
|
216
|
+
multiline: bool = False,
|
|
217
|
+
*,
|
|
218
|
+
item_template_id: UUID | None = None,
|
|
219
|
+
metadata_fn: (Callable[[str, str], dict[str, MetadataValue]] | None) = None,
|
|
220
|
+
) -> list[Item]:
|
|
221
|
+
"""Create free text items with context + prompt pairs.
|
|
222
|
+
|
|
223
|
+
Useful for reading comprehension, question answering where each context
|
|
224
|
+
has a specific question.
|
|
225
|
+
|
|
226
|
+
Parameters
|
|
227
|
+
----------
|
|
228
|
+
contexts : list[str]
|
|
229
|
+
Context texts (same length as prompts).
|
|
230
|
+
prompts : list[str]
|
|
231
|
+
Prompts/questions for each context.
|
|
232
|
+
max_length : int | None
|
|
233
|
+
Maximum character limit for all items.
|
|
234
|
+
validation_pattern : str | None
|
|
235
|
+
Optional regex pattern for validation.
|
|
236
|
+
min_length : int | None
|
|
237
|
+
Minimum characters required.
|
|
238
|
+
multiline : bool
|
|
239
|
+
True for textarea, False for single-line input.
|
|
240
|
+
item_template_id : UUID | None
|
|
241
|
+
Template ID for all created items. If None, generates one per item.
|
|
242
|
+
metadata_fn : Callable[[str, str], dict[str, MetadataValue]] | None
|
|
243
|
+
Function to generate metadata from (context, prompt).
|
|
244
|
+
|
|
245
|
+
Returns
|
|
246
|
+
-------
|
|
247
|
+
list[Item]
|
|
248
|
+
Free text items with context + prompt structure.
|
|
249
|
+
|
|
250
|
+
Raises
|
|
251
|
+
------
|
|
252
|
+
ValueError
|
|
253
|
+
If contexts and prompts have different lengths.
|
|
254
|
+
|
|
255
|
+
Examples
|
|
256
|
+
--------
|
|
257
|
+
>>> contexts = ["The cat sat on the mat."]
|
|
258
|
+
>>> prompts = ["What sat on the mat?"]
|
|
259
|
+
>>> items = create_free_text_items_with_context(
|
|
260
|
+
... contexts,
|
|
261
|
+
... prompts,
|
|
262
|
+
... max_length=50
|
|
263
|
+
... )
|
|
264
|
+
>>> len(items)
|
|
265
|
+
1
|
|
266
|
+
>>> items[0].rendered_elements["text"]
|
|
267
|
+
'The cat sat on the mat.'
|
|
268
|
+
>>> items[0].rendered_elements["prompt"]
|
|
269
|
+
'What sat on the mat?'
|
|
270
|
+
"""
|
|
271
|
+
if len(contexts) != len(prompts):
|
|
272
|
+
raise ValueError(
|
|
273
|
+
f"contexts and prompts must have same length "
|
|
274
|
+
f"(got {len(contexts)} and {len(prompts)})"
|
|
275
|
+
)
|
|
276
|
+
|
|
277
|
+
free_text_items: list[Item] = []
|
|
278
|
+
|
|
279
|
+
for context, prompt in zip(contexts, prompts, strict=True):
|
|
280
|
+
item_metadata: dict[str, MetadataValue] = {
|
|
281
|
+
"context": context,
|
|
282
|
+
}
|
|
283
|
+
if metadata_fn:
|
|
284
|
+
item_metadata.update(metadata_fn(context, prompt))
|
|
285
|
+
|
|
286
|
+
item = create_free_text_item(
|
|
287
|
+
text=context,
|
|
288
|
+
prompt=prompt,
|
|
289
|
+
max_length=max_length,
|
|
290
|
+
validation_pattern=validation_pattern,
|
|
291
|
+
min_length=min_length,
|
|
292
|
+
multiline=multiline,
|
|
293
|
+
item_template_id=item_template_id,
|
|
294
|
+
metadata=item_metadata,
|
|
295
|
+
)
|
|
296
|
+
free_text_items.append(item)
|
|
297
|
+
|
|
298
|
+
return free_text_items
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def create_free_text_items_from_groups(
    items: list[Item],
    group_by: Callable[[Item], Any],
    prompt: str,
    max_length: int | None = None,
    validation_pattern: str | None = None,
    min_length: int | None = None,
    multiline: bool = False,
    *,
    extract_text: Callable[[Item], str] | None = None,
    include_group_metadata: bool = True,
    item_template_id: UUID | None = None,
) -> list[Item]:
    """Create free text items from grouped source items.

    Groups items and creates one free text item per source item,
    preserving group information in metadata.

    Parameters
    ----------
    items : list[Item]
        Source items to process.
    group_by : Callable[[Item], Any]
        Function to extract grouping key from items. The key is used as a
        dictionary key, so it must be hashable.
    prompt : str
        The question/instruction for all items (required).
    max_length : int | None
        Maximum character limit.
    validation_pattern : str | None
        Optional regex pattern for validation.
    min_length : int | None
        Minimum characters required.
    multiline : bool
        True for textarea, False for single-line input.
    extract_text : Callable[[Item], str] | None
        Function to extract text from item. If None, tries common keys.
    include_group_metadata : bool
        Whether to include group key in item metadata (stored as
        ``str(group_key)`` under ``"group_key"``).
    item_template_id : UUID | None
        Template ID for all created items. If None, generates one per item.

    Returns
    -------
    list[Item]
        Free text items from source items, emitted group by group (groups in
        first-seen order, source order within a group). Each item records the
        source item's ID under ``"source_item_id"``.

    Raises
    ------
    ValueError
        If extract_text is None and no known text key is present on a
        source item.

    Examples
    --------
    >>> source_items = [
    ...     Item(
    ...         uuid4(),
    ...         rendered_elements={"text": "Sentence 1"},
    ...         item_metadata={"type": "simple"}
    ...     )
    ... ]
    >>> free_text_items = create_free_text_items_from_groups(
    ...     source_items,
    ...     group_by=lambda i: i.item_metadata["type"],
    ...     prompt="Paraphrase this:",
    ...     multiline=True
    ... )
    >>> len(free_text_items)
    1
    """
    # Bucket the source items by key; dict ordering keeps groups in
    # first-seen order.
    groups: dict[Any, list[Item]] = defaultdict(list)
    for item in items:
        groups[group_by(item)].append(item)

    # Pick the extractor once. Compare against None rather than relying on
    # truthiness: a callable object that happens to be falsy must still be
    # used.
    get_text = extract_text if extract_text is not None else _extract_text_from_item

    free_text_items: list[Item] = []

    for group_key, group_items in groups.items():
        for item in group_items:
            item_metadata: dict[str, MetadataValue] = {
                "source_item_id": str(item.id),
            }
            if include_group_metadata:
                item_metadata["group_key"] = str(group_key)

            free_text_items.append(
                create_free_text_item(
                    text=get_text(item),
                    prompt=prompt,
                    max_length=max_length,
                    validation_pattern=validation_pattern,
                    min_length=min_length,
                    multiline=multiline,
                    item_template_id=item_template_id,
                    metadata=item_metadata,
                )
            )

    return free_text_items
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
def create_free_text_items_cross_product(
    texts: list[str],
    prompts: list[str],
    max_length: int | None = None,
    validation_pattern: str | None = None,
    min_length: int | None = None,
    multiline: bool = False,
    *,
    item_template_id: UUID | None = None,
    metadata_fn: (Callable[[str, str], dict[str, MetadataValue]] | None) = None,
) -> list[Item]:
    """Create free text items from cross-product of texts and prompts.

    Useful when you want to apply multiple prompts to each text.

    Parameters
    ----------
    texts : list[str]
        List of stimulus texts.
    prompts : list[str]
        List of prompts to apply.
    max_length : int | None
        Maximum character limit for all items.
    validation_pattern : str | None
        Optional regex pattern for validation.
    min_length : int | None
        Minimum characters required.
    multiline : bool
        True for textarea, False for single-line input.
    item_template_id : UUID | None
        Template ID for all created items.
    metadata_fn : Callable[[str, str], dict[str, MetadataValue]] | None
        Function to generate metadata from (text, prompt). Called once per
        pair whenever it is not None.

    Returns
    -------
    list[Item]
        Free text items from cross-product: one per (text, prompt) pair,
        with all prompts for the first text before the second text. Empty
        if either input list is empty.

    Examples
    --------
    >>> texts = ["Sentence 1", "Sentence 2"]
    >>> prompts = ["Paraphrase this:", "Summarize this:"]
    >>> items = create_free_text_items_cross_product(
    ...     texts, prompts, multiline=True, max_length=200
    ... )
    >>> len(items)
    4
    """
    # Compare against None rather than relying on truthiness: a callable
    # object that happens to be falsy must still be used.
    return [
        create_free_text_item(
            text=text,
            prompt=prompt,
            max_length=max_length,
            validation_pattern=validation_pattern,
            min_length=min_length,
            multiline=multiline,
            item_template_id=item_template_id,
            metadata=metadata_fn(text, prompt) if metadata_fn is not None else {},
        )
        for text, prompt in product(texts, prompts)
    ]
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
def create_filtered_free_text_items(
    items: list[Item],
    prompt: str,
    max_length: int | None = None,
    validation_pattern: str | None = None,
    min_length: int | None = None,
    multiline: bool = False,
    *,
    item_filter: Callable[[Item], bool] | None = None,
    extract_text: Callable[[Item], str] | None = None,
    item_template_id: UUID | None = None,
) -> list[Item]:
    """Create free text items with filtering.

    Parameters
    ----------
    items : list[Item]
        Source items.
    prompt : str
        The question/instruction for all items (required).
    max_length : int | None
        Maximum character limit.
    validation_pattern : str | None
        Optional regex pattern for validation.
    min_length : int | None
        Minimum characters required.
    multiline : bool
        True for textarea, False for single-line input.
    item_filter : Callable[[Item], bool] | None
        Filter individual items; only items for which it returns a truthy
        value are converted. If None, every item is converted.
    extract_text : Callable[[Item], str] | None
        Text extraction function. If None, tries common keys.
    item_template_id : UUID | None
        Template ID for created items.

    Returns
    -------
    list[Item]
        Filtered free text items, in source order. Each item records the
        source item's ID under ``"source_item_id"``.

    Raises
    ------
    ValueError
        If extract_text is None and no known text key is present on a
        retained source item.

    Examples
    --------
    >>> free_text_items = create_filtered_free_text_items(
    ...     items,
    ...     prompt="Paraphrase this:",
    ...     multiline=True,
    ...     item_filter=lambda i: i.item_metadata.get("valid", True)
    ... )  # doctest: +SKIP
    """
    # Compare the optional callables against None rather than relying on
    # truthiness: a callable object that happens to be falsy must still be
    # used.
    if item_filter is not None:
        filtered_items = [item for item in items if item_filter(item)]
    else:
        filtered_items = items

    get_text = extract_text if extract_text is not None else _extract_text_from_item

    return [
        create_free_text_item(
            text=get_text(item),
            prompt=prompt,
            max_length=max_length,
            validation_pattern=validation_pattern,
            min_length=min_length,
            multiline=multiline,
            item_template_id=item_template_id,
            metadata={"source_item_id": str(item.id)},
        )
        for item in filtered_items
    ]
|
|
556
|
+
|
|
557
|
+
|
|
558
|
+
def create_paraphrase_item(
    text: str,
    instruction: str = "Rewrite in your own words:",
    item_template_id: UUID | None = None,
    metadata: dict[str, MetadataValue] | None = None,
) -> Item:
    """Build a free text item preconfigured for paraphrasing.

    Thin wrapper around create_free_text_item that uses ``instruction`` as
    the prompt, enables multiline input, and caps responses at 500
    characters.

    Parameters
    ----------
    text : str
        Sentence or passage the participant should paraphrase.
    instruction : str
        Prompt shown with the text (default: "Rewrite in your own words:").
    item_template_id : UUID | None
        Template ID forwarded to create_free_text_item.
    metadata : dict[str, MetadataValue] | None
        Extra entries forwarded to create_free_text_item.

    Returns
    -------
    Item
        The paraphrase item.

    Examples
    --------
    >>> item = create_paraphrase_item(
    ...     "The quick brown fox jumps over the lazy dog."
    ... )
    >>> item.rendered_elements["prompt"]
    'Rewrite in your own words:'
    >>> item.item_metadata["multiline"]
    True
    """
    paraphrase_item = create_free_text_item(
        text=text,
        prompt=instruction,
        max_length=500,
        multiline=True,
        item_template_id=item_template_id,
        metadata=metadata,
    )
    return paraphrase_item
|
|
602
|
+
|
|
603
|
+
|
|
604
|
+
def create_wh_question_item(
    text: str,
    question_word: str = "Who",
    item_template_id: UUID | None = None,
    metadata: dict[str, MetadataValue] | None = None,
) -> Item:
    """Build a free text item preconfigured for WH-question answering.

    Thin wrapper around create_free_text_item that derives the prompt from
    ``question_word``, uses single-line input, and caps responses at 100
    characters.

    Parameters
    ----------
    text : str
        Context or passage the question is about.
    question_word : str
        WH-word interpolated into the prompt (default: "Who").
    item_template_id : UUID | None
        Template ID forwarded to create_free_text_item.
    metadata : dict[str, MetadataValue] | None
        Extra entries forwarded to create_free_text_item.

    Returns
    -------
    Item
        The WH-question item.

    Examples
    --------
    >>> item = create_wh_question_item(
    ...     "The dog chased the cat.",
    ...     question_word="What"
    ... )
    >>> "What" in item.rendered_elements["prompt"]
    True
    >>> item.item_metadata["max_length"]
    100
    """
    wh_prompt = f"{question_word} question answering:"
    return create_free_text_item(
        text=text,
        prompt=wh_prompt,
        max_length=100,
        multiline=False,
        item_template_id=item_template_id,
        metadata=metadata,
    )
|
|
649
|
+
|
|
650
|
+
|
|
651
|
+
def _extract_text_from_item(item: Item) -> str:
    """Return the stimulus text stored in an item's rendered_elements.

    The keys "text", "sentence" and "content" are checked in that order and
    the value under the first one present is returned.

    Parameters
    ----------
    item : Item
        Item whose rendered_elements are searched.

    Returns
    -------
    str
        Value stored under the first matching key.

    Raises
    ------
    ValueError
        If none of the candidate keys is present in rendered_elements.
    """
    elements = item.rendered_elements
    candidates = ("text", "sentence", "content")

    # Earlier candidates take priority over later ones.
    matched_key = next((name for name in candidates if name in elements), None)
    if matched_key is not None:
        return elements[matched_key]

    raise ValueError(
        f"Cannot extract text from item {item.id}. "
        f"Expected one of ['text', 'sentence', 'content'] in rendered_elements, "
        f"but found keys: {list(item.rendered_elements.keys())}. "
        f"Use the extract_text parameter to provide a custom extraction function."
    )
|