bead 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. bead/__init__.py +11 -0
  2. bead/__main__.py +11 -0
  3. bead/active_learning/__init__.py +15 -0
  4. bead/active_learning/config.py +231 -0
  5. bead/active_learning/loop.py +566 -0
  6. bead/active_learning/models/__init__.py +24 -0
  7. bead/active_learning/models/base.py +852 -0
  8. bead/active_learning/models/binary.py +910 -0
  9. bead/active_learning/models/categorical.py +943 -0
  10. bead/active_learning/models/cloze.py +862 -0
  11. bead/active_learning/models/forced_choice.py +956 -0
  12. bead/active_learning/models/free_text.py +773 -0
  13. bead/active_learning/models/lora.py +365 -0
  14. bead/active_learning/models/magnitude.py +835 -0
  15. bead/active_learning/models/multi_select.py +795 -0
  16. bead/active_learning/models/ordinal_scale.py +811 -0
  17. bead/active_learning/models/peft_adapter.py +155 -0
  18. bead/active_learning/models/random_effects.py +639 -0
  19. bead/active_learning/selection.py +354 -0
  20. bead/active_learning/strategies.py +391 -0
  21. bead/active_learning/trainers/__init__.py +26 -0
  22. bead/active_learning/trainers/base.py +210 -0
  23. bead/active_learning/trainers/data_collator.py +172 -0
  24. bead/active_learning/trainers/dataset_utils.py +261 -0
  25. bead/active_learning/trainers/huggingface.py +304 -0
  26. bead/active_learning/trainers/lightning.py +324 -0
  27. bead/active_learning/trainers/metrics.py +424 -0
  28. bead/active_learning/trainers/mixed_effects.py +551 -0
  29. bead/active_learning/trainers/model_wrapper.py +509 -0
  30. bead/active_learning/trainers/registry.py +104 -0
  31. bead/adapters/__init__.py +11 -0
  32. bead/adapters/huggingface.py +61 -0
  33. bead/behavioral/__init__.py +116 -0
  34. bead/behavioral/analytics.py +646 -0
  35. bead/behavioral/extraction.py +343 -0
  36. bead/behavioral/merging.py +343 -0
  37. bead/cli/__init__.py +11 -0
  38. bead/cli/active_learning.py +513 -0
  39. bead/cli/active_learning_commands.py +779 -0
  40. bead/cli/completion.py +359 -0
  41. bead/cli/config.py +624 -0
  42. bead/cli/constraint_builders.py +286 -0
  43. bead/cli/deployment.py +859 -0
  44. bead/cli/deployment_trials.py +493 -0
  45. bead/cli/deployment_ui.py +332 -0
  46. bead/cli/display.py +378 -0
  47. bead/cli/items.py +960 -0
  48. bead/cli/items_factories.py +776 -0
  49. bead/cli/list_constraints.py +714 -0
  50. bead/cli/lists.py +490 -0
  51. bead/cli/main.py +430 -0
  52. bead/cli/models.py +877 -0
  53. bead/cli/resource_loaders.py +621 -0
  54. bead/cli/resources.py +1036 -0
  55. bead/cli/shell.py +356 -0
  56. bead/cli/simulate.py +840 -0
  57. bead/cli/templates.py +1158 -0
  58. bead/cli/training.py +1080 -0
  59. bead/cli/utils.py +614 -0
  60. bead/cli/workflow.py +1273 -0
  61. bead/config/__init__.py +68 -0
  62. bead/config/active_learning.py +1009 -0
  63. bead/config/config.py +192 -0
  64. bead/config/defaults.py +118 -0
  65. bead/config/deployment.py +217 -0
  66. bead/config/env.py +147 -0
  67. bead/config/item.py +45 -0
  68. bead/config/list.py +193 -0
  69. bead/config/loader.py +149 -0
  70. bead/config/logging.py +42 -0
  71. bead/config/model.py +49 -0
  72. bead/config/paths.py +46 -0
  73. bead/config/profiles.py +320 -0
  74. bead/config/resources.py +47 -0
  75. bead/config/serialization.py +210 -0
  76. bead/config/simulation.py +206 -0
  77. bead/config/template.py +238 -0
  78. bead/config/validation.py +267 -0
  79. bead/data/__init__.py +65 -0
  80. bead/data/base.py +87 -0
  81. bead/data/identifiers.py +97 -0
  82. bead/data/language_codes.py +61 -0
  83. bead/data/metadata.py +270 -0
  84. bead/data/range.py +123 -0
  85. bead/data/repository.py +358 -0
  86. bead/data/serialization.py +249 -0
  87. bead/data/timestamps.py +89 -0
  88. bead/data/validation.py +349 -0
  89. bead/data_collection/__init__.py +11 -0
  90. bead/data_collection/jatos.py +223 -0
  91. bead/data_collection/merger.py +154 -0
  92. bead/data_collection/prolific.py +198 -0
  93. bead/deployment/__init__.py +5 -0
  94. bead/deployment/distribution.py +402 -0
  95. bead/deployment/jatos/__init__.py +1 -0
  96. bead/deployment/jatos/api.py +200 -0
  97. bead/deployment/jatos/exporter.py +210 -0
  98. bead/deployment/jspsych/__init__.py +9 -0
  99. bead/deployment/jspsych/biome.json +44 -0
  100. bead/deployment/jspsych/config.py +411 -0
  101. bead/deployment/jspsych/generator.py +598 -0
  102. bead/deployment/jspsych/package.json +51 -0
  103. bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
  104. bead/deployment/jspsych/randomizer.py +299 -0
  105. bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
  106. bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
  107. bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
  108. bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
  109. bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
  110. bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
  111. bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
  112. bead/deployment/jspsych/src/plugins/rating.ts +248 -0
  113. bead/deployment/jspsych/src/slopit/index.ts +9 -0
  114. bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
  115. bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
  116. bead/deployment/jspsych/templates/experiment.css +1 -0
  117. bead/deployment/jspsych/templates/experiment.js.template +289 -0
  118. bead/deployment/jspsych/templates/index.html +51 -0
  119. bead/deployment/jspsych/templates/randomizer.js +241 -0
  120. bead/deployment/jspsych/templates/randomizer.js.template +313 -0
  121. bead/deployment/jspsych/trials.py +723 -0
  122. bead/deployment/jspsych/tsconfig.json +23 -0
  123. bead/deployment/jspsych/tsup.config.ts +30 -0
  124. bead/deployment/jspsych/ui/__init__.py +1 -0
  125. bead/deployment/jspsych/ui/components.py +383 -0
  126. bead/deployment/jspsych/ui/styles.py +411 -0
  127. bead/dsl/__init__.py +80 -0
  128. bead/dsl/ast.py +168 -0
  129. bead/dsl/context.py +178 -0
  130. bead/dsl/errors.py +71 -0
  131. bead/dsl/evaluator.py +570 -0
  132. bead/dsl/grammar.lark +81 -0
  133. bead/dsl/parser.py +231 -0
  134. bead/dsl/stdlib.py +929 -0
  135. bead/evaluation/__init__.py +13 -0
  136. bead/evaluation/convergence.py +485 -0
  137. bead/evaluation/interannotator.py +398 -0
  138. bead/items/__init__.py +40 -0
  139. bead/items/adapters/__init__.py +70 -0
  140. bead/items/adapters/anthropic.py +224 -0
  141. bead/items/adapters/api_utils.py +167 -0
  142. bead/items/adapters/base.py +216 -0
  143. bead/items/adapters/google.py +259 -0
  144. bead/items/adapters/huggingface.py +1074 -0
  145. bead/items/adapters/openai.py +323 -0
  146. bead/items/adapters/registry.py +202 -0
  147. bead/items/adapters/sentence_transformers.py +224 -0
  148. bead/items/adapters/togetherai.py +309 -0
  149. bead/items/binary.py +515 -0
  150. bead/items/cache.py +558 -0
  151. bead/items/categorical.py +593 -0
  152. bead/items/cloze.py +757 -0
  153. bead/items/constructor.py +784 -0
  154. bead/items/forced_choice.py +413 -0
  155. bead/items/free_text.py +681 -0
  156. bead/items/generation.py +432 -0
  157. bead/items/item.py +396 -0
  158. bead/items/item_template.py +787 -0
  159. bead/items/magnitude.py +573 -0
  160. bead/items/multi_select.py +621 -0
  161. bead/items/ordinal_scale.py +569 -0
  162. bead/items/scoring.py +448 -0
  163. bead/items/validation.py +723 -0
  164. bead/lists/__init__.py +30 -0
  165. bead/lists/balancer.py +263 -0
  166. bead/lists/constraints.py +1067 -0
  167. bead/lists/experiment_list.py +286 -0
  168. bead/lists/list_collection.py +378 -0
  169. bead/lists/partitioner.py +1141 -0
  170. bead/lists/stratification.py +254 -0
  171. bead/participants/__init__.py +73 -0
  172. bead/participants/collection.py +699 -0
  173. bead/participants/merging.py +312 -0
  174. bead/participants/metadata_spec.py +491 -0
  175. bead/participants/models.py +276 -0
  176. bead/resources/__init__.py +29 -0
  177. bead/resources/adapters/__init__.py +19 -0
  178. bead/resources/adapters/base.py +104 -0
  179. bead/resources/adapters/cache.py +128 -0
  180. bead/resources/adapters/glazing.py +508 -0
  181. bead/resources/adapters/registry.py +117 -0
  182. bead/resources/adapters/unimorph.py +796 -0
  183. bead/resources/classification.py +856 -0
  184. bead/resources/constraint_builders.py +329 -0
  185. bead/resources/constraints.py +165 -0
  186. bead/resources/lexical_item.py +223 -0
  187. bead/resources/lexicon.py +744 -0
  188. bead/resources/loaders.py +209 -0
  189. bead/resources/template.py +441 -0
  190. bead/resources/template_collection.py +707 -0
  191. bead/resources/template_generation.py +349 -0
  192. bead/simulation/__init__.py +29 -0
  193. bead/simulation/annotators/__init__.py +15 -0
  194. bead/simulation/annotators/base.py +175 -0
  195. bead/simulation/annotators/distance_based.py +135 -0
  196. bead/simulation/annotators/lm_based.py +114 -0
  197. bead/simulation/annotators/oracle.py +182 -0
  198. bead/simulation/annotators/random.py +181 -0
  199. bead/simulation/dsl_extension/__init__.py +3 -0
  200. bead/simulation/noise_models/__init__.py +13 -0
  201. bead/simulation/noise_models/base.py +42 -0
  202. bead/simulation/noise_models/random_noise.py +82 -0
  203. bead/simulation/noise_models/systematic.py +132 -0
  204. bead/simulation/noise_models/temperature.py +86 -0
  205. bead/simulation/runner.py +144 -0
  206. bead/simulation/strategies/__init__.py +23 -0
  207. bead/simulation/strategies/base.py +123 -0
  208. bead/simulation/strategies/binary.py +103 -0
  209. bead/simulation/strategies/categorical.py +123 -0
  210. bead/simulation/strategies/cloze.py +224 -0
  211. bead/simulation/strategies/forced_choice.py +127 -0
  212. bead/simulation/strategies/free_text.py +105 -0
  213. bead/simulation/strategies/magnitude.py +116 -0
  214. bead/simulation/strategies/multi_select.py +129 -0
  215. bead/simulation/strategies/ordinal_scale.py +131 -0
  216. bead/templates/__init__.py +27 -0
  217. bead/templates/adapters/__init__.py +17 -0
  218. bead/templates/adapters/base.py +128 -0
  219. bead/templates/adapters/cache.py +178 -0
  220. bead/templates/adapters/huggingface.py +312 -0
  221. bead/templates/combinatorics.py +103 -0
  222. bead/templates/filler.py +605 -0
  223. bead/templates/renderers.py +177 -0
  224. bead/templates/resolver.py +178 -0
  225. bead/templates/strategies.py +1806 -0
  226. bead/templates/streaming.py +195 -0
  227. bead-0.1.0.dist-info/METADATA +212 -0
  228. bead-0.1.0.dist-info/RECORD +231 -0
  229. bead-0.1.0.dist-info/WHEEL +4 -0
  230. bead-0.1.0.dist-info/entry_points.txt +2 -0
  231. bead-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,681 @@
1
+ """Utilities for creating free text experimental items.
2
+
3
+ This module provides language-agnostic utilities for creating free text
4
+ items where participants provide open-ended text responses (e.g., paraphrasing,
5
+ question answering, cloze completion).
6
+
7
+ Integration Points
8
+ ------------------
9
+ - Active Learning: bead/active_learning/models/free_text.py
10
+ - Simulation: bead/simulation/strategies/free_text.py
11
+ - Deployment: bead/deployment/jspsych/ (text input or textarea)
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from collections import defaultdict
17
+ from collections.abc import Callable
18
+ from itertools import product
19
+ from typing import Any
20
+ from uuid import UUID, uuid4
21
+
22
+ from bead.items.item import Item, MetadataValue
23
+
24
+
25
def create_free_text_item(
    text: str,
    prompt: str,
    max_length: int | None = None,
    validation_pattern: str | None = None,
    min_length: int | None = None,
    multiline: bool = False,
    item_template_id: UUID | None = None,
    metadata: dict[str, MetadataValue] | None = None,
) -> Item:
    """Create a free text (open-ended) item.

    Parameters
    ----------
    text : str
        The stimulus text or context. Must contain non-whitespace characters.
    prompt : str
        The question/instruction for what to enter (required). Must contain
        non-whitespace characters.
    max_length : int | None
        Maximum character limit. None means unlimited.
    validation_pattern : str | None
        Optional regex pattern for validation. It is stored unchanged; this
        function neither compiles nor checks it.
    min_length : int | None
        Minimum characters required. None means no minimum.
    multiline : bool
        True for textarea (multiline), False for single-line input (default).
    item_template_id : UUID | None
        Template ID for the item. If None, generates new UUID.
    metadata : dict[str, MetadataValue] | None
        Additional metadata for item_metadata field. It is merged last, so a
        key such as ``"max_length"`` supplied here replaces the generated one.

    Returns
    -------
    Item
        Free text item with ``text`` and ``prompt`` in rendered_elements and
        the response constraints in item_metadata.

    Raises
    ------
    ValueError
        If text or prompt is empty or whitespace-only, if min_length or
        max_length is negative, or if min_length > max_length.

    Examples
    --------
    >>> item = create_free_text_item(
    ...     text="The dog chased the cat.",
    ...     prompt="Who chased whom?",
    ...     max_length=100
    ... )
    >>> item.rendered_elements["text"]
    'The dog chased the cat.'
    >>> item.rendered_elements["prompt"]
    'Who chased whom?'
    >>> item.item_metadata["max_length"]
    100

    >>> # Multiline paraphrase task
    >>> item = create_free_text_item(
    ...     text="The quick brown fox jumps over the lazy dog.",
    ...     prompt="Rewrite this sentence in your own words:",
    ...     multiline=True,
    ...     max_length=200
    ... )
    >>> item.item_metadata["multiline"]
    True
    """
    if not text or not text.strip():
        raise ValueError("text cannot be empty")

    if not prompt or not prompt.strip():
        raise ValueError("prompt is required for free text items")

    # A negative character count can never be satisfied by any response, so
    # reject it here instead of storing an unusable constraint on the item.
    for bound_name, bound in (("min_length", min_length), ("max_length", max_length)):
        if bound is not None and bound < 0:
            raise ValueError(f"{bound_name} ({bound}) cannot be negative")

    if min_length is not None and max_length is not None and min_length > max_length:
        raise ValueError(
            f"min_length ({min_length}) cannot be greater than "
            f"max_length ({max_length})"
        )

    if item_template_id is None:
        item_template_id = uuid4()

    rendered_elements: dict[str, str] = {
        "text": text,
        "prompt": prompt,
    }

    # Only constraints that were actually supplied are recorded; "multiline"
    # is always present.
    item_metadata: dict[str, MetadataValue] = {"multiline": multiline}
    if max_length is not None:
        item_metadata["max_length"] = max_length
    if min_length is not None:
        item_metadata["min_length"] = min_length
    if validation_pattern is not None:
        item_metadata["validation_pattern"] = validation_pattern

    # Caller metadata is applied last and therefore wins on key collisions.
    if metadata:
        item_metadata.update(metadata)

    return Item(
        item_template_id=item_template_id,
        rendered_elements=rendered_elements,
        item_metadata=item_metadata,
    )
134
+
135
+
136
def create_free_text_items_from_texts(
    texts: list[str],
    prompt: str,
    max_length: int | None = None,
    validation_pattern: str | None = None,
    min_length: int | None = None,
    multiline: bool = False,
    *,
    item_template_id: UUID | None = None,
    metadata_fn: Callable[[str], dict[str, MetadataValue]] | None = None,
) -> list[Item]:
    """Build one free text item per stimulus text, all sharing one prompt.

    Parameters
    ----------
    texts : list[str]
        Stimulus texts; one item is produced for each, in order.
    prompt : str
        The question/instruction shared by every item (required).
    max_length : int | None
        Maximum character limit applied to every item.
    validation_pattern : str | None
        Optional regex pattern stored on every item.
    min_length : int | None
        Minimum characters required for every item.
    multiline : bool
        True for textarea, False for single-line input.
    item_template_id : UUID | None
        Template ID passed to every item. If None, each item gets its own
        freshly generated ID.
    metadata_fn : Callable[[str], dict[str, MetadataValue]] | None
        Called once per text; its result becomes that item's extra metadata.

    Returns
    -------
    list[Item]
        Free text items, one per entry of ``texts``.

    Examples
    --------
    >>> texts = ["Sentence 1", "Sentence 2", "Sentence 3"]
    >>> items = create_free_text_items_from_texts(
    ...     texts,
    ...     prompt="Paraphrase this:",
    ...     multiline=True,
    ...     max_length=200,
    ...     metadata_fn=lambda t: {"original_length": len(t)}
    ... )
    >>> len(items)
    3
    >>> items[0].item_metadata["original_length"]
    10
    """
    return [
        create_free_text_item(
            text=stimulus,
            prompt=prompt,
            max_length=max_length,
            validation_pattern=validation_pattern,
            min_length=min_length,
            multiline=multiline,
            item_template_id=item_template_id,
            # Without a metadata_fn every item simply gets no extra metadata.
            metadata=metadata_fn(stimulus) if metadata_fn else {},
        )
        for stimulus in texts
    ]
208
+
209
+
210
def create_free_text_items_with_context(
    contexts: list[str],
    prompts: list[str],
    max_length: int | None = None,
    validation_pattern: str | None = None,
    min_length: int | None = None,
    multiline: bool = False,
    *,
    item_template_id: UUID | None = None,
    metadata_fn: (Callable[[str, str], dict[str, MetadataValue]] | None) = None,
) -> list[Item]:
    """Pair each context with its own prompt and build a free text item.

    Suited to reading comprehension or question answering, where every
    context comes with a specific question.

    Parameters
    ----------
    contexts : list[str]
        Context texts (same length as prompts).
    prompts : list[str]
        Prompt/question for the context at the same position.
    max_length : int | None
        Maximum character limit applied to every item.
    validation_pattern : str | None
        Optional regex pattern stored on every item.
    min_length : int | None
        Minimum characters required for every item.
    multiline : bool
        True for textarea, False for single-line input.
    item_template_id : UUID | None
        Template ID passed to every item. If None, each item gets its own
        freshly generated ID.
    metadata_fn : Callable[[str, str], dict[str, MetadataValue]] | None
        Called with (context, prompt); its result is merged over the
        default ``{"context": context}`` metadata.

    Returns
    -------
    list[Item]
        Free text items, one per (context, prompt) pair.

    Raises
    ------
    ValueError
        If contexts and prompts have different lengths.

    Examples
    --------
    >>> contexts = ["The cat sat on the mat."]
    >>> prompts = ["What sat on the mat?"]
    >>> items = create_free_text_items_with_context(
    ...     contexts,
    ...     prompts,
    ...     max_length=50
    ... )
    >>> len(items)
    1
    >>> items[0].rendered_elements["text"]
    'The cat sat on the mat.'
    >>> items[0].rendered_elements["prompt"]
    'What sat on the mat?'
    """
    n_contexts = len(contexts)
    n_prompts = len(prompts)
    if n_contexts != n_prompts:
        raise ValueError(
            "contexts and prompts must have same length "
            f"(got {n_contexts} and {n_prompts})"
        )

    paired_items: list[Item] = []
    for passage, question in zip(contexts, prompts, strict=True):
        # The context is always recorded; metadata_fn output is layered on top.
        extra: dict[str, MetadataValue] = {"context": passage}
        if metadata_fn:
            extra.update(metadata_fn(passage, question))

        paired_items.append(
            create_free_text_item(
                text=passage,
                prompt=question,
                max_length=max_length,
                validation_pattern=validation_pattern,
                min_length=min_length,
                multiline=multiline,
                item_template_id=item_template_id,
                metadata=extra,
            )
        )

    return paired_items
299
+
300
+
301
def create_free_text_items_from_groups(
    items: list[Item],
    group_by: Callable[[Item], Any],
    prompt: str,
    max_length: int | None = None,
    validation_pattern: str | None = None,
    min_length: int | None = None,
    multiline: bool = False,
    *,
    extract_text: Callable[[Item], str] | None = None,
    include_group_metadata: bool = True,
    item_template_id: UUID | None = None,
) -> list[Item]:
    """Create one free text item per source item, ordered by group.

    Source items are first bucketed by ``group_by``; the result lists the
    items bucket by bucket (buckets in order of first appearance), so the
    output order can differ from the input order.

    Parameters
    ----------
    items : list[Item]
        Source items to process.
    group_by : Callable[[Item], Any]
        Function returning the grouping key of an item.
    prompt : str
        The question/instruction shared by every item (required).
    max_length : int | None
        Maximum character limit.
    validation_pattern : str | None
        Optional regex pattern for validation.
    min_length : int | None
        Minimum characters required.
    multiline : bool
        True for textarea, False for single-line input.
    extract_text : Callable[[Item], str] | None
        Function to extract text from item. If None, tries common keys.
    include_group_metadata : bool
        Whether to store ``str(group key)`` under ``"group_key"`` in the
        item metadata.
    item_template_id : UUID | None
        Template ID passed to every item. If None, each item gets its own
        freshly generated ID.

    Returns
    -------
    list[Item]
        Free text items, each carrying ``"source_item_id"`` in its metadata.

    Examples
    --------
    >>> source_items = [
    ...     Item(
    ...         uuid4(),
    ...         rendered_elements={"text": "Sentence 1"},
    ...         item_metadata={"type": "simple"}
    ...     )
    ... ]
    >>> free_text_items = create_free_text_items_from_groups(
    ...     source_items,
    ...     group_by=lambda i: i.item_metadata["type"],
    ...     prompt="Paraphrase this:",
    ...     multiline=True
    ... )
    >>> len(free_text_items)
    1
    """
    # Phase 1: bucket every source item under its grouping key.
    buckets: dict[Any, list[Item]] = defaultdict(list)
    for source in items:
        buckets[group_by(source)].append(source)

    # Phase 2: walk the buckets and convert each member.
    converted: list[Item] = []
    for bucket_key, members in buckets.items():
        for source in members:
            if extract_text:
                stimulus = extract_text(source)
            else:
                stimulus = _extract_text_from_item(source)

            extra: dict[str, MetadataValue] = {"source_item_id": str(source.id)}
            if include_group_metadata:
                extra["group_key"] = str(bucket_key)

            converted.append(
                create_free_text_item(
                    text=stimulus,
                    prompt=prompt,
                    max_length=max_length,
                    validation_pattern=validation_pattern,
                    min_length=min_length,
                    multiline=multiline,
                    item_template_id=item_template_id,
                    metadata=extra,
                )
            )

    return converted
402
+
403
+
404
def create_free_text_items_cross_product(
    texts: list[str],
    prompts: list[str],
    max_length: int | None = None,
    validation_pattern: str | None = None,
    min_length: int | None = None,
    multiline: bool = False,
    *,
    item_template_id: UUID | None = None,
    metadata_fn: (Callable[[str, str], dict[str, MetadataValue]] | None) = None,
) -> list[Item]:
    """Create a free text item for every (text, prompt) combination.

    Use this to apply several prompts to each stimulus text. Items are
    ordered text-major: all prompts for the first text, then the second, etc.

    Parameters
    ----------
    texts : list[str]
        List of stimulus texts.
    prompts : list[str]
        List of prompts to apply to each text.
    max_length : int | None
        Maximum character limit applied to every item.
    validation_pattern : str | None
        Optional regex pattern stored on every item.
    min_length : int | None
        Minimum characters required for every item.
    multiline : bool
        True for textarea, False for single-line input.
    item_template_id : UUID | None
        Template ID passed to every item. If None, each item gets its own
        freshly generated ID.
    metadata_fn : Callable[[str, str], dict[str, MetadataValue]] | None
        Called with (text, prompt); its result becomes the item's extra
        metadata.

    Returns
    -------
    list[Item]
        ``len(texts) * len(prompts)`` free text items.

    Examples
    --------
    >>> texts = ["Sentence 1", "Sentence 2"]
    >>> prompts = ["Paraphrase this:", "Summarize this:"]
    >>> items = create_free_text_items_cross_product(
    ...     texts, prompts, multiline=True, max_length=200
    ... )
    >>> len(items)
    4
    """
    return [
        create_free_text_item(
            text=stimulus,
            prompt=question,
            max_length=max_length,
            validation_pattern=validation_pattern,
            min_length=min_length,
            multiline=multiline,
            item_template_id=item_template_id,
            # Without a metadata_fn every item simply gets no extra metadata.
            metadata=metadata_fn(stimulus, question) if metadata_fn else {},
        )
        for stimulus, question in product(texts, prompts)
    ]
473
+
474
+
475
def create_filtered_free_text_items(
    items: list[Item],
    prompt: str,
    max_length: int | None = None,
    validation_pattern: str | None = None,
    min_length: int | None = None,
    multiline: bool = False,
    *,
    item_filter: Callable[[Item], bool] | None = None,
    extract_text: Callable[[Item], str] | None = None,
    item_template_id: UUID | None = None,
) -> list[Item]:
    """Create free text items from the source items that pass a filter.

    Parameters
    ----------
    items : list[Item]
        Source items.
    prompt : str
        The question/instruction shared by every item (required).
    max_length : int | None
        Maximum character limit.
    validation_pattern : str | None
        Optional regex pattern for validation.
    min_length : int | None
        Minimum characters required.
    multiline : bool
        True for textarea, False for single-line input.
    item_filter : Callable[[Item], bool] | None
        Predicate deciding which source items are kept. If None, all
        items are kept.
    extract_text : Callable[[Item], str] | None
        Text extraction function. If None, tries common keys.
    item_template_id : UUID | None
        Template ID passed to every item. If None, each item gets its own
        freshly generated ID.

    Returns
    -------
    list[Item]
        Free text items for the kept source items, each carrying
        ``"source_item_id"`` in its metadata.

    Examples
    --------
    >>> free_text_items = create_filtered_free_text_items(
    ...     items,
    ...     prompt="Paraphrase this:",
    ...     multiline=True,
    ...     item_filter=lambda i: i.item_metadata.get("valid", True)
    ... )  # doctest: +SKIP
    """
    # The filter runs over the whole input before any item is converted.
    if item_filter:
        selected = [candidate for candidate in items if item_filter(candidate)]
    else:
        selected = items

    converted: list[Item] = []
    for source in selected:
        if extract_text:
            stimulus = extract_text(source)
        else:
            stimulus = _extract_text_from_item(source)

        converted.append(
            create_free_text_item(
                text=stimulus,
                prompt=prompt,
                max_length=max_length,
                validation_pattern=validation_pattern,
                min_length=min_length,
                multiline=multiline,
                item_template_id=item_template_id,
                metadata={"source_item_id": str(source.id)},
            )
        )

    return converted
556
+
557
+
558
def create_paraphrase_item(
    text: str,
    instruction: str = "Rewrite in your own words:",
    item_template_id: UUID | None = None,
    metadata: dict[str, MetadataValue] | None = None,
) -> Item:
    """Create a paraphrase generation item.

    Shortcut for a multiline free text item capped at 500 characters, with
    ``instruction`` used as the prompt.

    Parameters
    ----------
    text : str
        The text to paraphrase.
    instruction : str
        The instruction for paraphrasing (default: "Rewrite in your own words:").
    item_template_id : UUID | None
        Template ID for the item. If None, generates new UUID.
    metadata : dict[str, MetadataValue] | None
        Additional metadata for item_metadata field.

    Returns
    -------
    Item
        Paraphrase free text item.

    Examples
    --------
    >>> item = create_paraphrase_item(
    ...     "The quick brown fox jumps over the lazy dog."
    ... )
    >>> item.rendered_elements["prompt"]
    'Rewrite in your own words:'
    >>> item.item_metadata["multiline"]
    True
    """
    paraphrase_item = create_free_text_item(
        text=text,
        prompt=instruction,
        max_length=500,
        multiline=True,
        item_template_id=item_template_id,
        metadata=metadata,
    )
    return paraphrase_item
602
+
603
+
604
def create_wh_question_item(
    text: str,
    question_word: str = "Who",
    item_template_id: UUID | None = None,
    metadata: dict[str, MetadataValue] | None = None,
) -> Item:
    """Create a WH-question answering item.

    Shortcut for a single-line free text item capped at 100 characters,
    whose prompt is ``"<question_word> question answering:"``.

    Parameters
    ----------
    text : str
        The context/passage for the question.
    question_word : str
        The question word to use (default: "Who").
    item_template_id : UUID | None
        Template ID for the item. If None, generates new UUID.
    metadata : dict[str, MetadataValue] | None
        Additional metadata for item_metadata field.

    Returns
    -------
    Item
        WH-question free text item.

    Examples
    --------
    >>> item = create_wh_question_item(
    ...     "The dog chased the cat.",
    ...     question_word="What"
    ... )
    >>> "What" in item.rendered_elements["prompt"]
    True
    >>> item.item_metadata["max_length"]
    100
    """
    wh_prompt = f"{question_word} question answering:"
    return create_free_text_item(
        text=text,
        prompt=wh_prompt,
        max_length=100,
        multiline=False,
        item_template_id=item_template_id,
        metadata=metadata,
    )
649
+
650
+
651
def _extract_text_from_item(item: Item) -> str:
    """Extract text from item's rendered_elements.

    Tries the keys "text", "sentence", "content" in that order and returns
    the value of the first one present. Only key presence is checked, so an
    empty value under an earlier key is returned as-is.

    Parameters
    ----------
    item : Item
        Item to extract text from.

    Returns
    -------
    str
        Extracted text.

    Raises
    ------
    ValueError
        If no suitable text key found in rendered_elements.
    """
    # Single source of truth for both the lookup and the error message, so
    # the two cannot drift apart.
    candidate_keys = ("text", "sentence", "content")
    elements = item.rendered_elements

    for key in candidate_keys:
        if key in elements:
            return elements[key]

    raise ValueError(
        f"Cannot extract text from item {item.id}. "
        f"Expected one of {list(candidate_keys)} in rendered_elements, "
        f"but found keys: {list(elements.keys())}. "
        "Use the extract_text parameter to provide a custom extraction function."
    )