bead 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. bead/__init__.py +11 -0
  2. bead/__main__.py +11 -0
  3. bead/active_learning/__init__.py +15 -0
  4. bead/active_learning/config.py +231 -0
  5. bead/active_learning/loop.py +566 -0
  6. bead/active_learning/models/__init__.py +24 -0
  7. bead/active_learning/models/base.py +852 -0
  8. bead/active_learning/models/binary.py +910 -0
  9. bead/active_learning/models/categorical.py +943 -0
  10. bead/active_learning/models/cloze.py +862 -0
  11. bead/active_learning/models/forced_choice.py +956 -0
  12. bead/active_learning/models/free_text.py +773 -0
  13. bead/active_learning/models/lora.py +365 -0
  14. bead/active_learning/models/magnitude.py +835 -0
  15. bead/active_learning/models/multi_select.py +795 -0
  16. bead/active_learning/models/ordinal_scale.py +811 -0
  17. bead/active_learning/models/peft_adapter.py +155 -0
  18. bead/active_learning/models/random_effects.py +639 -0
  19. bead/active_learning/selection.py +354 -0
  20. bead/active_learning/strategies.py +391 -0
  21. bead/active_learning/trainers/__init__.py +26 -0
  22. bead/active_learning/trainers/base.py +210 -0
  23. bead/active_learning/trainers/data_collator.py +172 -0
  24. bead/active_learning/trainers/dataset_utils.py +261 -0
  25. bead/active_learning/trainers/huggingface.py +304 -0
  26. bead/active_learning/trainers/lightning.py +324 -0
  27. bead/active_learning/trainers/metrics.py +424 -0
  28. bead/active_learning/trainers/mixed_effects.py +551 -0
  29. bead/active_learning/trainers/model_wrapper.py +509 -0
  30. bead/active_learning/trainers/registry.py +104 -0
  31. bead/adapters/__init__.py +11 -0
  32. bead/adapters/huggingface.py +61 -0
  33. bead/behavioral/__init__.py +116 -0
  34. bead/behavioral/analytics.py +646 -0
  35. bead/behavioral/extraction.py +343 -0
  36. bead/behavioral/merging.py +343 -0
  37. bead/cli/__init__.py +11 -0
  38. bead/cli/active_learning.py +513 -0
  39. bead/cli/active_learning_commands.py +779 -0
  40. bead/cli/completion.py +359 -0
  41. bead/cli/config.py +624 -0
  42. bead/cli/constraint_builders.py +286 -0
  43. bead/cli/deployment.py +859 -0
  44. bead/cli/deployment_trials.py +493 -0
  45. bead/cli/deployment_ui.py +332 -0
  46. bead/cli/display.py +378 -0
  47. bead/cli/items.py +960 -0
  48. bead/cli/items_factories.py +776 -0
  49. bead/cli/list_constraints.py +714 -0
  50. bead/cli/lists.py +490 -0
  51. bead/cli/main.py +430 -0
  52. bead/cli/models.py +877 -0
  53. bead/cli/resource_loaders.py +621 -0
  54. bead/cli/resources.py +1036 -0
  55. bead/cli/shell.py +356 -0
  56. bead/cli/simulate.py +840 -0
  57. bead/cli/templates.py +1158 -0
  58. bead/cli/training.py +1080 -0
  59. bead/cli/utils.py +614 -0
  60. bead/cli/workflow.py +1273 -0
  61. bead/config/__init__.py +68 -0
  62. bead/config/active_learning.py +1009 -0
  63. bead/config/config.py +192 -0
  64. bead/config/defaults.py +118 -0
  65. bead/config/deployment.py +217 -0
  66. bead/config/env.py +147 -0
  67. bead/config/item.py +45 -0
  68. bead/config/list.py +193 -0
  69. bead/config/loader.py +149 -0
  70. bead/config/logging.py +42 -0
  71. bead/config/model.py +49 -0
  72. bead/config/paths.py +46 -0
  73. bead/config/profiles.py +320 -0
  74. bead/config/resources.py +47 -0
  75. bead/config/serialization.py +210 -0
  76. bead/config/simulation.py +206 -0
  77. bead/config/template.py +238 -0
  78. bead/config/validation.py +267 -0
  79. bead/data/__init__.py +65 -0
  80. bead/data/base.py +87 -0
  81. bead/data/identifiers.py +97 -0
  82. bead/data/language_codes.py +61 -0
  83. bead/data/metadata.py +270 -0
  84. bead/data/range.py +123 -0
  85. bead/data/repository.py +358 -0
  86. bead/data/serialization.py +249 -0
  87. bead/data/timestamps.py +89 -0
  88. bead/data/validation.py +349 -0
  89. bead/data_collection/__init__.py +11 -0
  90. bead/data_collection/jatos.py +223 -0
  91. bead/data_collection/merger.py +154 -0
  92. bead/data_collection/prolific.py +198 -0
  93. bead/deployment/__init__.py +5 -0
  94. bead/deployment/distribution.py +402 -0
  95. bead/deployment/jatos/__init__.py +1 -0
  96. bead/deployment/jatos/api.py +200 -0
  97. bead/deployment/jatos/exporter.py +210 -0
  98. bead/deployment/jspsych/__init__.py +9 -0
  99. bead/deployment/jspsych/biome.json +44 -0
  100. bead/deployment/jspsych/config.py +411 -0
  101. bead/deployment/jspsych/generator.py +598 -0
  102. bead/deployment/jspsych/package.json +51 -0
  103. bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
  104. bead/deployment/jspsych/randomizer.py +299 -0
  105. bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
  106. bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
  107. bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
  108. bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
  109. bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
  110. bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
  111. bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
  112. bead/deployment/jspsych/src/plugins/rating.ts +248 -0
  113. bead/deployment/jspsych/src/slopit/index.ts +9 -0
  114. bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
  115. bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
  116. bead/deployment/jspsych/templates/experiment.css +1 -0
  117. bead/deployment/jspsych/templates/experiment.js.template +289 -0
  118. bead/deployment/jspsych/templates/index.html +51 -0
  119. bead/deployment/jspsych/templates/randomizer.js +241 -0
  120. bead/deployment/jspsych/templates/randomizer.js.template +313 -0
  121. bead/deployment/jspsych/trials.py +723 -0
  122. bead/deployment/jspsych/tsconfig.json +23 -0
  123. bead/deployment/jspsych/tsup.config.ts +30 -0
  124. bead/deployment/jspsych/ui/__init__.py +1 -0
  125. bead/deployment/jspsych/ui/components.py +383 -0
  126. bead/deployment/jspsych/ui/styles.py +411 -0
  127. bead/dsl/__init__.py +80 -0
  128. bead/dsl/ast.py +168 -0
  129. bead/dsl/context.py +178 -0
  130. bead/dsl/errors.py +71 -0
  131. bead/dsl/evaluator.py +570 -0
  132. bead/dsl/grammar.lark +81 -0
  133. bead/dsl/parser.py +231 -0
  134. bead/dsl/stdlib.py +929 -0
  135. bead/evaluation/__init__.py +13 -0
  136. bead/evaluation/convergence.py +485 -0
  137. bead/evaluation/interannotator.py +398 -0
  138. bead/items/__init__.py +40 -0
  139. bead/items/adapters/__init__.py +70 -0
  140. bead/items/adapters/anthropic.py +224 -0
  141. bead/items/adapters/api_utils.py +167 -0
  142. bead/items/adapters/base.py +216 -0
  143. bead/items/adapters/google.py +259 -0
  144. bead/items/adapters/huggingface.py +1074 -0
  145. bead/items/adapters/openai.py +323 -0
  146. bead/items/adapters/registry.py +202 -0
  147. bead/items/adapters/sentence_transformers.py +224 -0
  148. bead/items/adapters/togetherai.py +309 -0
  149. bead/items/binary.py +515 -0
  150. bead/items/cache.py +558 -0
  151. bead/items/categorical.py +593 -0
  152. bead/items/cloze.py +757 -0
  153. bead/items/constructor.py +784 -0
  154. bead/items/forced_choice.py +413 -0
  155. bead/items/free_text.py +681 -0
  156. bead/items/generation.py +432 -0
  157. bead/items/item.py +396 -0
  158. bead/items/item_template.py +787 -0
  159. bead/items/magnitude.py +573 -0
  160. bead/items/multi_select.py +621 -0
  161. bead/items/ordinal_scale.py +569 -0
  162. bead/items/scoring.py +448 -0
  163. bead/items/validation.py +723 -0
  164. bead/lists/__init__.py +30 -0
  165. bead/lists/balancer.py +263 -0
  166. bead/lists/constraints.py +1067 -0
  167. bead/lists/experiment_list.py +286 -0
  168. bead/lists/list_collection.py +378 -0
  169. bead/lists/partitioner.py +1141 -0
  170. bead/lists/stratification.py +254 -0
  171. bead/participants/__init__.py +73 -0
  172. bead/participants/collection.py +699 -0
  173. bead/participants/merging.py +312 -0
  174. bead/participants/metadata_spec.py +491 -0
  175. bead/participants/models.py +276 -0
  176. bead/resources/__init__.py +29 -0
  177. bead/resources/adapters/__init__.py +19 -0
  178. bead/resources/adapters/base.py +104 -0
  179. bead/resources/adapters/cache.py +128 -0
  180. bead/resources/adapters/glazing.py +508 -0
  181. bead/resources/adapters/registry.py +117 -0
  182. bead/resources/adapters/unimorph.py +796 -0
  183. bead/resources/classification.py +856 -0
  184. bead/resources/constraint_builders.py +329 -0
  185. bead/resources/constraints.py +165 -0
  186. bead/resources/lexical_item.py +223 -0
  187. bead/resources/lexicon.py +744 -0
  188. bead/resources/loaders.py +209 -0
  189. bead/resources/template.py +441 -0
  190. bead/resources/template_collection.py +707 -0
  191. bead/resources/template_generation.py +349 -0
  192. bead/simulation/__init__.py +29 -0
  193. bead/simulation/annotators/__init__.py +15 -0
  194. bead/simulation/annotators/base.py +175 -0
  195. bead/simulation/annotators/distance_based.py +135 -0
  196. bead/simulation/annotators/lm_based.py +114 -0
  197. bead/simulation/annotators/oracle.py +182 -0
  198. bead/simulation/annotators/random.py +181 -0
  199. bead/simulation/dsl_extension/__init__.py +3 -0
  200. bead/simulation/noise_models/__init__.py +13 -0
  201. bead/simulation/noise_models/base.py +42 -0
  202. bead/simulation/noise_models/random_noise.py +82 -0
  203. bead/simulation/noise_models/systematic.py +132 -0
  204. bead/simulation/noise_models/temperature.py +86 -0
  205. bead/simulation/runner.py +144 -0
  206. bead/simulation/strategies/__init__.py +23 -0
  207. bead/simulation/strategies/base.py +123 -0
  208. bead/simulation/strategies/binary.py +103 -0
  209. bead/simulation/strategies/categorical.py +123 -0
  210. bead/simulation/strategies/cloze.py +224 -0
  211. bead/simulation/strategies/forced_choice.py +127 -0
  212. bead/simulation/strategies/free_text.py +105 -0
  213. bead/simulation/strategies/magnitude.py +116 -0
  214. bead/simulation/strategies/multi_select.py +129 -0
  215. bead/simulation/strategies/ordinal_scale.py +131 -0
  216. bead/templates/__init__.py +27 -0
  217. bead/templates/adapters/__init__.py +17 -0
  218. bead/templates/adapters/base.py +128 -0
  219. bead/templates/adapters/cache.py +178 -0
  220. bead/templates/adapters/huggingface.py +312 -0
  221. bead/templates/combinatorics.py +103 -0
  222. bead/templates/filler.py +605 -0
  223. bead/templates/renderers.py +177 -0
  224. bead/templates/resolver.py +178 -0
  225. bead/templates/strategies.py +1806 -0
  226. bead/templates/streaming.py +195 -0
  227. bead-0.1.0.dist-info/METADATA +212 -0
  228. bead-0.1.0.dist-info/RECORD +231 -0
  229. bead-0.1.0.dist-info/WHEEL +4 -0
  230. bead-0.1.0.dist-info/entry_points.txt +2 -0
  231. bead-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,593 @@
1
+ """Utilities for creating categorical experimental items.
2
+
3
+ This module provides language-agnostic utilities for creating categorical
4
+ items where participants select from N unordered categories (e.g., NLI labels,
5
+ POS tags, semantic relations).
6
+
7
+ Integration Points
8
+ ------------------
9
+ - Active Learning: bead/active_learning/models/categorical.py
10
+ - Simulation: bead/simulation/strategies/categorical.py
11
+ - Deployment: bead/deployment/jspsych/ (dropdown or radio buttons)
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from collections import defaultdict
17
+ from collections.abc import Callable, Hashable
18
+ from itertools import product
19
+ from uuid import UUID, uuid4
20
+
21
+ from bead.items.item import Item, MetadataValue
22
+
23
+
24
def create_categorical_item(
    text: str,
    categories: list[str],
    prompt: str | None = None,
    item_template_id: UUID | None = None,
    metadata: dict[str, MetadataValue] | None = None,
) -> Item:
    """Build one categorical (choose-one-of-N) classification item.

    Parameters
    ----------
    text : str
        Stimulus text to be classified. Must contain at least one
        non-whitespace character.
    categories : list[str]
        Unordered category labels; at least two are required. A copy of
        this list is stored under the ``"categories"`` metadata key.
    prompt : str | None
        Question shown with the stimulus. Falls back to
        ``"Select a category:"`` when None.
    item_template_id : UUID | None
        Template ID to attach to the item. A fresh ``uuid4()`` is used
        when None.
    metadata : dict[str, MetadataValue] | None
        Extra entries merged into ``item_metadata`` after ``"categories"``
        has been set, so a colliding key supplied here takes precedence.

    Returns
    -------
    Item
        Item whose ``rendered_elements`` hold ``"text"`` and ``"prompt"``
        and whose ``item_metadata`` holds the categories plus any extras.

    Raises
    ------
    ValueError
        If ``text`` is empty or whitespace-only, or if fewer than two
        categories are given.

    Examples
    --------
    >>> item = create_categorical_item(
    ...     text="Premise: All dogs bark. Hypothesis: Some dogs bark.",
    ...     categories=["entailment", "neutral", "contradiction"],
    ...     prompt="What is the relationship?",
    ... )
    >>> item.rendered_elements["prompt"]
    'What is the relationship?'
    >>> item.item_metadata["categories"]
    ['entailment', 'neutral', 'contradiction']
    """
    # Validate inputs before allocating anything.
    if not (text and text.strip()):
        raise ValueError("text cannot be empty")
    if len(categories) < 2:
        raise ValueError("At least 2 categories required for categorical item")

    template_id = uuid4() if item_template_id is None else item_template_id
    question = "Select a category:" if prompt is None else prompt

    # "categories" goes in first; caller metadata is layered on top of it.
    combined_metadata: dict[str, MetadataValue] = {"categories": list(categories)}
    if metadata:
        combined_metadata.update(metadata)

    return Item(
        item_template_id=template_id,
        rendered_elements={"text": text, "prompt": question},
        item_metadata=combined_metadata,
    )
110
+
111
+
112
def create_nli_item(
    premise: str,
    hypothesis: str,
    categories: list[str] | None = None,
    prompt: str | None = None,
    item_template_id: UUID | None = None,
    metadata: dict[str, MetadataValue] | None = None,
) -> Item:
    """Create a Natural Language Inference (NLI) item.

    Specialized helper for NLI tasks: formats the premise/hypothesis pair
    into a single stimulus text, supplies default categories and prompt,
    and records the pair in the item metadata.

    Parameters
    ----------
    premise : str
        The premise text. Must not be empty or whitespace-only.
    hypothesis : str
        The hypothesis text. Must not be empty or whitespace-only.
    categories : list[str] | None
        Category labels. If None, uses ["entailment", "neutral", "contradiction"].
    prompt : str | None
        Question/prompt. If None, uses "What is the relationship?".
    item_template_id : UUID | None
        Template ID for the item. If None, generates new UUID.
    metadata : dict[str, MetadataValue] | None
        Additional metadata for item_metadata field. Merged over the
        default ``"premise"``, ``"hypothesis"`` and ``"task"`` entries, so
        colliding keys supplied here take precedence.

    Returns
    -------
    Item
        NLI categorical item.

    Raises
    ------
    ValueError
        If ``premise`` or ``hypothesis`` is empty or whitespace-only, or if
        fewer than 2 categories are provided.

    Examples
    --------
    >>> item = create_nli_item(
    ...     premise="All dogs bark.",
    ...     hypothesis="Some dogs bark."
    ... )
    >>> "Premise:" in item.rendered_elements["text"]
    True
    >>> "Hypothesis:" in item.rendered_elements["text"]
    True
    >>> item.item_metadata["categories"]
    ['entailment', 'neutral', 'contradiction']
    >>> item.item_metadata["premise"]
    'All dogs bark.'

    >>> # Custom categories
    >>> item = create_nli_item(
    ...     premise="The cat is on the mat.",
    ...     hypothesis="There is an animal on the mat.",
    ...     categories=["entails", "contradicts", "neither"]
    ... )
    >>> item.item_metadata["categories"]
    ['entails', 'contradicts', 'neither']
    """
    # The combined text always carries the "Premise:"/"Hypothesis:" labels,
    # so the empty-text check in create_categorical_item can never trigger
    # for it. Validate the two parts here so blank stimuli are rejected.
    if not premise or not premise.strip():
        raise ValueError("premise cannot be empty")
    if not hypothesis or not hypothesis.strip():
        raise ValueError("hypothesis cannot be empty")

    if categories is None:
        categories = ["entailment", "neutral", "contradiction"]

    if prompt is None:
        prompt = "What is the relationship?"

    # Format as premise-hypothesis pair
    combined_text = f"Premise: {premise}\nHypothesis: {hypothesis}"

    # Build metadata with premise and hypothesis; caller entries win on clash.
    nli_metadata: dict[str, MetadataValue] = {
        "premise": premise,
        "hypothesis": hypothesis,
        "task": "nli",
    }
    if metadata:
        nli_metadata.update(metadata)

    return create_categorical_item(
        text=combined_text,
        categories=categories,
        prompt=prompt,
        item_template_id=item_template_id,
        metadata=nli_metadata,
    )
194
+
195
+
196
def create_categorical_items_from_texts(
    texts: list[str],
    categories: list[str],
    prompt: str | None = None,
    *,
    item_template_id: UUID | None = None,
    metadata_fn: Callable[[str], dict[str, MetadataValue]] | None = None,
) -> list[Item]:
    """Turn each text into a categorical item sharing one category set.

    Parameters
    ----------
    texts : list[str]
        Stimulus texts; one item is produced per entry, in order.
    categories : list[str]
        Category labels applied to every item.
    prompt : str | None
        Question/prompt forwarded unchanged to every item.
    item_template_id : UUID | None
        Template ID forwarded unchanged to every item. When None, each
        item gets its own generated ID from ``create_categorical_item``.
    metadata_fn : Callable[[str], dict[str, MetadataValue]] | None
        Called once per text to produce that item's extra metadata. When
        omitted, an empty dict is passed instead.

    Returns
    -------
    list[Item]
        One categorical item per input text.

    Examples
    --------
    >>> items = create_categorical_items_from_texts(
    ...     ["The cat sat.", "The dog ran.", "The bird flew."],
    ...     categories=["past", "present", "future"],
    ...     prompt="What is the tense?"
    ... )
    >>> len(items)
    3
    >>> items[0].item_metadata["categories"]
    ['past', 'present', 'future']
    """
    return [
        create_categorical_item(
            text=stimulus,
            categories=categories,
            prompt=prompt,
            item_template_id=item_template_id,
            metadata=metadata_fn(stimulus) if metadata_fn else {},
        )
        for stimulus in texts
    ]
255
+
256
+
257
def create_categorical_items_from_pairs(
    pairs: list[tuple[str, str]],
    categories: list[str],
    prompt: str | None = None,
    *,
    pair_label1: str = "Text 1",
    pair_label2: str = "Text 2",
    item_template_id: UUID | None = None,
    metadata_fn: (Callable[[str, str], dict[str, MetadataValue]] | None) = None,
) -> list[Item]:
    """Create categorical items from pairs of texts.

    Useful for NLI, paraphrase detection, semantic similarity, etc. Each
    pair is rendered as ``"<pair_label1>: <text1>\\n<pair_label2>: <text2>"``.

    Parameters
    ----------
    pairs : list[tuple[str, str]]
        List of (text1, text2) pairs. Neither member may be empty or
        whitespace-only.
    categories : list[str]
        Category labels for all items.
    prompt : str | None
        The question/prompt for all items.
    pair_label1 : str
        Label for first text in pair (default: "Text 1").
    pair_label2 : str
        Label for second text in pair (default: "Text 2").
    item_template_id : UUID | None
        Template ID for all created items. If None, generates one per item.
    metadata_fn : Callable[[str, str], dict[str, MetadataValue]] | None
        Function to generate metadata from (text1, text2). Its result is
        merged over the default ``"text1"``/``"text2"`` entries.

    Returns
    -------
    list[Item]
        Categorical items from pairs, in input order.

    Raises
    ------
    ValueError
        If either text of any pair is empty or whitespace-only, or if
        fewer than 2 categories are provided.

    Examples
    --------
    >>> pairs = [
    ...     ("All dogs bark.", "Some dogs bark."),
    ...     ("The sky is blue.", "The sky is not blue.")
    ... ]
    >>> items = create_categorical_items_from_pairs(
    ...     pairs,
    ...     categories=["entailment", "neutral", "contradiction"],
    ...     prompt="What is the relationship?",
    ...     pair_label1="Premise",
    ...     pair_label2="Hypothesis"
    ... )
    >>> len(items)
    2
    >>> "Premise:" in items[0].rendered_elements["text"]
    True
    """
    categorical_items: list[Item] = []

    for text1, text2 in pairs:
        # The combined text always contains the two labels, so the
        # empty-text check in create_categorical_item cannot catch a blank
        # pair member; reject it here instead.
        if not text1 or not text1.strip():
            raise ValueError("text1 cannot be empty")
        if not text2 or not text2.strip():
            raise ValueError("text2 cannot be empty")

        # Combine pairs into single text
        combined_text = f"{pair_label1}: {text1}\n{pair_label2}: {text2}"

        # Record the raw pair; metadata_fn output may override these keys.
        metadata: dict[str, MetadataValue] = {
            "text1": text1,
            "text2": text2,
        }
        if metadata_fn:
            metadata.update(metadata_fn(text1, text2))

        item = create_categorical_item(
            text=combined_text,
            categories=categories,
            prompt=prompt,
            item_template_id=item_template_id,
            metadata=metadata,
        )
        categorical_items.append(item)

    return categorical_items
334
+
335
+
336
def create_categorical_items_from_groups(
    items: list[Item],
    group_by: Callable[[Item], Hashable],
    categories: list[str],
    prompt: str | None = None,
    *,
    extract_text: Callable[[Item], str] | None = None,
    include_group_metadata: bool = True,
    item_template_id: UUID | None = None,
) -> list[Item]:
    """Create one categorical item per source item, ordered by group.

    Source items are first bucketed by ``group_by``. Output follows the
    order in which group keys were first seen, with items inside a group
    kept in their input order.

    Parameters
    ----------
    items : list[Item]
        Source items to convert.
    group_by : Callable[[Item], Hashable]
        Returns the grouping key for a source item. It is called for every
        source item before any categorical item is created.
    categories : list[str]
        Category labels for all created items.
    prompt : str | None
        Question/prompt for all created items.
    extract_text : Callable[[Item], str] | None
        Returns the stimulus text for a source item. When omitted, the
        text is looked up via ``_extract_text_from_item``.
    include_group_metadata : bool
        When True, ``str(group key)`` is stored under ``"group_key"``.
    item_template_id : UUID | None
        Template ID forwarded to every created item.

    Returns
    -------
    list[Item]
        Categorical items, each carrying ``"source_item_id"`` (the source
        item's ``id`` as a string) in its metadata.

    Examples
    --------
    >>> categorical_items = create_categorical_items_from_groups(
    ...     source_items,
    ...     group_by=lambda i: i.item_metadata["tense"],
    ...     categories=["past", "present", "future"],
    ...     prompt="What is the tense?"
    ... )  # doctest: +SKIP
    """
    # Pass 1: bucket every source item under its key (dicts keep insertion
    # order, so groups come out in first-seen order).
    buckets: dict[Hashable, list[Item]] = {}
    for source in items:
        buckets.setdefault(group_by(source), []).append(source)

    # Flatten back to (key, item) pairs in group order.
    ordered = [
        (key, member) for key, members in buckets.items() for member in members
    ]

    # Pass 2: build the categorical items.
    results: list[Item] = []
    for key, source in ordered:
        stimulus: str = (
            extract_text(source) if extract_text else _extract_text_from_item(source)
        )

        extra: dict[str, MetadataValue] = {"source_item_id": str(source.id)}
        if include_group_metadata:
            extra["group_key"] = str(key)

        results.append(
            create_categorical_item(
                text=stimulus,
                categories=categories,
                prompt=prompt,
                item_template_id=item_template_id,
                metadata=extra,
            )
        )

    return results
430
+
431
+
432
def create_categorical_items_cross_product(
    texts: list[str],
    prompts: list[str],
    categories: list[str],
    *,
    item_template_id: UUID | None = None,
    metadata_fn: (Callable[[str, str], dict[str, MetadataValue]] | None) = None,
) -> list[Item]:
    """Create one categorical item for every (text, prompt) combination.

    Handy when several questions should be asked about each stimulus.
    Combinations are produced with ``itertools.product``, so the prompt
    varies fastest.

    Parameters
    ----------
    texts : list[str]
        Stimulus texts.
    prompts : list[str]
        Prompts to pair with each text.
    categories : list[str]
        Category labels for all created items.
    item_template_id : UUID | None
        Template ID forwarded to every created item.
    metadata_fn : Callable[[str, str], dict[str, MetadataValue]] | None
        Called with (text, prompt) to produce each item's extra metadata.
        When omitted, an empty dict is passed instead.

    Returns
    -------
    list[Item]
        ``len(texts) * len(prompts)`` categorical items.

    Examples
    --------
    >>> items = create_categorical_items_cross_product(
    ...     ["The cat sat.", "The dog ran."],
    ...     ["What is the tense?", "What is the aspect?"],
    ...     ["past", "present", "future"],
    ... )
    >>> len(items)
    4
    """
    return [
        create_categorical_item(
            text=stimulus,
            categories=categories,
            prompt=question,
            item_template_id=item_template_id,
            metadata=metadata_fn(stimulus, question) if metadata_fn else {},
        )
        for stimulus, question in product(texts, prompts)
    ]
490
+
491
+
492
def create_filtered_categorical_items(
    items: list[Item],
    categories: list[str],
    prompt: str | None = None,
    *,
    item_filter: Callable[[Item], bool] | None = None,
    extract_text: Callable[[Item], str] | None = None,
    item_template_id: UUID | None = None,
) -> list[Item]:
    """Create categorical items from the source items that pass a filter.

    Parameters
    ----------
    items : list[Item]
        Source items.
    categories : list[str]
        Category labels for all created items.
    prompt : str | None
        Question/prompt for all created items.
    item_filter : Callable[[Item], bool] | None
        Predicate deciding which source items are kept. When omitted,
        every source item is used.
    extract_text : Callable[[Item], str] | None
        Returns the stimulus text for a source item. When omitted, the
        text is looked up via ``_extract_text_from_item``.
    item_template_id : UUID | None
        Template ID forwarded to every created item.

    Returns
    -------
    list[Item]
        One categorical item per retained source item, each carrying
        ``"source_item_id"`` (the source item's ``id`` as a string) in its
        metadata.

    Examples
    --------
    >>> categorical_items = create_filtered_categorical_items(
    ...     items,
    ...     categories=["past", "present", "future"],
    ...     prompt="What is the tense?",
    ...     item_filter=lambda i: i.item_metadata.get("valid", True)
    ... )  # doctest: +SKIP
    """
    # The predicate is applied to the whole input before any item is built.
    selected = [src for src in items if item_filter(src)] if item_filter else items

    results: list[Item] = []
    for source in selected:
        stimulus: str = (
            extract_text(source) if extract_text else _extract_text_from_item(source)
        )
        provenance: dict[str, MetadataValue] = {"source_item_id": str(source.id)}
        results.append(
            create_categorical_item(
                text=stimulus,
                categories=categories,
                prompt=prompt,
                item_template_id=item_template_id,
                metadata=provenance,
            )
        )

    return results
561
+
562
+
563
def _extract_text_from_item(item: Item) -> str:
    """Return the stimulus text stored in an item's rendered_elements.

    The keys "text", "sentence" and "content" are checked in that order
    and the value of the first one present is returned.

    Parameters
    ----------
    item : Item
        Item whose ``rendered_elements`` mapping is searched.

    Returns
    -------
    str
        Value stored under the first matching key.

    Raises
    ------
    ValueError
        If none of the three keys is present in ``rendered_elements``.
    """
    elements = item.rendered_elements

    # First candidate key that exists wins; None means nothing matched.
    matched_key = next(
        (name for name in ("text", "sentence", "content") if name in elements),
        None,
    )
    if matched_key is not None:
        return elements[matched_key]

    raise ValueError(
        f"Cannot extract text from item {item.id}. "
        f"Expected one of ['text', 'sentence', 'content'] in rendered_elements, "
        f"but found keys: {list(elements.keys())}. "
        f"Use the extract_text parameter to provide a custom extraction function."
    )