bead 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. bead/__init__.py +11 -0
  2. bead/__main__.py +11 -0
  3. bead/active_learning/__init__.py +15 -0
  4. bead/active_learning/config.py +231 -0
  5. bead/active_learning/loop.py +566 -0
  6. bead/active_learning/models/__init__.py +24 -0
  7. bead/active_learning/models/base.py +852 -0
  8. bead/active_learning/models/binary.py +910 -0
  9. bead/active_learning/models/categorical.py +943 -0
  10. bead/active_learning/models/cloze.py +862 -0
  11. bead/active_learning/models/forced_choice.py +956 -0
  12. bead/active_learning/models/free_text.py +773 -0
  13. bead/active_learning/models/lora.py +365 -0
  14. bead/active_learning/models/magnitude.py +835 -0
  15. bead/active_learning/models/multi_select.py +795 -0
  16. bead/active_learning/models/ordinal_scale.py +811 -0
  17. bead/active_learning/models/peft_adapter.py +155 -0
  18. bead/active_learning/models/random_effects.py +639 -0
  19. bead/active_learning/selection.py +354 -0
  20. bead/active_learning/strategies.py +391 -0
  21. bead/active_learning/trainers/__init__.py +26 -0
  22. bead/active_learning/trainers/base.py +210 -0
  23. bead/active_learning/trainers/data_collator.py +172 -0
  24. bead/active_learning/trainers/dataset_utils.py +261 -0
  25. bead/active_learning/trainers/huggingface.py +304 -0
  26. bead/active_learning/trainers/lightning.py +324 -0
  27. bead/active_learning/trainers/metrics.py +424 -0
  28. bead/active_learning/trainers/mixed_effects.py +551 -0
  29. bead/active_learning/trainers/model_wrapper.py +509 -0
  30. bead/active_learning/trainers/registry.py +104 -0
  31. bead/adapters/__init__.py +11 -0
  32. bead/adapters/huggingface.py +61 -0
  33. bead/behavioral/__init__.py +116 -0
  34. bead/behavioral/analytics.py +646 -0
  35. bead/behavioral/extraction.py +343 -0
  36. bead/behavioral/merging.py +343 -0
  37. bead/cli/__init__.py +11 -0
  38. bead/cli/active_learning.py +513 -0
  39. bead/cli/active_learning_commands.py +779 -0
  40. bead/cli/completion.py +359 -0
  41. bead/cli/config.py +624 -0
  42. bead/cli/constraint_builders.py +286 -0
  43. bead/cli/deployment.py +859 -0
  44. bead/cli/deployment_trials.py +493 -0
  45. bead/cli/deployment_ui.py +332 -0
  46. bead/cli/display.py +378 -0
  47. bead/cli/items.py +960 -0
  48. bead/cli/items_factories.py +776 -0
  49. bead/cli/list_constraints.py +714 -0
  50. bead/cli/lists.py +490 -0
  51. bead/cli/main.py +430 -0
  52. bead/cli/models.py +877 -0
  53. bead/cli/resource_loaders.py +621 -0
  54. bead/cli/resources.py +1036 -0
  55. bead/cli/shell.py +356 -0
  56. bead/cli/simulate.py +840 -0
  57. bead/cli/templates.py +1158 -0
  58. bead/cli/training.py +1080 -0
  59. bead/cli/utils.py +614 -0
  60. bead/cli/workflow.py +1273 -0
  61. bead/config/__init__.py +68 -0
  62. bead/config/active_learning.py +1009 -0
  63. bead/config/config.py +192 -0
  64. bead/config/defaults.py +118 -0
  65. bead/config/deployment.py +217 -0
  66. bead/config/env.py +147 -0
  67. bead/config/item.py +45 -0
  68. bead/config/list.py +193 -0
  69. bead/config/loader.py +149 -0
  70. bead/config/logging.py +42 -0
  71. bead/config/model.py +49 -0
  72. bead/config/paths.py +46 -0
  73. bead/config/profiles.py +320 -0
  74. bead/config/resources.py +47 -0
  75. bead/config/serialization.py +210 -0
  76. bead/config/simulation.py +206 -0
  77. bead/config/template.py +238 -0
  78. bead/config/validation.py +267 -0
  79. bead/data/__init__.py +65 -0
  80. bead/data/base.py +87 -0
  81. bead/data/identifiers.py +97 -0
  82. bead/data/language_codes.py +61 -0
  83. bead/data/metadata.py +270 -0
  84. bead/data/range.py +123 -0
  85. bead/data/repository.py +358 -0
  86. bead/data/serialization.py +249 -0
  87. bead/data/timestamps.py +89 -0
  88. bead/data/validation.py +349 -0
  89. bead/data_collection/__init__.py +11 -0
  90. bead/data_collection/jatos.py +223 -0
  91. bead/data_collection/merger.py +154 -0
  92. bead/data_collection/prolific.py +198 -0
  93. bead/deployment/__init__.py +5 -0
  94. bead/deployment/distribution.py +402 -0
  95. bead/deployment/jatos/__init__.py +1 -0
  96. bead/deployment/jatos/api.py +200 -0
  97. bead/deployment/jatos/exporter.py +210 -0
  98. bead/deployment/jspsych/__init__.py +9 -0
  99. bead/deployment/jspsych/biome.json +44 -0
  100. bead/deployment/jspsych/config.py +411 -0
  101. bead/deployment/jspsych/generator.py +598 -0
  102. bead/deployment/jspsych/package.json +51 -0
  103. bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
  104. bead/deployment/jspsych/randomizer.py +299 -0
  105. bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
  106. bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
  107. bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
  108. bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
  109. bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
  110. bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
  111. bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
  112. bead/deployment/jspsych/src/plugins/rating.ts +248 -0
  113. bead/deployment/jspsych/src/slopit/index.ts +9 -0
  114. bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
  115. bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
  116. bead/deployment/jspsych/templates/experiment.css +1 -0
  117. bead/deployment/jspsych/templates/experiment.js.template +289 -0
  118. bead/deployment/jspsych/templates/index.html +51 -0
  119. bead/deployment/jspsych/templates/randomizer.js +241 -0
  120. bead/deployment/jspsych/templates/randomizer.js.template +313 -0
  121. bead/deployment/jspsych/trials.py +723 -0
  122. bead/deployment/jspsych/tsconfig.json +23 -0
  123. bead/deployment/jspsych/tsup.config.ts +30 -0
  124. bead/deployment/jspsych/ui/__init__.py +1 -0
  125. bead/deployment/jspsych/ui/components.py +383 -0
  126. bead/deployment/jspsych/ui/styles.py +411 -0
  127. bead/dsl/__init__.py +80 -0
  128. bead/dsl/ast.py +168 -0
  129. bead/dsl/context.py +178 -0
  130. bead/dsl/errors.py +71 -0
  131. bead/dsl/evaluator.py +570 -0
  132. bead/dsl/grammar.lark +81 -0
  133. bead/dsl/parser.py +231 -0
  134. bead/dsl/stdlib.py +929 -0
  135. bead/evaluation/__init__.py +13 -0
  136. bead/evaluation/convergence.py +485 -0
  137. bead/evaluation/interannotator.py +398 -0
  138. bead/items/__init__.py +40 -0
  139. bead/items/adapters/__init__.py +70 -0
  140. bead/items/adapters/anthropic.py +224 -0
  141. bead/items/adapters/api_utils.py +167 -0
  142. bead/items/adapters/base.py +216 -0
  143. bead/items/adapters/google.py +259 -0
  144. bead/items/adapters/huggingface.py +1074 -0
  145. bead/items/adapters/openai.py +323 -0
  146. bead/items/adapters/registry.py +202 -0
  147. bead/items/adapters/sentence_transformers.py +224 -0
  148. bead/items/adapters/togetherai.py +309 -0
  149. bead/items/binary.py +515 -0
  150. bead/items/cache.py +558 -0
  151. bead/items/categorical.py +593 -0
  152. bead/items/cloze.py +757 -0
  153. bead/items/constructor.py +784 -0
  154. bead/items/forced_choice.py +413 -0
  155. bead/items/free_text.py +681 -0
  156. bead/items/generation.py +432 -0
  157. bead/items/item.py +396 -0
  158. bead/items/item_template.py +787 -0
  159. bead/items/magnitude.py +573 -0
  160. bead/items/multi_select.py +621 -0
  161. bead/items/ordinal_scale.py +569 -0
  162. bead/items/scoring.py +448 -0
  163. bead/items/validation.py +723 -0
  164. bead/lists/__init__.py +30 -0
  165. bead/lists/balancer.py +263 -0
  166. bead/lists/constraints.py +1067 -0
  167. bead/lists/experiment_list.py +286 -0
  168. bead/lists/list_collection.py +378 -0
  169. bead/lists/partitioner.py +1141 -0
  170. bead/lists/stratification.py +254 -0
  171. bead/participants/__init__.py +73 -0
  172. bead/participants/collection.py +699 -0
  173. bead/participants/merging.py +312 -0
  174. bead/participants/metadata_spec.py +491 -0
  175. bead/participants/models.py +276 -0
  176. bead/resources/__init__.py +29 -0
  177. bead/resources/adapters/__init__.py +19 -0
  178. bead/resources/adapters/base.py +104 -0
  179. bead/resources/adapters/cache.py +128 -0
  180. bead/resources/adapters/glazing.py +508 -0
  181. bead/resources/adapters/registry.py +117 -0
  182. bead/resources/adapters/unimorph.py +796 -0
  183. bead/resources/classification.py +856 -0
  184. bead/resources/constraint_builders.py +329 -0
  185. bead/resources/constraints.py +165 -0
  186. bead/resources/lexical_item.py +223 -0
  187. bead/resources/lexicon.py +744 -0
  188. bead/resources/loaders.py +209 -0
  189. bead/resources/template.py +441 -0
  190. bead/resources/template_collection.py +707 -0
  191. bead/resources/template_generation.py +349 -0
  192. bead/simulation/__init__.py +29 -0
  193. bead/simulation/annotators/__init__.py +15 -0
  194. bead/simulation/annotators/base.py +175 -0
  195. bead/simulation/annotators/distance_based.py +135 -0
  196. bead/simulation/annotators/lm_based.py +114 -0
  197. bead/simulation/annotators/oracle.py +182 -0
  198. bead/simulation/annotators/random.py +181 -0
  199. bead/simulation/dsl_extension/__init__.py +3 -0
  200. bead/simulation/noise_models/__init__.py +13 -0
  201. bead/simulation/noise_models/base.py +42 -0
  202. bead/simulation/noise_models/random_noise.py +82 -0
  203. bead/simulation/noise_models/systematic.py +132 -0
  204. bead/simulation/noise_models/temperature.py +86 -0
  205. bead/simulation/runner.py +144 -0
  206. bead/simulation/strategies/__init__.py +23 -0
  207. bead/simulation/strategies/base.py +123 -0
  208. bead/simulation/strategies/binary.py +103 -0
  209. bead/simulation/strategies/categorical.py +123 -0
  210. bead/simulation/strategies/cloze.py +224 -0
  211. bead/simulation/strategies/forced_choice.py +127 -0
  212. bead/simulation/strategies/free_text.py +105 -0
  213. bead/simulation/strategies/magnitude.py +116 -0
  214. bead/simulation/strategies/multi_select.py +129 -0
  215. bead/simulation/strategies/ordinal_scale.py +131 -0
  216. bead/templates/__init__.py +27 -0
  217. bead/templates/adapters/__init__.py +17 -0
  218. bead/templates/adapters/base.py +128 -0
  219. bead/templates/adapters/cache.py +178 -0
  220. bead/templates/adapters/huggingface.py +312 -0
  221. bead/templates/combinatorics.py +103 -0
  222. bead/templates/filler.py +605 -0
  223. bead/templates/renderers.py +177 -0
  224. bead/templates/resolver.py +178 -0
  225. bead/templates/strategies.py +1806 -0
  226. bead/templates/streaming.py +195 -0
  227. bead-0.1.0.dist-info/METADATA +212 -0
  228. bead-0.1.0.dist-info/RECORD +231 -0
  229. bead-0.1.0.dist-info/WHEEL +4 -0
  230. bead-0.1.0.dist-info/entry_points.txt +2 -0
  231. bead-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,707 @@
1
+ """Template collection management.
2
+
3
+ This module provides the TemplateCollection class for managing collections
4
+ of sentence templates.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ from collections.abc import Callable, Iterator
11
+ from pathlib import Path
12
+ from typing import Literal
13
+ from uuid import UUID
14
+
15
+ import pandas as pd
16
+ import polars as pl
17
+ from pydantic import Field
18
+
19
+ from bead.data.base import BeadBaseModel
20
+ from bead.resources.template import Template
21
+
22
+ # Type alias for supported DataFrame types
23
+ DataFrame = pd.DataFrame | pl.DataFrame
24
+
25
+
26
def _empty_str_list() -> list[str]:
    """Build a new, empty list of strings.

    Used as the ``default_factory`` for the ``tags`` field so that every
    collection gets its own list object.
    """
    return list()
29
+
30
+
31
def _empty_template_dict() -> dict[UUID, Template]:
    """Build a new, empty ``UUID -> Template`` mapping.

    Used as the ``default_factory`` for the ``templates`` field so that every
    collection gets its own dictionary object.
    """
    return dict()
34
+
35
+
36
+ class TemplateCollection(BeadBaseModel):
37
+ """A collection of templates with operations for filtering and analysis.
38
+
39
+ Similar to Lexicon but for Template objects. The TemplateCollection class
40
+ manages collections of Template objects and provides methods for:
41
+ - Adding and removing templates (CRUD operations)
42
+ - Filtering by properties and tags
43
+ - Searching by name or template string
44
+ - Merging with other collections
45
+ - Converting to/from pandas and polars DataFrames
46
+ - Serialization to JSONLines
47
+
48
+ Attributes
49
+ ----------
50
+ name : str
51
+ Name of the collection.
52
+ description : str | None
53
+ Optional description.
54
+ language_code : str | None
55
+ ISO 639-1 or 639-3 language code (e.g., "en", "es", "eng").
56
+ templates : dict[UUID, Template]
57
+ Dictionary of templates indexed by their UUIDs.
58
+ tags : list[str]
59
+ Tags for categorization.
60
+
61
+ Examples
62
+ --------
63
+ >>> from bead.resources import Slot
64
+ >>> collection = TemplateCollection(name="transitive")
65
+ >>> template = Template(
66
+ ... name="simple",
67
+ ... template_string="{subject} {verb} {object}.",
68
+ ... slots={
69
+ ... "subject": Slot(name="subject"),
70
+ ... "verb": Slot(name="verb"),
71
+ ... "object": Slot(name="object"),
72
+ ... }
73
+ ... )
74
+ >>> collection.add(template)
75
+ >>> len(collection)
76
+ 1
77
+ """
78
+
79
+ name: str
80
+ description: str | None = None
81
+ language_code: str | None = None
82
+ templates: dict[UUID, Template] = Field(default_factory=_empty_template_dict)
83
+ tags: list[str] = Field(default_factory=_empty_str_list)
84
+
85
def __len__(self) -> int:
    """Count the templates currently stored in the collection.

    Returns
    -------
    int
        Size of the ``templates`` mapping.

    Examples
    --------
    >>> collection = TemplateCollection(name="test")
    >>> len(collection)
    0
    """
    stored = self.templates
    return len(stored)
100
+
101
def __iter__(self) -> Iterator[Template]:  # type: ignore[override]
    """Iterate over the stored templates (values, not IDs).

    Templates are yielded in the insertion order of the ``templates``
    dictionary.

    Returns
    -------
    Iterator[Template]
        Iterator over the template objects.

    Examples
    --------
    >>> from bead.resources import Slot
    >>> collection = TemplateCollection(name="test")
    >>> collection.add(
    ...     Template(name="t1", template_string="{x}.", slots={"x": Slot(name="x")})
    ... )
    >>> collection.add(
    ...     Template(name="t2", template_string="{y}.", slots={"y": Slot(name="y")})
    ... )
    >>> [t.name for t in collection]
    ['t1', 't2']
    """
    stored_templates = self.templates.values()
    return iter(stored_templates)
125
+
126
def __contains__(self, template_id: UUID) -> bool:
    """Report whether a template with the given ID is stored.

    Membership is tested against the keys of ``templates``, so pass the
    template's ID rather than the template object itself.

    Parameters
    ----------
    template_id : UUID
        ID to look up.

    Returns
    -------
    bool
        True when ``template_id`` is a key of the collection.

    Examples
    --------
    >>> from bead.resources import Slot
    >>> collection = TemplateCollection(name="test")
    >>> template = Template(
    ...     name="test", template_string="{x}.", slots={"x": Slot(name="x")}
    ... )
    >>> collection.add(template)
    >>> template.id in collection
    True
    """
    known_ids = self.templates
    return template_id in known_ids
151
+
152
def add(self, template: Template) -> None:
    """Store a template under its own ID.

    After a successful insert, ``self.update_modified_time()`` is called.

    Parameters
    ----------
    template : Template
        Template to insert; ``template.id`` becomes its key.

    Raises
    ------
    ValueError
        If the collection already holds a template with that ID. The
        collection is left untouched in that case.

    Examples
    --------
    >>> from bead.resources import Slot
    >>> collection = TemplateCollection(name="test")
    >>> template = Template(
    ...     name="test", template_string="{x}.", slots={"x": Slot(name="x")}
    ... )
    >>> collection.add(template)
    >>> len(collection)
    1
    """
    new_id = template.id
    if new_id in self.templates:
        raise ValueError(f"Template with ID {new_id} already exists in collection")

    self.templates[new_id] = template
    self.update_modified_time()
182
+
183
def add_many(self, templates: list[Template]) -> None:
    """Add several templates to the collection, all or nothing.

    The whole batch is validated before anything is inserted, so a
    duplicate ID never leaves the collection partially updated.

    Parameters
    ----------
    templates : list[Template]
        Templates to add, inserted in the given order.

    Raises
    ------
    ValueError
        If any template's ID is already in the collection, or appears more
        than once within ``templates``. No template is added in that case.

    Examples
    --------
    >>> from bead.resources import Slot
    >>> collection = TemplateCollection(name="test")
    >>> t1 = Template(
    ...     name="t1", template_string="{x}.", slots={"x": Slot(name="x")}
    ... )
    >>> t2 = Template(
    ...     name="t2", template_string="{y}.", slots={"y": Slot(name="y")}
    ... )
    >>> collection.add_many([t1, t2])
    >>> len(collection)
    2
    """
    # Materialise once so a one-shot iterable can be walked twice.
    batch = list(templates)

    # First pass: reject the batch before mutating anything.
    batch_ids: set[UUID] = set()
    for template in batch:
        if template.id in self.templates or template.id in batch_ids:
            raise ValueError(
                f"Template with ID {template.id} already exists in collection"
            )
        batch_ids.add(template.id)

    # Second pass: every insert is now known to succeed.
    for template in batch:
        self.add(template)
212
+
213
def remove(self, template_id: UUID) -> Template:
    """Delete a template from the collection and hand it back.

    After a successful removal, ``self.update_modified_time()`` is called.

    Parameters
    ----------
    template_id : UUID
        ID of the template to delete.

    Returns
    -------
    Template
        The template that was stored under ``template_id``.

    Raises
    ------
    KeyError
        If no template with that ID is stored.

    Examples
    --------
    >>> from bead.resources import Slot
    >>> collection = TemplateCollection(name="test")
    >>> template = Template(
    ...     name="test", template_string="{x}.", slots={"x": Slot(name="x")}
    ... )
    >>> collection.add(template)
    >>> removed = collection.remove(template.id)
    >>> removed.name
    'test'
    >>> len(collection)
    0
    """
    try:
        removed = self.templates.pop(template_id)
    except KeyError:
        # Re-raise with a descriptive message instead of the bare key.
        raise KeyError(
            f"Template with ID {template_id} not found in collection"
        ) from None

    self.update_modified_time()
    return removed
250
+
251
def get(self, template_id: UUID) -> Template | None:
    """Look up a template by ID without raising when it is missing.

    Parameters
    ----------
    template_id : UUID
        ID of the template to fetch.

    Returns
    -------
    Template | None
        The stored template, or None when the ID is unknown.

    Examples
    --------
    >>> from bead.resources import Slot
    >>> collection = TemplateCollection(name="test")
    >>> template = Template(
    ...     name="test", template_string="{x}.", slots={"x": Slot(name="x")}
    ... )
    >>> collection.add(template)
    >>> collection.get(template.id).name  # doctest: +SKIP
    'test'
    >>> from uuid import uuid4
    >>> collection.get(uuid4()) is None
    True
    """
    found: Template | None = self.templates.get(template_id, None)
    return found
280
+
281
def filter(self, predicate: Callable[[Template], bool]) -> TemplateCollection:
    """Build a new collection from the templates accepted by ``predicate``.

    The result is named ``"<name>_filtered"`` and inherits this collection's
    description, language code and a copy of its tags. Template objects are
    shared with this collection, not copied, and this collection itself is
    not modified.

    Parameters
    ----------
    predicate : Callable[[Template], bool]
        Called once per template; a truthy result keeps the template.

    Returns
    -------
    TemplateCollection
        New collection holding only the accepted templates, in their
        original order.

    Examples
    --------
    >>> from bead.resources import Slot
    >>> collection = TemplateCollection(name="test")
    >>> collection.add(
    ...     Template(
    ...         name="t1",
    ...         template_string="{x}.",
    ...         slots={"x": Slot(name="x")},
    ...         tags=["simple"],
    ...     )
    ... )
    >>> collection.add(
    ...     Template(
    ...         name="t2",
    ...         template_string="{y} {z}.",
    ...         slots={"y": Slot(name="y"), "z": Slot(name="z")},
    ...         tags=["complex"],
    ...     )
    ... )
    >>> simple = collection.filter(lambda t: "simple" in t.tags)
    >>> len(simple.templates)
    1
    """
    result = TemplateCollection(
        name=f"{self.name}_filtered",
        description=self.description,
        language_code=self.language_code,
        tags=list(self.tags),
    )

    kept: dict[UUID, Template] = {}
    for template_id, template in self.templates.items():
        if predicate(template):
            kept[template_id] = template

    result.templates = kept
    return result
330
+
331
def filter_by_tag(self, tag: str) -> TemplateCollection:
    """Select the templates whose ``tags`` contain ``tag``.

    This is a thin wrapper around :meth:`filter`; the match is an exact
    membership test against each template's tag list.

    Parameters
    ----------
    tag : str
        Tag that a template must carry to be kept.

    Returns
    -------
    TemplateCollection
        New collection with the matching templates.

    Examples
    --------
    >>> from bead.resources import Slot
    >>> collection = TemplateCollection(name="test")
    >>> collection.add(
    ...     Template(
    ...         name="t1",
    ...         template_string="{x}.",
    ...         slots={"x": Slot(name="x")},
    ...         tags=["simple"],
    ...     )
    ... )
    >>> collection.add(
    ...     Template(
    ...         name="t2",
    ...         template_string="{y}.",
    ...         slots={"y": Slot(name="y")},
    ...         tags=["complex"],
    ...     )
    ... )
    >>> len(collection.filter_by_tag("simple").templates)
    1
    """

    def has_tag(template: Template) -> bool:
        return tag in template.tags

    return self.filter(has_tag)
367
+
368
def filter_by_slot_count(self, count: int) -> TemplateCollection:
    """Select the templates that define exactly ``count`` slots.

    This is a thin wrapper around :meth:`filter` comparing
    ``len(template.slots)`` with ``count``.

    Parameters
    ----------
    count : int
        Required number of slots.

    Returns
    -------
    TemplateCollection
        New collection with the matching templates.

    Examples
    --------
    >>> from bead.resources import Slot
    >>> collection = TemplateCollection(name="test")
    >>> collection.add(
    ...     Template(name="t1", template_string="{x}.", slots={"x": Slot(name="x")})
    ... )
    >>> collection.add(
    ...     Template(
    ...         name="t2",
    ...         template_string="{y} {z}.",
    ...         slots={"y": Slot(name="y"), "z": Slot(name="z")},
    ...     )
    ... )
    >>> len(collection.filter_by_slot_count(1).templates)
    1
    """

    def has_expected_slot_count(template: Template) -> bool:
        return len(template.slots) == count

    return self.filter(has_expected_slot_count)
400
+
401
def search(self, query: str, field: str = "name") -> TemplateCollection:
    """Find templates whose chosen text field contains ``query``.

    Both the query and the field value are lower-cased before the substring
    test, so matching is case-insensitive.

    Parameters
    ----------
    query : str
        Substring to look for.
    field : str
        Which template attribute to search: ``"name"`` (default) or
        ``"template_string"``.

    Returns
    -------
    TemplateCollection
        New collection with the matching templates.

    Raises
    ------
    ValueError
        If ``field`` is neither ``"name"`` nor ``"template_string"``.

    Examples
    --------
    >>> from bead.resources import Slot
    >>> collection = TemplateCollection(name="test")
    >>> collection.add(
    ...     Template(
    ...         name="transitive",
    ...         template_string="{x}.",
    ...         slots={"x": Slot(name="x")},
    ...     )
    ... )
    >>> len(collection.search("trans").templates)
    1
    """
    needle = query.lower()

    if field not in ("name", "template_string"):
        raise ValueError(
            f"Invalid field '{field}'. Must be 'name' or 'template_string'."
        )

    def matches(template: Template) -> bool:
        haystack = getattr(template, field)
        return needle in haystack.lower()

    return self.filter(matches)
447
+
448
def merge(
    self,
    other: TemplateCollection,
    strategy: Literal["keep_first", "keep_second", "error"] = "keep_first",
) -> TemplateCollection:
    """Merge with another collection into a new collection.

    Neither input collection is modified; template objects are shared with
    the result, not copied. The result is named ``"<self.name>_merged"`` and
    takes its description from ``self``. Its language code is ``self``'s,
    falling back to ``other``'s when ``self`` has none.

    Ordering is deterministic: templates from ``self`` come first (in their
    original order), followed by templates that exist only in ``other``.
    Tags are the de-duplicated union of both tag lists, in first-seen order.

    Parameters
    ----------
    other : TemplateCollection
        The collection to merge with.
    strategy : Literal["keep_first", "keep_second", "error"]
        How to handle IDs present in both collections:
        - "keep_first": keep the template from ``self``
        - "keep_second": keep the template from ``other``
        - "error": raise instead of merging

    Returns
    -------
    TemplateCollection
        New merged collection.

    Raises
    ------
    ValueError
        If strategy is "error" and both collections share at least one ID.

    Examples
    --------
    >>> from bead.resources import Slot
    >>> c1 = TemplateCollection(name="c1")
    >>> c1.add(
    ...     Template(
    ...         name="t1", template_string="{x}.", slots={"x": Slot(name="x")}
    ...     )
    ... )
    >>> c2 = TemplateCollection(name="c2")
    >>> c2.add(
    ...     Template(
    ...         name="t2", template_string="{y}.", slots={"y": Slot(name="y")}
    ...     )
    ... )
    >>> merged = c1.merge(c2)
    >>> len(merged.templates)
    2
    """
    if strategy == "error":
        duplicates = set(self.templates.keys()) & set(other.templates.keys())
        if duplicates:
            raise ValueError(
                f"Duplicate template IDs found: {duplicates}. "
                "Use strategy='keep_first' or 'keep_second' to resolve."
            )

    merged = TemplateCollection(
        name=f"{self.name}_merged",
        description=self.description,
        language_code=self.language_code or other.language_code,
        # dict.fromkeys de-duplicates while keeping first-seen order; a set
        # would reorder the tags unpredictably between runs.
        tags=list(dict.fromkeys([*self.tags, *other.tags])),
    )

    # Start from self so its templates always lead the iteration order.
    combined = dict(self.templates)
    if strategy == "keep_first":
        # Only fill in IDs that self does not already provide.
        for template_id, template in other.templates.items():
            combined.setdefault(template_id, template)
    else:
        # "keep_second": other wins on shared IDs. "error": no shared IDs
        # exist (checked above), so a plain update is a pure union.
        combined.update(other.templates)

    merged.templates = combined
    return merged
523
+
524
def to_dataframe(
    self, backend: Literal["pandas", "polars"] = "pandas"
) -> DataFrame:
    """Convert collection to a DataFrame with one row per template.

    Only a flat summary of each template is exported (slot names, not full
    slot definitions). Use ``to_jsonl`` for lossless serialization.

    Parameters
    ----------
    backend : Literal["pandas", "polars"]
        DataFrame backend to use (default: "pandas"). Any value other than
        "pandas" selects polars.

    Returns
    -------
    DataFrame
        pandas or polars DataFrame with columns: id, name, template_string,
        description, slot_count, slot_names, tags, created_at, modified_at.
        ``id`` is the stringified template ID, ``slot_names`` the sorted
        slot names joined with ",", ``tags`` the tags joined with ",", and
        the two timestamps are ISO-8601 strings. An empty collection yields
        an empty frame with the same columns, with ``slot_count`` typed as
        an integer column so the schema matches populated frames.

    Examples
    --------
    >>> from bead.resources import Slot
    >>> collection = TemplateCollection(name="test")
    >>> template = Template(
    ...     name="test", template_string="{x}.", slots={"x": Slot(name="x")}
    ... )
    >>> collection.add(template)
    >>> df = collection.to_dataframe()
    >>> "name" in df.columns
    True
    >>> "template_string" in df.columns
    True
    """
    columns = [
        "id",
        "name",
        "template_string",
        "description",
        "slot_count",
        "slot_names",
        "tags",
        "created_at",
        "modified_at",
    ]

    if not self.templates:
        # No rows to infer dtypes from, so spell the schema out: slot_count
        # is the only non-string column.
        if backend == "pandas":
            return pd.DataFrame(columns=columns).astype({"slot_count": "int64"})
        schema = {
            column: pl.Int64 if column == "slot_count" else pl.Utf8
            for column in columns
        }
        return pl.DataFrame(schema=schema)

    rows: list[dict[str, object]] = [
        {
            "id": str(template.id),
            "name": template.name,
            "template_string": template.template_string,
            "description": template.description,
            "slot_count": len(template.slots),
            # Sorted so the cell value does not depend on slot dict order.
            "slot_names": ",".join(sorted(template.slots.keys())),
            "tags": ",".join(template.tags),
            "created_at": template.created_at.isoformat(),
            "modified_at": template.modified_at.isoformat(),
        }
        for template in self.templates.values()
    ]

    if backend == "pandas":
        return pd.DataFrame(rows, columns=columns)
    return pl.DataFrame(rows)
592
+
593
@classmethod
def from_dataframe(cls, df: DataFrame, name: str) -> TemplateCollection:
    """Create a collection from a DataFrame (columns are validated only).

    Note: the flat DataFrame representation does not carry full slot
    definitions, so rows are NOT turned back into ``Template`` objects.
    This method checks that ``df`` has the required columns and then returns
    an EMPTY collection with the given name. Use ``from_jsonl`` to restore
    templates with full fidelity.

    Parameters
    ----------
    df : DataFrame
        pandas or polars DataFrame with at minimum 'name' and
        'template_string' columns.
    name : str
        Name for the collection.

    Returns
    -------
    TemplateCollection
        New, empty collection named ``name``.

    Raises
    ------
    TypeError
        If ``df`` is neither a pandas nor a polars DataFrame.
    ValueError
        If DataFrame does not have required columns.

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     "name": ["t1", "t2"],
    ...     "template_string": ["{x}.", "{y}."],
    ...     "slot_names": ["x", "y"]
    ... })
    >>> collection = TemplateCollection.from_dataframe(df, "test")  # doctest: +SKIP
    """
    # Explicit check rather than ``assert``: assertions are stripped when
    # Python runs with -O, which would let bad input through.
    if not isinstance(df, (pd.DataFrame, pl.DataFrame)):
        raise TypeError(
            f"df must be a pandas or polars DataFrame, got {type(df).__name__}"
        )

    # ``columns`` is a list in polars and an Index in pandas; list() gives a
    # uniform view of both.
    columns_list = list(df.columns)
    if "name" not in columns_list or "template_string" not in columns_list:
        raise ValueError("DataFrame must have 'name' and 'template_string' columns")

    # Templates cannot be rebuilt here because slot information is missing
    # from the DataFrame; for full serialization use to_jsonl/from_jsonl.
    return cls(name=name)
648
+
649
def to_jsonl(self, path: str) -> None:
    """Write the collection to a JSON Lines file.

    Each template is serialized with ``model_dump_json()`` onto its own
    line, in collection order. Missing parent directories are created and
    an existing file at ``path`` is overwritten. Collection-level fields
    (name, description, tags, ...) are not written.

    Parameters
    ----------
    path : str
        Destination file path.

    Examples
    --------
    >>> from bead.resources import Slot
    >>> collection = TemplateCollection(name="test")
    >>> collection.add(
    ...     Template(name="test", template_string="{x}.", slots={"x": Slot(name="x")})
    ... )
    >>> collection.to_jsonl("/tmp/templates.jsonl")  # doctest: +SKIP
    """
    destination = Path(path)
    destination.parent.mkdir(parents=True, exist_ok=True)

    with destination.open("w", encoding="utf-8") as handle:
        for template in self.templates.values():
            serialized = template.model_dump_json()
            handle.write(serialized + "\n")
673
+
674
@classmethod
def from_jsonl(cls, path: str | Path, name: str) -> TemplateCollection:
    """Load collection from JSONLines file (one template per line).

    This is the counterpart of ``to_jsonl``: each non-blank line is parsed
    with ``Template.model_validate_json``, mirroring the
    ``model_dump_json`` call used when writing. Blank lines are skipped.
    Only templates are restored; collection-level fields other than
    ``name`` keep their defaults.

    Parameters
    ----------
    path : str | Path
        Path to the input file.
    name : str
        Name for the collection.

    Returns
    -------
    TemplateCollection
        New collection loaded from file, with templates in file order.

    Raises
    ------
    ValueError
        If two lines describe templates with the same ID (raised by
        ``add``).

    Examples
    --------
    >>> collection = TemplateCollection.from_jsonl(
    ...     "/tmp/templates.jsonl", "loaded"
    ... )  # doctest: +SKIP
    """
    collection = cls(name=name)

    with Path(path).open(encoding="utf-8") as handle:
        for raw_line in handle:
            record = raw_line.strip()
            if not record:
                continue
            # Validate straight from the JSON text so values are parsed the
            # same way they were dumped.
            collection.add(Template.model_validate_json(record))

    return collection