bead 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,707 @@
|
|
|
1
|
+
"""Template collection management.
|
|
2
|
+
|
|
3
|
+
This module provides the TemplateCollection class for managing collections
|
|
4
|
+
of sentence templates.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
from collections.abc import Callable, Iterator
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Literal
|
|
13
|
+
from uuid import UUID
|
|
14
|
+
|
|
15
|
+
import pandas as pd
|
|
16
|
+
import polars as pl
|
|
17
|
+
from pydantic import Field
|
|
18
|
+
|
|
19
|
+
from bead.data.base import BeadBaseModel
|
|
20
|
+
from bead.resources.template import Template
|
|
21
|
+
|
|
22
|
+
# Type alias for the DataFrame backends this module supports: a value of this
# type is either a pandas DataFrame or a polars DataFrame. It is used in the
# signatures of TemplateCollection.to_dataframe and
# TemplateCollection.from_dataframe.
DataFrame = pd.DataFrame | pl.DataFrame
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _empty_str_list() -> list[str]:
    """Return a new, empty list of strings.

    Serves as a ``default_factory`` so that each caller receives its own
    list object rather than sharing one mutable default.
    """
    fresh: list[str] = []
    return fresh
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _empty_template_dict() -> dict[UUID, Template]:
    """Return a new, empty mapping from template UUID to Template.

    Serves as a ``default_factory`` so that each caller receives its own
    dictionary object rather than sharing one mutable default.
    """
    fresh: dict[UUID, Template] = {}
    return fresh
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class TemplateCollection(BeadBaseModel):
    """A named collection of templates, indexed by template UUID.

    Similar to Lexicon but for Template objects. The collection supports:

    - Adding, removing, and looking up templates by ID (CRUD operations)
    - Filtering by predicate, tag, or slot count
    - Case-insensitive substring search over name or template string
    - Merging with another collection
    - Converting to/from pandas and polars DataFrames
    - Serialization to and from JSONLines

    The mutating methods (``add``, ``add_many``, ``remove``) call
    ``update_modified_time()`` after changing ``templates``. Methods that
    derive a new collection (``filter*``, ``search``, ``merge``) place the
    same Template objects in the new collection; templates are not copied.

    Attributes
    ----------
    name : str
        Name of the collection.
    description : str | None
        Optional description.
    language_code : str | None
        ISO 639-1 or 639-3 language code (e.g., "en", "es", "eng").
    templates : dict[UUID, Template]
        Dictionary of templates indexed by their UUIDs.
    tags : list[str]
        Tags for categorization.

    Examples
    --------
    >>> from bead.resources import Slot
    >>> collection = TemplateCollection(name="transitive")
    >>> template = Template(
    ...     name="simple",
    ...     template_string="{subject} {verb} {object}.",
    ...     slots={
    ...         "subject": Slot(name="subject"),
    ...         "verb": Slot(name="verb"),
    ...         "object": Slot(name="object"),
    ...     }
    ... )
    >>> collection.add(template)
    >>> len(collection)
    1
    """

    name: str
    description: str | None = None
    language_code: str | None = None
    templates: dict[UUID, Template] = Field(default_factory=_empty_template_dict)
    tags: list[str] = Field(default_factory=_empty_str_list)

    def __len__(self) -> int:
        """Return the number of templates in the collection.

        Returns
        -------
        int
            Number of entries in ``templates``.

        Examples
        --------
        >>> collection = TemplateCollection(name="test")
        >>> len(collection)
        0
        """
        return len(self.templates)

    def __iter__(self) -> Iterator[Template]:  # type: ignore[override]
        """Iterate over the templates (values, not IDs) in the collection.

        Returns
        -------
        Iterator[Template]
            Iterator over the values of ``templates``, in the dictionary's
            iteration order.

        Examples
        --------
        >>> from bead.resources import Slot
        >>> collection = TemplateCollection(name="test")
        >>> t1 = Template(
        ...     name="t1", template_string="{x}.", slots={"x": Slot(name="x")}
        ... )
        >>> t2 = Template(
        ...     name="t2", template_string="{y}.", slots={"y": Slot(name="y")}
        ... )
        >>> collection.add(t1)
        >>> collection.add(t2)
        >>> [t.name for t in collection]
        ['t1', 't2']
        """
        return iter(self.templates.values())

    def __contains__(self, template_id: UUID) -> bool:
        """Check whether a template ID is present in the collection.

        Parameters
        ----------
        template_id : UUID
            The template ID to look up.

        Returns
        -------
        bool
            True if ``template_id`` is a key of ``templates``.

        Examples
        --------
        >>> from bead.resources import Slot
        >>> collection = TemplateCollection(name="test")
        >>> template = Template(
        ...     name="test", template_string="{x}.", slots={"x": Slot(name="x")}
        ... )
        >>> collection.add(template)
        >>> template.id in collection
        True
        """
        return template_id in self.templates

    def add(self, template: Template) -> None:
        """Add a single template to the collection.

        Parameters
        ----------
        template : Template
            The template to add. It is stored under ``template.id``.

        Raises
        ------
        ValueError
            If a template with the same ID already exists.

        Examples
        --------
        >>> from bead.resources import Slot
        >>> collection = TemplateCollection(name="test")
        >>> template = Template(
        ...     name="test", template_string="{x}.", slots={"x": Slot(name="x")}
        ... )
        >>> collection.add(template)
        >>> len(collection)
        1
        """
        template_id = template.id
        if template_id in self.templates:
            raise ValueError(
                f"Template with ID {template_id} already exists in collection"
            )
        self.templates[template_id] = template
        self.update_modified_time()

    def add_many(self, templates: list[Template]) -> None:
        """Add multiple templates to the collection, all or nothing.

        The whole batch is checked for ID clashes (against the collection
        and against earlier items of the batch) before anything is added,
        so a failing call leaves the collection unchanged.

        Parameters
        ----------
        templates : list[Template]
            The templates to add.

        Raises
        ------
        ValueError
            If any template's ID already exists in the collection or occurs
            more than once in ``templates``.

        Examples
        --------
        >>> from bead.resources import Slot
        >>> collection = TemplateCollection(name="test")
        >>> t1 = Template(
        ...     name="t1", template_string="{x}.", slots={"x": Slot(name="x")}
        ... )
        >>> t2 = Template(
        ...     name="t2", template_string="{y}.", slots={"y": Slot(name="y")}
        ... )
        >>> collection.add_many([t1, t2])
        >>> len(collection)
        2
        """
        # Materialize once so the batch can be scanned twice safely.
        batch = list(templates)

        # Validation pass: reject the batch before mutating anything.
        seen: set[UUID] = set()
        for template in batch:
            if template.id in self.templates or template.id in seen:
                raise ValueError(
                    f"Template with ID {template.id} already exists in collection"
                )
            seen.add(template.id)

        # Insertion pass: goes through add() so its bookkeeping still runs.
        for template in batch:
            self.add(template)

    def remove(self, template_id: UUID) -> Template:
        """Remove a template by ID and return it.

        Parameters
        ----------
        template_id : UUID
            The ID of the template to remove.

        Returns
        -------
        Template
            The removed template.

        Raises
        ------
        KeyError
            If template ID not found.

        Examples
        --------
        >>> from bead.resources import Slot
        >>> collection = TemplateCollection(name="test")
        >>> template = Template(
        ...     name="test", template_string="{x}.", slots={"x": Slot(name="x")}
        ... )
        >>> collection.add(template)
        >>> removed = collection.remove(template.id)
        >>> removed.name
        'test'
        >>> len(collection)
        0
        """
        if template_id not in self.templates:
            raise KeyError(f"Template with ID {template_id} not found in collection")
        removed = self.templates.pop(template_id)
        self.update_modified_time()
        return removed

    def get(self, template_id: UUID) -> Template | None:
        """Look up a template by ID without raising.

        Parameters
        ----------
        template_id : UUID
            The ID of the template to get.

        Returns
        -------
        Template | None
            The template if found, None otherwise.

        Examples
        --------
        >>> from bead.resources import Slot
        >>> collection = TemplateCollection(name="test")
        >>> template = Template(
        ...     name="test", template_string="{x}.", slots={"x": Slot(name="x")}
        ... )
        >>> collection.add(template)
        >>> retrieved = collection.get(template.id)
        >>> retrieved.name  # doctest: +SKIP
        'test'
        >>> from uuid import uuid4
        >>> collection.get(uuid4()) is None
        True
        """
        return self.templates.get(template_id)

    def filter(self, predicate: Callable[[Template], bool]) -> TemplateCollection:
        """Build a new collection from the templates that satisfy a predicate.

        The new collection is named ``"<name>_filtered"`` and carries over
        this collection's description, language code, and a copy of its tag
        list. This collection is not modified.

        Parameters
        ----------
        predicate : Callable[[Template], bool]
            Function that returns True for templates to include.

        Returns
        -------
        TemplateCollection
            New collection with the matching templates.

        Examples
        --------
        >>> from bead.resources import Slot
        >>> collection = TemplateCollection(name="test")
        >>> t1 = Template(
        ...     name="t1",
        ...     template_string="{x}.",
        ...     slots={"x": Slot(name="x")},
        ...     tags=["simple"],
        ... )
        >>> t2 = Template(
        ...     name="t2",
        ...     template_string="{y} {z}.",
        ...     slots={"y": Slot(name="y"), "z": Slot(name="z")},
        ...     tags=["complex"],
        ... )
        >>> collection.add(t1)
        >>> collection.add(t2)
        >>> simple = collection.filter(lambda t: "simple" in t.tags)
        >>> len(simple.templates)
        1
        """
        filtered = TemplateCollection(
            name=f"{self.name}_filtered",
            description=self.description,
            language_code=self.language_code,
            tags=self.tags.copy(),
        )
        filtered.templates = {
            template_id: template
            for template_id, template in self.templates.items()
            if predicate(template)
        }
        return filtered

    def filter_by_tag(self, tag: str) -> TemplateCollection:
        """Build a new collection from the templates carrying a given tag.

        Parameters
        ----------
        tag : str
            The tag to filter by; a template matches when ``tag`` is an
            element of its ``tags``.

        Returns
        -------
        TemplateCollection
            New collection with templates having the specified tag.

        Examples
        --------
        >>> from bead.resources import Slot
        >>> collection = TemplateCollection(name="test")
        >>> t1 = Template(
        ...     name="t1",
        ...     template_string="{x}.",
        ...     slots={"x": Slot(name="x")},
        ...     tags=["simple"],
        ... )
        >>> t2 = Template(
        ...     name="t2",
        ...     template_string="{y}.",
        ...     slots={"y": Slot(name="y")},
        ...     tags=["complex"],
        ... )
        >>> collection.add(t1)
        >>> collection.add(t2)
        >>> simple = collection.filter_by_tag("simple")
        >>> len(simple.templates)
        1
        """
        return self.filter(lambda template: tag in template.tags)

    def filter_by_slot_count(self, count: int) -> TemplateCollection:
        """Build a new collection from the templates with exactly ``count`` slots.

        Parameters
        ----------
        count : int
            The number of slots a template must have to be included.

        Returns
        -------
        TemplateCollection
            New collection with templates having the specified slot count.

        Examples
        --------
        >>> from bead.resources import Slot
        >>> collection = TemplateCollection(name="test")
        >>> t1 = Template(
        ...     name="t1", template_string="{x}.", slots={"x": Slot(name="x")}
        ... )
        >>> t2 = Template(
        ...     name="t2",
        ...     template_string="{y} {z}.",
        ...     slots={"y": Slot(name="y"), "z": Slot(name="z")},
        ... )
        >>> collection.add(t1)
        >>> collection.add(t2)
        >>> single_slot = collection.filter_by_slot_count(1)
        >>> len(single_slot.templates)
        1
        """
        return self.filter(lambda template: len(template.slots) == count)

    def search(self, query: str, field: str = "name") -> TemplateCollection:
        """Search for templates whose field contains the query string.

        Parameters
        ----------
        query : str
            Search string (case-insensitive substring match).
        field : str
            Field to search in ("name", "template_string").

        Returns
        -------
        TemplateCollection
            New collection with matching templates.

        Raises
        ------
        ValueError
            If field is not a valid searchable field.

        Examples
        --------
        >>> from bead.resources import Slot
        >>> collection = TemplateCollection(name="test")
        >>> template = Template(
        ...     name="transitive",
        ...     template_string="{x}.",
        ...     slots={"x": Slot(name="x")},
        ... )
        >>> collection.add(template)
        >>> results = collection.search("trans")
        >>> len(results.templates)
        1
        """
        # Lower-case the query once; each candidate is lower-cased per test.
        query_lower = query.lower()

        if field == "name":
            return self.filter(lambda template: query_lower in template.name.lower())
        if field == "template_string":
            return self.filter(
                lambda template: query_lower in template.template_string.lower()
            )
        raise ValueError(
            f"Invalid field '{field}'. Must be 'name' or 'template_string'."
        )

    def merge(
        self,
        other: TemplateCollection,
        strategy: Literal["keep_first", "keep_second", "error"] = "keep_first",
    ) -> TemplateCollection:
        """Merge with another collection into a new collection.

        The result is named ``"<name>_merged"``, takes this collection's
        description, uses this collection's language code (falling back to
        ``other``'s when this one is unset), and carries the de-duplicated
        tags of both collections in first-seen order. Templates from this
        collection come first in the result, followed by templates that
        only ``other`` has. Neither input collection is modified.

        Parameters
        ----------
        other : TemplateCollection
            The collection to merge with.
        strategy : Literal["keep_first", "keep_second", "error"]
            How to handle duplicate IDs:
            - "keep_first": Keep template from self
            - "keep_second": Keep template from other
            - "error": Raise error on duplicates

        Returns
        -------
        TemplateCollection
            New merged collection.

        Raises
        ------
        ValueError
            If strategy is "error" and duplicates found.

        Examples
        --------
        >>> from bead.resources import Slot
        >>> c1 = TemplateCollection(name="c1")
        >>> c1.add(
        ...     Template(
        ...         name="t1", template_string="{x}.", slots={"x": Slot(name="x")}
        ...     )
        ... )
        >>> c2 = TemplateCollection(name="c2")
        >>> c2.add(
        ...     Template(
        ...         name="t2", template_string="{y}.", slots={"y": Slot(name="y")}
        ...     )
        ... )
        >>> merged = c1.merge(c2)
        >>> len(merged.templates)
        2
        """
        # With the "error" strategy, fail before building anything.
        if strategy == "error":
            duplicates = set(self.templates.keys()) & set(other.templates.keys())
            if duplicates:
                raise ValueError(
                    f"Duplicate template IDs found: {duplicates}. "
                    "Use strategy='keep_first' or 'keep_second' to resolve."
                )

        merged = TemplateCollection(
            name=f"{self.name}_merged",
            description=self.description,
            # Use language_code from self, or other's if self's is unset.
            language_code=self.language_code or other.language_code,
            # dict.fromkeys de-duplicates while keeping first-seen order, so
            # the tag list is the same on every run (a set's order is not).
            tags=list(dict.fromkeys(self.tags + other.tags)),
        )

        if strategy == "keep_first":
            # Start from self so its entries win and keep their position;
            # setdefault only adds IDs that self does not already have.
            combined = dict(self.templates)
            for template_id, template in other.templates.items():
                combined.setdefault(template_id, template)
        else:
            # "keep_second": other's entries override self's on a clash.
            # "error": no clashes exist here, so a plain union is enough.
            combined = {**self.templates, **other.templates}

        merged.templates = combined
        return merged

    def to_dataframe(
        self, backend: Literal["pandas", "polars"] = "pandas"
    ) -> DataFrame:
        """Convert the collection to a DataFrame with one row per template.

        Slot names and tags are flattened to comma-separated strings (slot
        names sorted), IDs are rendered with ``str`` and timestamps with
        ``isoformat()``. Only slot names are exported, not the Slot objects.

        Parameters
        ----------
        backend : Literal["pandas", "polars"]
            DataFrame backend to use (default: "pandas"). Any value other
            than "pandas" selects polars.

        Returns
        -------
        DataFrame
            pandas or polars DataFrame with columns: id, name, template_string,
            description, slot_count, slot_names, tags, created_at, modified_at.
            An empty collection yields an empty frame with these columns.

        Examples
        --------
        >>> from bead.resources import Slot
        >>> collection = TemplateCollection(name="test")
        >>> template = Template(
        ...     name="test", template_string="{x}.", slots={"x": Slot(name="x")}
        ... )
        >>> collection.add(template)
        >>> df = collection.to_dataframe()
        >>> "name" in df.columns
        True
        >>> "template_string" in df.columns
        True
        """
        if not self.templates:
            # Return an empty DataFrame that still has the expected columns.
            columns = [
                "id",
                "name",
                "template_string",
                "description",
                "slot_count",
                "slot_names",
                "tags",
                "created_at",
                "modified_at",
            ]
            if backend == "pandas":
                return pd.DataFrame(columns=columns)
            # slot_count holds integers in populated frames, so give the
            # empty frame an integer column too; the rest are strings.
            schema = {
                column: pl.Int64 if column == "slot_count" else pl.Utf8
                for column in columns
            }
            return pl.DataFrame(schema=schema)

        rows = [
            {
                "id": str(template.id),
                "name": template.name,
                "template_string": template.template_string,
                "description": template.description,
                "slot_count": len(template.slots),
                "slot_names": ",".join(sorted(template.slots.keys())),
                "tags": ",".join(template.tags),
                "created_at": template.created_at.isoformat(),
                "modified_at": template.modified_at.isoformat(),
            }
            for template in self.templates.values()
        ]

        if backend == "pandas":
            return pd.DataFrame(rows)
        return pl.DataFrame(rows)

    @classmethod
    def from_dataframe(cls, df: DataFrame, name: str) -> TemplateCollection:
        """Create a collection from a DataFrame, one template per row.

        This is the approximate inverse of ``to_dataframe``. Each row is
        turned into a Template using these columns:

        - ``name`` and ``template_string`` (required)
        - ``slot_names`` (optional): comma-separated slot names; each name
          becomes ``Slot(name=<name>)``
        - ``tags`` (optional): comma-separated tags
        - ``description`` and ``id`` (optional): passed through when present

        Optional cells that are not strings (e.g. missing values) are
        ignored. The DataFrame representation only carries slot names, so
        any further slot configuration is not restored, and the
        ``created_at`` / ``modified_at`` columns are not read. Use
        ``from_jsonl`` for full template serialization.

        Parameters
        ----------
        df : DataFrame
            pandas or polars DataFrame with at minimum 'name' and
            'template_string' columns.
        name : str
            Name for the collection.

        Returns
        -------
        TemplateCollection
            New collection created from DataFrame.

        Raises
        ------
        TypeError
            If ``df`` is neither a pandas nor a polars DataFrame.
        ValueError
            If DataFrame does not have required columns, or if two rows
            carry the same template ID.

        Examples
        --------
        >>> import pandas as pd
        >>> df = pd.DataFrame({
        ...     "name": ["t1", "t2"],
        ...     "template_string": ["{x}.", "{y}."],
        ...     "slot_names": ["x", "y"]
        ... })
        >>> collection = TemplateCollection.from_dataframe(df, "test")  # doctest: +SKIP
        >>> len(collection)  # doctest: +SKIP
        2
        """
        # Imported at call time, via the same path the examples in this
        # module use, so loading this module does not depend on the package
        # root being fully initialised.
        from bead.resources import Slot

        # Get columns, handling both pandas and polars.
        is_polars = isinstance(df, pl.DataFrame)
        if is_polars:
            columns_list: list[str] = df.columns
        elif isinstance(df, pd.DataFrame):
            columns_list = list(df.columns)
        else:
            raise TypeError(
                "df must be a pandas or polars DataFrame, "
                f"got {type(df).__name__}"
            )

        if "name" not in columns_list or "template_string" not in columns_list:
            raise ValueError("DataFrame must have 'name' and 'template_string' columns")

        # Both backends can hand back plain per-row dictionaries.
        if is_polars:
            records = df.to_dicts()
        else:
            records = df.to_dict(orient="records")

        collection = cls(name=name)
        for record in records:
            template_fields: dict[str, object] = {
                "name": record["name"],
                "template_string": record["template_string"],
            }

            # Rebuild name-only slots from the comma-separated column.
            raw_slot_names = record.get("slot_names")
            slot_names = (
                [part.strip() for part in raw_slot_names.split(",")]
                if isinstance(raw_slot_names, str)
                else []
            )
            template_fields["slots"] = {
                slot_name: Slot(name=slot_name)
                for slot_name in slot_names
                if slot_name
            }

            raw_tags = record.get("tags")
            if isinstance(raw_tags, str):
                template_fields["tags"] = [tag for tag in raw_tags.split(",") if tag]

            description = record.get("description")
            if isinstance(description, str):
                template_fields["description"] = description

            template_id = record.get("id")
            if isinstance(template_id, (str, UUID)):
                template_fields["id"] = template_id

            collection.add(Template(**template_fields))

        return collection

    def to_jsonl(self, path: str | Path) -> None:
        """Save the collection's templates to a JSONLines file.

        Writes one line per template, produced by the template's
        ``model_dump_json()``. Only the templates are written; the
        collection's own name, description, language code, and tags are not
        part of the file. Missing parent directories are created and an
        existing file is overwritten.

        Parameters
        ----------
        path : str | Path
            Path to the output file.

        Examples
        --------
        >>> from bead.resources import Slot
        >>> collection = TemplateCollection(name="test")
        >>> template = Template(
        ...     name="test", template_string="{x}.", slots={"x": Slot(name="x")}
        ... )
        >>> collection.add(template)
        >>> collection.to_jsonl("/tmp/templates.jsonl")  # doctest: +SKIP
        """
        file_path = Path(path)
        file_path.parent.mkdir(parents=True, exist_ok=True)

        with file_path.open("w", encoding="utf-8") as f:
            for template in self.templates.values():
                f.write(template.model_dump_json() + "\n")

    @classmethod
    def from_jsonl(cls, path: str | Path, name: str) -> TemplateCollection:
        """Load a collection from a JSONLines file.

        Each non-blank line is parsed as JSON and passed to ``Template`` as
        keyword arguments; blank lines are skipped. The file holds only
        templates, so the collection's name comes from ``name`` and its
        other fields keep their defaults.

        Parameters
        ----------
        path : str | Path
            Path to the input file.
        name : str
            Name for the collection.

        Returns
        -------
        TemplateCollection
            New collection loaded from file.

        Raises
        ------
        ValueError
            If two lines describe templates with the same ID (raised by
            ``add``).

        Examples
        --------
        >>> collection = TemplateCollection.from_jsonl(
        ...     "/tmp/templates.jsonl", "loaded"
        ... )  # doctest: +SKIP
        """
        collection = cls(name=name)
        file_path = Path(path)

        with file_path.open(encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                template_data = json.loads(line)
                collection.add(Template(**template_data))

        return collection
|