bead 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
"""Lexicon loading utilities for various data formats.
|
|
2
|
+
|
|
3
|
+
This module provides functions for loading Lexicon objects from
|
|
4
|
+
various data formats (CSV, TSV) with flexible column mapping.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from collections.abc import Iterator
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
import pandas as pd
|
|
14
|
+
from pandas import DataFrame, Series
|
|
15
|
+
|
|
16
|
+
from bead.data.language_codes import LanguageCode
|
|
17
|
+
from bead.resources.lexical_item import LexicalItem
|
|
18
|
+
from bead.resources.lexicon import Lexicon
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def from_csv(
    path: str | Path,
    name: str,
    *,
    language_code: LanguageCode,
    column_mapping: dict[str, str] | None = None,
    feature_columns: list[str] | None = None,
    pos: str | None = None,
    description: str | None = None,
    **csv_kwargs: Any,
) -> Lexicon:
    """Load lexicon from CSV file with flexible column mapping.

    Parameters
    ----------
    path : str | Path
        Path to the CSV file.
    name : str
        Name for the lexicon.
    language_code : LanguageCode
        ISO 639-3 language code for all items.
    column_mapping : dict[str, str] | None
        Mapping from CSV column names to feature names.
        Only the targets "lemma" and "pos" are consulted.
        Example: {"word": "lemma"}
    feature_columns : list[str] | None
        CSV column names to include in features dict. Columns that are
        not present in the file are ignored, as are cells with no value.
        Example: ["number", "tense", "countability", "semantic_class"]
    pos : str | None
        Part-of-speech tag to assign to all items (e.g., "NOUN", "VERB").
        Will be added to features dict as "pos". A non-null value in a
        column mapped to "pos" takes precedence for that row.
    description : str | None
        Optional description of the lexicon.
    **csv_kwargs : Any
        Additional keyword arguments passed to pandas.read_csv().

    Returns
    -------
    Lexicon
        New lexicon loaded from CSV.

    Raises
    ------
    ValueError
        If required "lemma" column/mapping is missing.
    FileNotFoundError
        If CSV file does not exist.

    Notes
    -----
    Rows whose lemma cell is missing (NaN after parsing) are skipped and
    reported through a single warning, rather than being stored as an
    item whose lemma is the literal string "nan".

    Examples
    --------
    >>> lexicon = from_csv(
    ...     "bleached_nouns.csv",
    ...     "nouns",
    ...     language_code="eng",
    ...     column_mapping={"word": "lemma"},
    ...     feature_columns=["number", "countability", "semantic_class"],
    ...     pos="NOUN"
    ... )  # doctest: +SKIP
    """
    file_path = Path(path)
    if not file_path.exists():
        raise FileNotFoundError(f"CSV file not found: {file_path}")

    # read CSV
    df: DataFrame = pd.read_csv(file_path, **csv_kwargs)

    # column_mapping is "CSV column -> feature name"; invert it so the CSV
    # column that supplies a given feature can be looked up directly
    mapping = column_mapping or {}
    reverse_mapping = {v: k for k, v in mapping.items()}

    # check for required lemma column
    lemma_col = reverse_mapping.get("lemma", "lemma")
    columns_list = list(df.columns)
    if lemma_col not in columns_list:
        raise ValueError(
            f"CSV must have a 'lemma' column or provide column_mapping. "
            f"Available columns: {columns_list}"
        )

    # resolve everything that does not depend on the row once, up front
    pos_col = reverse_mapping.get("pos")
    use_pos_col = bool(pos_col) and pos_col in columns_list
    present_feature_columns = [
        col for col in (feature_columns or []) if col in columns_list
    ]

    # create lexicon
    lexicon = Lexicon(
        name=name,
        description=description,
        language_code=language_code,
    )

    skipped_rows = 0

    # process each row
    row_iter: Iterator[tuple[int | str, Series[Any]]] = df.iterrows()
    for _, row_data in row_iter:
        row: Series[Any] = row_data

        # a missing lemma would otherwise be stringified to "nan"
        raw_lemma = row[lemma_col]
        if pd.isna(raw_lemma):
            skipped_rows += 1
            continue
        lemma = str(raw_lemma)

        # build features dict
        features: dict[str, Any] = {}

        # add POS if provided
        if pos:
            features["pos"] = pos

        # a per-row value from the mapped "pos" column overrides the default
        if use_pos_col and pd.notna(row[pos_col]):
            features["pos"] = str(row[pos_col])

        # add feature columns, keeping plain scalars and stringifying the rest
        for col in present_feature_columns:
            val = row[col]
            if pd.notna(val):
                if isinstance(val, str | int | float | bool):
                    features[col] = val
                else:
                    features[col] = str(val)

        # create and add item
        lexicon.add(
            LexicalItem(
                lemma=lemma,
                language_code=language_code,
                features=features,
                source=None,
            )
        )

    if skipped_rows:
        import warnings  # noqa: PLC0415

        warnings.warn(
            f"Skipped {skipped_rows} row(s) with a missing lemma in {file_path}",
            stacklevel=2,
        )

    return lexicon
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def from_tsv(
    path: str | Path,
    name: str,
    *,
    language_code: LanguageCode,
    column_mapping: dict[str, str] | None = None,
    feature_columns: list[str] | None = None,
    pos: str | None = None,
    description: str | None = None,
    **tsv_kwargs: Any,
) -> Lexicon:
    r"""Load lexicon from TSV file with flexible column mapping.

    This is a convenience wrapper around from_csv() that defaults to
    sep="\t". An explicit ``sep`` or ``delimiter`` in ``tsv_kwargs`` is
    passed through unchanged instead of clashing with the default.

    Parameters
    ----------
    path : str | Path
        Path to the TSV file.
    name : str
        Name for the lexicon.
    language_code : LanguageCode
        ISO 639-3 language code for all items.
    column_mapping : dict[str, str] | None
        Mapping from TSV column names to feature names.
    feature_columns : list[str] | None
        TSV column names to include in features dict.
    pos : str | None
        Part-of-speech tag to assign to all items.
    description : str | None
        Optional description of the lexicon.
    **tsv_kwargs : Any
        Additional keyword arguments passed to pandas.read_csv().

    Returns
    -------
    Lexicon
        New lexicon loaded from TSV.

    Examples
    --------
    >>> lexicon = from_tsv(
    ...     "verbs.tsv",
    ...     "verbs",
    ...     language_code="eng",
    ...     feature_columns=["tense", "aspect"],
    ...     pos="VERB"
    ... )  # doctest: +SKIP
    """
    # Passing sep="\t" alongside a caller-supplied ``sep`` raises TypeError
    # (duplicate keyword), and alongside ``delimiter`` pandas rejects the
    # pair, so only apply the tab default when no separator was chosen.
    if "sep" not in tsv_kwargs and tsv_kwargs.get("delimiter") is None:
        tsv_kwargs["sep"] = "\t"

    return from_csv(
        path=path,
        name=name,
        language_code=language_code,
        column_mapping=column_mapping,
        feature_columns=feature_columns,
        pos=pos,
        description=description,
        **tsv_kwargs,
    )
|
|
@@ -0,0 +1,441 @@
|
|
|
1
|
+
"""Template and structure models for sentence generation.
|
|
2
|
+
|
|
3
|
+
This module provides models for sentence templates and their structures.
|
|
4
|
+
Templates contain slots that are filled with lexical items during
|
|
5
|
+
sentence generation.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import re
|
|
11
|
+
from typing import TYPE_CHECKING
|
|
12
|
+
|
|
13
|
+
from pydantic import Field, field_validator, model_validator
|
|
14
|
+
|
|
15
|
+
from bead.data.base import BeadBaseModel
|
|
16
|
+
from bead.data.language_codes import LanguageCode
|
|
17
|
+
from bead.resources.constraints import Constraint
|
|
18
|
+
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from bead.items.item_template import MetadataValue
|
|
21
|
+
from bead.templates.filler import FilledTemplate
|
|
22
|
+
else:
|
|
23
|
+
# Recursive type for metadata values
|
|
24
|
+
type MetadataValue = (
|
|
25
|
+
str | int | float | bool | None | dict[str, MetadataValue] | list[MetadataValue]
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _empty_constraint_list() -> list[Constraint]:
|
|
30
|
+
"""Create an empty constraint list."""
|
|
31
|
+
return []
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _empty_str_list() -> list[str]:
|
|
35
|
+
"""Create an empty string list."""
|
|
36
|
+
return []
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class Slot(BeadBaseModel):
    """A named position in a template that a lexical item can occupy.

    Attributes
    ----------
    name : str
        Unique name for the slot within the template. Must be a valid
        Python identifier.
    description : str | None
        Human-readable description of the slot's purpose.
    constraints : list[Constraint]
        Constraints that determine valid fillers.
    required : bool
        Whether the slot must be filled.
    default_value : str | None
        Default value if slot is not filled.

    Examples
    --------
    >>> from bead.resources.constraints import Constraint
    >>> slot = Slot(
    ...     name="subject",
    ...     description="The subject of the sentence",
    ...     constraints=[
    ...         Constraint(expression="self.features.pos == 'NOUN'")
    ...     ],
    ...     required=True
    ... )
    """

    name: str
    description: str | None = None
    constraints: list[Constraint] = Field(default_factory=_empty_constraint_list)
    required: bool = True
    default_value: str | None = None

    @field_validator("name")
    @classmethod
    def validate_name(cls, v: str) -> str:
        """Check that the slot name is non-blank and a Python identifier.

        Parameters
        ----------
        v : str
            The slot name to validate.

        Returns
        -------
        str
            The slot name, unchanged.

        Raises
        ------
        ValueError
            If the name is empty, whitespace-only, or not a valid
            Python identifier.
        """
        # blank names get their own message before the identifier check
        if not (v and v.strip()):
            raise ValueError("name must be non-empty")
        if v.isidentifier():
            return v
        raise ValueError(f"name '{v}' must be a valid Python identifier")
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class Template(BeadBaseModel):
    """A sentence template with slots for lexical items.

    Templates define the structure of generated sentences. They contain:
    - A template string with slot placeholders (e.g., "{subject} {verb} {object}")
    - Slot definitions with constraints
    - Optional language code
    - Optional metadata

    Attributes
    ----------
    name : str
        Unique name for the template.
    template_string : str
        Template with {slot_name} placeholders.
    slots : dict[str, Slot]
        Slot definitions keyed by slot name.
    constraints : list[Constraint]
        Multi-slot constraints (slot names as variables in DSL expressions).
    description : str | None
        Human-readable description.
    language_code : LanguageCode | None
        ISO 639-1 (2-letter) or ISO 639-3 (3-letter) language code.
        Examples: "en", "eng", "ko", "kor", "zu", "zul".
        Required for cross-linguistic classification via TemplateClass.
    tags : list[str]
        Tags for categorization.
    metadata : dict[str, MetadataValue]
        Additional metadata.

    Examples
    --------
    >>> template = Template(
    ...     name="simple_transitive",
    ...     template_string="{subject} {verb} {object}.",
    ...     slots={
    ...         "subject": Slot(name="subject", required=True),
    ...         "verb": Slot(name="verb", required=True),
    ...         "object": Slot(name="object", required=True)
    ...     },
    ...     tags=["transitive", "simple"]
    ... )
    """

    name: str
    template_string: str
    slots: dict[str, Slot] = Field(default_factory=dict)
    constraints: list[Constraint] = Field(default_factory=_empty_constraint_list)
    description: str | None = None
    language_code: LanguageCode | None = None
    tags: list[str] = Field(default_factory=_empty_str_list)
    metadata: dict[str, MetadataValue] = Field(default_factory=dict)

    @field_validator("name")
    @classmethod
    def validate_name(cls, v: str) -> str:
        """Validate that name is non-empty.

        Parameters
        ----------
        v : str
            The template name to validate.

        Returns
        -------
        str
            The validated template name.

        Raises
        ------
        ValueError
            If name is empty or whitespace-only.
        """
        if not v or not v.strip():
            raise ValueError("name must be non-empty")
        return v

    @field_validator("template_string")
    @classmethod
    def validate_template_string(cls, v: str) -> str:
        """Validate that template_string is non-empty.

        Parameters
        ----------
        v : str
            The template string to validate.

        Returns
        -------
        str
            The validated template string.

        Raises
        ------
        ValueError
            If template_string is empty or whitespace-only.
        """
        if not v or not v.strip():
            raise ValueError("template_string must be non-empty")
        return v

    @model_validator(mode="after")
    def validate_slots_match_template(self) -> Template:
        """Validate that template_string and slots are consistent.

        Ensures that:
        - All slot names in template_string exist in slots dict
        - All slots in dict are referenced in template_string
        - Slot names match their keys in the dict

        Returns
        -------
        Template
            The validated template.

        Raises
        ------
        ValueError
            If template_string and slots are inconsistent.
        """
        # Extract slot names from template string
        template_slots = set(re.findall(r"\{(\w+)\}", self.template_string))

        # Get slot names from slots dict
        dict_slots = set(self.slots.keys())

        # Check that all template slots exist in dict
        missing_in_dict = template_slots - dict_slots
        if missing_in_dict:
            raise ValueError(
                f"Template references slots not in slots dict: {missing_in_dict}"
            )

        # Check that all dict slots are referenced in template
        missing_in_template = dict_slots - template_slots
        if missing_in_template:
            raise ValueError(
                f"Slots dict contains slots not referenced in template: "
                f"{missing_in_template}"
            )

        # Check that slot names match their keys
        for key, slot in self.slots.items():
            if slot.name != key:
                raise ValueError(
                    f"Slot key '{key}' does not match slot name '{slot.name}'"
                )

        return self

    @property
    def required_slot_names(self) -> set[str]:
        """Get names of all required slots.

        Returns
        -------
        set[str]
            Set of slot names where required=True.
        """
        return {name for name, slot in self.slots.items() if slot.required}

    def fill_with_values(
        self, slot_values: dict[str, str], strategy_name: str = "manual"
    ) -> FilledTemplate:
        """Create a FilledTemplate by filling slots with string values.

        This is a lightweight alternative to CSPFiller for cases where
        you already have the values and just need a FilledTemplate object.

        Placeholders are substituted in a single pass over the template
        string, so a value that itself looks like a placeholder (for
        example ``"{verb}"``) is inserted literally and never expanded
        again. Placeholders with no entry in ``slot_values`` are left
        in the rendered text unchanged.

        Parameters
        ----------
        slot_values : dict[str, str]
            Mapping of slot names to string values to fill them with.
            Names that are not slots of this template get no entry in
            the resulting slot fillers.
        strategy_name : str
            Name of strategy used (for metadata).

        Returns
        -------
        FilledTemplate
            A filled template with the provided values.

        Examples
        --------
        >>> template = Template(
        ...     name="test",
        ...     template_string="{subj} {verb}.",
        ...     slots={"subj": Slot(name="subj"), "verb": Slot(name="verb")}
        ... )
        >>> filled = template.fill_with_values({"subj": "cat", "verb": "runs"})
        >>> filled.rendered_text
        'cat runs.'
        """
        # imported here rather than at module level, where FilledTemplate is
        # only imported under TYPE_CHECKING
        from bead.resources.lexical_item import LexicalItem  # noqa: PLC0415
        from bead.templates.filler import FilledTemplate  # noqa: PLC0415

        # Create a minimal LexicalItem (lemma only) for each known slot
        slot_fillers = {
            slot_name: LexicalItem(
                lemma=value,
                language_code=self.language_code or "eng",
                features={"pos": "UNKNOWN"},
            )
            for slot_name, value in slot_values.items()
            if slot_name in self.slots
        }

        def _substitute(match: re.Match[str]) -> str:
            # keep the placeholder as-is when no value was supplied for it
            return slot_values.get(match.group(1), match.group(0))

        # One pass over the template: chained str.replace calls would
        # re-scan already-inserted values and expand placeholders in them.
        # A function replacement also avoids backslash-escape processing.
        rendered_text = re.sub(r"\{([^{}]*)\}", _substitute, self.template_string)

        # Create template_slots mapping (slot_name -> is_required)
        template_slots = {name: slot.required for name, slot in self.slots.items()}

        return FilledTemplate(
            template_id=str(self.id),
            template_name=self.name,
            slot_fillers=slot_fillers,
            rendered_text=rendered_text,
            strategy_name=strategy_name,
            template_slots=template_slots,
        )
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def _empty_template_list() -> list[Template]:
|
|
326
|
+
"""Create an empty template list."""
|
|
327
|
+
return []
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
class TemplateSequence(BeadBaseModel):
    """An ordered group of templates that are filled together.

    Grouping templates in a sequence lets constraints relate slots
    across templates (e.g., relational constraints across templates).

    Attributes
    ----------
    name : str
        Unique name for the sequence.
    templates : list[Template]
        Ordered list of templates.
    constraints : list[Constraint]
        Cross-template constraints (span multiple templates).

    Examples
    --------
    >>> sequence = TemplateSequence(
    ...     name="question_answer",
    ...     templates=[question_template, answer_template],
    ...     constraints=[...]
    ... )
    """

    name: str
    templates: list[Template] = Field(default_factory=_empty_template_list)
    constraints: list[Constraint] = Field(default_factory=_empty_constraint_list)

    @field_validator("name")
    @classmethod
    def validate_name(cls, v: str) -> str:
        """Reject empty or whitespace-only sequence names.

        Parameters
        ----------
        v : str
            The sequence name to validate.

        Returns
        -------
        str
            The sequence name, unchanged.

        Raises
        ------
        ValueError
            If name is empty or whitespace-only.
        """
        if v and v.strip():
            return v
        raise ValueError("name must be non-empty")
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def _empty_tree_list() -> list[TemplateTree]:
|
|
384
|
+
"""Create an empty template tree list."""
|
|
385
|
+
return []
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
class TemplateTree(BeadBaseModel):
    """A recursive tree whose nodes each hold one template.

    Template trees represent hierarchical relationships between
    templates (e.g., a discourse structure).

    Attributes
    ----------
    name : str
        Unique name for the tree.
    root : Template
        Root template.
    children : list[TemplateTree]
        Child subtrees.

    Examples
    --------
    >>> tree = TemplateTree(
    ...     name="discourse",
    ...     root=intro_template,
    ...     children=[
    ...         TemplateTree(name="body", root=body_template, children=[]),
    ...         TemplateTree(name="conclusion", root=conclusion_template, children=[])
    ...     ]
    ... )
    """

    name: str
    root: Template
    children: list[TemplateTree] = Field(default_factory=_empty_tree_list)

    @field_validator("name")
    @classmethod
    def validate_name(cls, v: str) -> str:
        """Reject empty or whitespace-only tree names.

        Parameters
        ----------
        v : str
            The tree name to validate.

        Returns
        -------
        str
            The tree name, unchanged.

        Raises
        ------
        ValueError
            If name is empty or whitespace-only.
        """
        if v and v.strip():
            return v
        raise ValueError("name must be non-empty")
|