bead 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
bead/cli/resources.py
ADDED
|
@@ -0,0 +1,1036 @@
|
|
|
1
|
+
"""Resource management commands for bead CLI.
|
|
2
|
+
|
|
3
|
+
This module provides commands for creating, listing, and validating
|
|
4
|
+
lexicons and templates (Stage 1 of the bead pipeline).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import csv
|
|
10
|
+
import json
|
|
11
|
+
import re
|
|
12
|
+
from itertools import product
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any, cast
|
|
15
|
+
|
|
16
|
+
import click
|
|
17
|
+
from pydantic import ValidationError
|
|
18
|
+
from rich.console import Console
|
|
19
|
+
from rich.table import Table
|
|
20
|
+
|
|
21
|
+
from bead.cli.constraint_builders import create_constraint
|
|
22
|
+
from bead.cli.resource_loaders import (
|
|
23
|
+
import_framenet,
|
|
24
|
+
import_propbank,
|
|
25
|
+
import_unimorph,
|
|
26
|
+
import_verbnet,
|
|
27
|
+
)
|
|
28
|
+
from bead.cli.utils import print_error, print_info, print_success
|
|
29
|
+
from bead.data.base import JsonValue
|
|
30
|
+
from bead.resources.lexical_item import LexicalItem
|
|
31
|
+
from bead.resources.lexicon import Lexicon
|
|
32
|
+
from bead.resources.template import Slot, Template
|
|
33
|
+
from bead.resources.template_collection import TemplateCollection
|
|
34
|
+
|
|
35
|
+
# Module-level rich console; the commands in this module use it to print
# result tables and per-line validation errors.
console = Console()
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Root click group for this module. Commands defined in this file attach to it
# through ``@resources.command()`` and ``resources.add_command(...)``.
# The docstring is raw so that the ``\b`` marker line reaches click verbatim;
# click uses that marker to keep the following paragraph from being re-wrapped
# in the ``--help`` output.
@click.group()
def resources() -> None:
    r"""Resource management commands (Stage 1).

    Commands for creating, validating, and managing lexicons and templates.

    \b
    Examples:
        $ bead resources create-lexicon lexicon.jsonl --name verbs \\
            --from-csv verbs.csv
        $ bead resources create-template template.jsonl --name transitive \\
            --template-string "{subject} {verb} {object}"
        $ bead resources list-lexicons --directory lexicons/
        $ bead resources validate-lexicon lexicon.jsonl
    """
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@resources.command()
@click.argument("output_file", type=click.Path(path_type=Path))
@click.option("--name", required=True, help="Lexicon name")
@click.option(
    "--from-csv",
    "csv_file",
    type=click.Path(exists=True, path_type=Path),
    help="Create from CSV file (requires 'lemma' column, optional 'pos', 'form', etc.)",
)
@click.option(
    "--from-json",
    "json_file",
    type=click.Path(exists=True, path_type=Path),
    help="Create from JSON file (array of lexical item objects)",
)
@click.option("--language-code", help="ISO 639 language code (e.g., 'eng', 'en')")
@click.option("--description", help="Description of the lexicon")
@click.pass_context
def create_lexicon(
    ctx: click.Context,
    output_file: Path,
    name: str,
    csv_file: Path | None,
    json_file: Path | None,
    language_code: str | None,
    description: str | None,
) -> None:
    r"""Create a lexicon from various sources.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    output_file : Path
        Path to output lexicon file.
    name : str
        Name for the lexicon.
    csv_file : Path | None
        Path to CSV source file.
    json_file : Path | None
        Path to JSON source file.
    language_code : str | None
        ISO 639 language code.
    description : str | None
        Description of the lexicon.

    Examples
    --------
    # Create from CSV file
    $ bead resources create-lexicon lexicon.jsonl --name verbs --from-csv verbs.csv

    # Create from JSON file
    $ bead resources create-lexicon lexicon.jsonl --name verbs --from-json verbs.json

    # With language code
    $ bead resources create-lexicon lexicon.jsonl --name verbs \\
        --from-csv verbs.csv --language-code eng
    """
    try:
        # Validate that exactly one source is provided
        provided_sources = [s for s in (csv_file, json_file) if s is not None]

        if not provided_sources:
            print_error("Must provide one source: --from-csv or --from-json")
            ctx.exit(1)
        elif len(provided_sources) > 1:
            print_error("Only one source allowed: --from-csv or --from-json")
            ctx.exit(1)

        # Create lexicon
        lexicon = Lexicon(
            name=name,
            language_code=language_code,
            description=description,
        )

        # Determine language code for items (default to "eng" if not specified)
        item_language_code = language_code or "eng"

        # Load from source
        if csv_file is not None:
            print_info(f"Loading lexical items from CSV: {csv_file}")
            # newline="" leaves line-ending handling to the csv module, so
            # quoted cells containing newlines are parsed correctly.
            with open(csv_file, encoding="utf-8", newline="") as f:
                reader = csv.DictReader(f)

                # The header is the same for every row, so check it once; this
                # also rejects a header-only file that lacks the column.
                if "lemma" not in (reader.fieldnames or ()):
                    print_error("CSV must have 'lemma' column")
                    ctx.exit(1)

                for row in reader:
                    item_data: dict[str, Any] = {
                        "lemma": row["lemma"],
                        "language_code": item_language_code,
                    }

                    if row.get("form"):
                        item_data["form"] = row["form"]
                    if row.get("source"):
                        item_data["source"] = row["source"]

                    # DictReader stores surplus cells of an over-long row
                    # under the key None; keep only named, non-empty cells.
                    named_cells = [
                        (key, value)
                        for key, value in row.items()
                        if isinstance(key, str) and value
                    ]

                    # Build features from pos, then feature_ columns, then
                    # attr_ columns; later groups override earlier ones.
                    features: dict[str, Any] = {}
                    if row.get("pos"):
                        features["pos"] = row["pos"]
                    features.update(
                        (key.removeprefix("feature_"), value)
                        for key, value in named_cells
                        if key.startswith("feature_")
                    )
                    features.update(
                        (key.removeprefix("attr_"), value)
                        for key, value in named_cells
                        if key.startswith("attr_")
                    )

                    if features:
                        item_data["features"] = features

                    lexicon.add(LexicalItem(**item_data))

        elif json_file is not None:
            print_info(f"Loading lexical items from JSON: {json_file}")
            with open(json_file, encoding="utf-8") as f:
                raw_data = json.load(f)

            if not isinstance(raw_data, list):
                print_error("JSON file must contain an array of lexical items")
                ctx.exit(1)

            data = cast(list[dict[str, JsonValue]], raw_data)

            skipped = 0
            for raw_item_untyped in data:
                # Entries that are not objects, or that lack a string lemma,
                # cannot become lexical items; count them instead of failing.
                if not isinstance(raw_item_untyped, dict) or not isinstance(
                    raw_item_untyped.get("lemma"), str
                ):
                    skipped += 1
                    continue

                # Copy scalar feature values only
                json_features: dict[str, str | int | float | bool | None] = {}
                features_value = raw_item_untyped.get("features")
                if isinstance(features_value, dict):
                    for k, v in features_value.items():
                        if isinstance(v, str | int | float | bool) or v is None:
                            json_features[k] = v

                # Move pos to features if present at top level
                if isinstance(raw_item_untyped.get("pos"), str):
                    json_features["pos"] = raw_item_untyped["pos"]

                # A per-item language_code string overrides the command default
                lang_code = raw_item_untyped.get("language_code")
                item_kwargs: dict[str, Any] = {
                    "lemma": raw_item_untyped["lemma"],
                    "language_code": (
                        lang_code if isinstance(lang_code, str) else item_language_code
                    ),
                    "features": json_features,
                }

                # form/source are passed only when given as strings, so the
                # model's own defaults apply otherwise.
                for optional_field in ("form", "source"):
                    optional_value = raw_item_untyped.get(optional_field)
                    if isinstance(optional_value, str):
                        item_kwargs[optional_field] = optional_value

                lexicon.add(LexicalItem(**item_kwargs))

            if skipped:
                print_info(f"Skipped {skipped} JSON entries without a string 'lemma'")

        # Save lexicon
        output_file.parent.mkdir(parents=True, exist_ok=True)
        lexicon.to_jsonl(str(output_file))

        print_success(
            f"Created lexicon '{name}' with {len(lexicon)} items: {output_file}"
        )

    except click.exceptions.Exit:
        # ctx.exit() works by raising Exit, which subclasses RuntimeError.
        # Re-raise it so the generic handler below does not report the
        # intentional exit as a second failure.
        raise
    except ValidationError as e:
        print_error(f"Validation error: {e}")
        ctx.exit(1)
    except Exception as e:
        print_error(f"Failed to create lexicon: {e}")
        ctx.exit(1)
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
@resources.command()
@click.argument("output_file", type=click.Path(path_type=Path))
@click.option("--name", required=True, help="Template name")
@click.option(
    "--template-string",
    required=True,
    help="Template string with {slot_name} placeholders",
)
@click.option("--language-code", help="ISO 639 language code")
@click.option("--description", help="Template description")
@click.option(
    "--slot",
    "slots",
    multiple=True,
    help=(
        "Slot definition in format: name:required "
        "(e.g., 'subject:true', 'object:false')"
    ),
)
@click.pass_context
def create_template(
    ctx: click.Context,
    output_file: Path,
    name: str,
    template_string: str,
    language_code: str | None,
    description: str | None,
    slots: tuple[str, ...],
) -> None:
    r"""Create a template with slots and constraints.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    output_file : Path
        Path to output template file.
    name : str
        Name for the template.
    template_string : str
        Template string with {slot_name} placeholders.
    language_code : str | None
        ISO 639 language code.
    description : str | None
        Description of the template.
    slots : tuple[str, ...]
        Slot definitions in format "name:required".

    Examples
    --------
    # Create simple template
    $ bead resources create-template template.jsonl \\
        --name transitive \\
        --template-string "{subject} {verb} {object}"

    # With slot specifications
    $ bead resources create-template template.jsonl \\
        --name transitive \\
        --template-string "{subject} {verb} {object}" \\
        --slot subject:true \\
        --slot verb:true \\
        --slot object:false
    """
    try:
        # Extract slot names from template string
        slot_names = re.findall(r"\{(\w+)\}", template_string)

        if not slot_names:
            print_error(
                "Template string must contain at least one {slot_name} placeholder"
            )
            ctx.exit(1)

        # Parse explicit slot definitions
        explicit_slots: dict[str, bool] = {}
        for slot_def in slots:
            slot_name, separator, required_str = slot_def.partition(":")
            if not separator:
                print_error(
                    f"Invalid slot definition: {slot_def}. Use format 'name:required'"
                )
                ctx.exit(1)

            # Strip stray whitespace so "verb: true" is not silently read as
            # an unknown slot name or as required=False.
            explicit_slots[slot_name.strip()] = required_str.strip().lower() in (
                "true",
                "yes",
                "1",
            )

        # Create slot objects for all slot names in the template. Slots
        # without an explicit definition default to required; definitions
        # naming a slot that is absent from the template string are not used.
        slot_dict: dict[str, Slot] = {
            slot_name: Slot(
                name=slot_name, required=explicit_slots.get(slot_name, True)
            )
            for slot_name in slot_names
        }

        # Create template
        template = Template(
            name=name,
            template_string=template_string,
            slots=slot_dict,
            language_code=language_code,
            description=description,
        )

        # Create collection and add template
        collection = TemplateCollection(
            name=f"{name}_collection",
            language_code=language_code,
        )
        collection.add(template)

        # Save collection
        output_file.parent.mkdir(parents=True, exist_ok=True)
        collection.to_jsonl(str(output_file))

        print_success(
            f"Created template '{name}' with {len(slot_dict)} slots: {output_file}"
        )

    except click.exceptions.Exit:
        # ctx.exit() works by raising Exit, which subclasses RuntimeError.
        # Re-raise it so the generic handler below does not report the
        # intentional exit as a second failure.
        raise
    except ValidationError as e:
        print_error(f"Validation error: {e}")
        ctx.exit(1)
    except Exception as e:
        print_error(f"Failed to create template: {e}")
        ctx.exit(1)
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
@resources.command()
@click.option(
    "--directory",
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    default=Path.cwd(),
    help="Directory to search for lexicon files",
)
@click.option(
    "--pattern",
    default="*.jsonl",
    help="File pattern to match (default: *.jsonl)",
)
@click.pass_context
def list_lexicons(
    ctx: click.Context,
    directory: Path,
    pattern: str,
) -> None:
    """List available lexicons in a directory.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    directory : Path
        Directory to search for lexicon files.
    pattern : str
        File pattern to match.

    Examples
    --------
    $ bead resources list-lexicons
    $ bead resources list-lexicons --directory lexicons/
    $ bead resources list-lexicons --pattern "verb*.jsonl"
    """
    try:
        candidates = sorted(directory.glob(pattern))

        if not candidates:
            print_info(f"No lexicon files found in {directory} matching {pattern}")
            return

        table = Table(title=f"Lexicons in {directory}")
        column_specs: tuple[tuple[str, dict[str, str]], ...] = (
            ("File", {"style": "cyan"}),
            ("Name", {"style": "green"}),
            ("Items", {"justify": "right", "style": "yellow"}),
            ("Language", {"style": "magenta"}),
        )
        for heading, column_options in column_specs:
            table.add_column(heading, **column_options)

        for path in candidates:
            try:
                # The first line supplies the metadata shown for the file;
                # a file whose first line is blank is left out of the table.
                with path.open(encoding="utf-8") as handle:
                    head = handle.readline().strip()
                if not head:
                    continue

                # Every non-blank line counts as one item.
                with path.open(encoding="utf-8") as handle:
                    non_blank = len([text for text in handle if text.strip()])

                record = json.loads(head)
                table.add_row(
                    str(path.name),
                    path.stem,
                    str(non_blank),
                    record.get("language_code", "N/A"),
                )
            except Exception:
                # Best effort: unreadable or unparseable files are skipped.
                continue

        console.print(table)

    except Exception as e:
        print_error(f"Failed to list lexicons: {e}")
        ctx.exit(1)
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
@resources.command()
@click.option(
    "--directory",
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    default=Path.cwd(),
    help="Directory to search for template files",
)
@click.option(
    "--pattern",
    default="*.jsonl",
    help="File pattern to match (default: *.jsonl)",
)
@click.pass_context
def list_templates(
    ctx: click.Context,
    directory: Path,
    pattern: str,
) -> None:
    """List available templates in a directory.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    directory : Path
        Directory to search for template files.
    pattern : str
        File pattern to match.

    Examples
    --------
    $ bead resources list-templates
    $ bead resources list-templates --directory templates/
    $ bead resources list-templates --pattern "trans*.jsonl"
    """
    try:
        candidates = sorted(directory.glob(pattern))

        if not candidates:
            print_info(f"No template files found in {directory} matching {pattern}")
            return

        table = Table(title=f"Templates in {directory}")
        column_specs: tuple[tuple[str, dict[str, str]], ...] = (
            ("File", {"style": "cyan"}),
            ("Name", {"style": "green"}),
            ("Slots", {"justify": "right", "style": "yellow"}),
            ("Template String", {"style": "white"}),
        )
        for heading, column_options in column_specs:
            table.add_column(heading, **column_options)

        for path in candidates:
            try:
                # Only the first line of each file is inspected; a file whose
                # first line is blank is left out of the table.
                with path.open(encoding="utf-8") as handle:
                    head = handle.readline().strip()
                if not head:
                    continue

                record = json.loads(head)
                shown_name = record.get("name", path.stem)
                n_slots = len(record.get("slots", {}))
                preview = record.get("template_string", "N/A")

                # Cap the preview at 50 characters, ellipsis included.
                if len(preview) > 50:
                    preview = preview[:47] + "..."

                table.add_row(str(path.name), shown_name, str(n_slots), preview)
            except Exception:
                # Best effort: unreadable or unparseable files are skipped.
                continue

        console.print(table)

    except Exception as e:
        print_error(f"Failed to list templates: {e}")
        ctx.exit(1)
|
|
568
|
+
|
|
569
|
+
|
|
570
|
+
@resources.command()
@click.argument("lexicon_file", type=click.Path(exists=True, path_type=Path))
@click.pass_context
def validate_lexicon(ctx: click.Context, lexicon_file: Path) -> None:
    """Validate a lexicon file.

    Checks that the lexicon file is properly formatted and all items are valid.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    lexicon_file : Path
        Path to lexicon file to validate.

    Examples
    --------
    $ bead resources validate-lexicon lexicon.jsonl
    """
    # Local import: error text is untrusted for rich markup (see below).
    from rich.markup import escape

    try:
        print_info(f"Validating lexicon: {lexicon_file}")

        item_count = 0
        errors: list[str] = []

        with open(lexicon_file, encoding="utf-8") as f:
            for line_num, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    continue

                try:
                    item_data = json.loads(line)
                except json.JSONDecodeError as e:
                    errors.append(f"Line {line_num}: Invalid JSON - {e}")
                    continue

                # A line can be valid JSON without being an object (e.g. a
                # list or number). Record that against the line instead of
                # letting the ** unpacking abort the whole run.
                if not isinstance(item_data, dict):
                    errors.append(
                        f"Line {line_num}: Validation error - expected a JSON "
                        f"object, got {type(item_data).__name__}"
                    )
                    continue

                try:
                    LexicalItem(**item_data)
                except ValidationError as e:
                    errors.append(f"Line {line_num}: Validation error - {e}")
                else:
                    item_count += 1

        if errors:
            print_error(f"Validation failed with {len(errors)} errors:")
            for error in errors[:10]:  # Show first 10 errors
                # Escape the message: bracketed segments in error text would
                # otherwise be parsed by rich as markup tags.
                console.print(f" [red]✗[/red] {escape(error)}")
            if len(errors) > 10:
                console.print(f" ... and {len(errors) - 10} more errors")
            ctx.exit(1)
        else:
            print_success(f"Lexicon is valid: {item_count} items")

    except click.exceptions.Exit:
        # ctx.exit() works by raising Exit, which subclasses RuntimeError.
        # Re-raise it so the generic handler below does not add a bogus
        # "Failed to validate lexicon" message after the error report.
        raise
    except Exception as e:
        print_error(f"Failed to validate lexicon: {e}")
        ctx.exit(1)
|
|
623
|
+
|
|
624
|
+
|
|
625
|
+
# Add resource loader commands to resources group.
# These commands are imported from bead.cli.resource_loaders rather than
# defined in this file, so they are registered explicitly under their
# hyphenated CLI names.
resources.add_command(import_verbnet, name="import-verbnet")
resources.add_command(import_unimorph, name="import-unimorph")
resources.add_command(import_propbank, name="import-propbank")
resources.add_command(import_framenet, name="import-framenet")
|
|
630
|
+
|
|
631
|
+
|
|
632
|
+
@resources.command()
@click.argument("template_file", type=click.Path(exists=True, path_type=Path))
@click.pass_context
def validate_template(ctx: click.Context, template_file: Path) -> None:
    """Validate a template file.

    Reads the file as JSONL (one JSON object per non-blank line) and tries to
    construct a ``Template`` from every line. All per-line problems are
    collected; at most the first ten are printed. The command exits with
    status 1 if any line is invalid or the file cannot be processed.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    template_file : Path
        Path to template file to validate.

    Examples
    --------
    $ bead resources validate-template templates.jsonl
    """
    try:
        print_info(f"Validating template: {template_file}")

        template_count = 0
        errors: list[str] = []

        with open(template_file, encoding="utf-8") as f:
            for line_num, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    continue

                try:
                    template_data = json.loads(line)
                except json.JSONDecodeError as e:
                    errors.append(f"Line {line_num}: Invalid JSON - {e}")
                    continue

                # Valid JSON that is not an object (list, number, ...) cannot
                # be unpacked with ``**``; report it for this line instead of
                # letting a TypeError abort the whole run.
                if not isinstance(template_data, dict):
                    errors.append(
                        f"Line {line_num}: Validation error - expected a JSON "
                        f"object, got {type(template_data).__name__}"
                    )
                    continue

                try:
                    Template(**template_data)
                except ValidationError as e:
                    errors.append(f"Line {line_num}: Validation error - {e}")
                else:
                    template_count += 1

        if errors:
            print_error(f"Validation failed with {len(errors)} errors:")
            for error in errors[:10]:  # Show first 10 errors
                console.print(f" [red]✗[/red] {error}")
            if len(errors) > 10:
                console.print(f" ... and {len(errors) - 10} more errors")
            ctx.exit(1)
        else:
            print_success(f"Template file is valid: {template_count} templates")

    except click.exceptions.Exit:
        # ctx.exit() signals termination by raising Exit, which is a
        # RuntimeError subclass. Let it propagate so the generic handler
        # below does not print a bogus "Failed to validate template: 1".
        raise
    except Exception as e:
        print_error(f"Failed to validate template: {e}")
        ctx.exit(1)
|
|
685
|
+
|
|
686
|
+
|
|
687
|
+
@resources.command()
@click.argument("output_file", type=click.Path(path_type=Path))
@click.option(
    "--pattern",
    required=True,
    help="Template pattern with {slot_name} placeholders (e.g., '{subj} {verb}')",
)
@click.option(
    "--name",
    required=True,
    help="Template name",
)
@click.option(
    "--slot",
    "slots",
    multiple=True,
    help="Slot specification: name:required (e.g., subject:true, object:false)",
)
@click.option(
    "--description",
    help="Description of the template",
)
@click.option(
    "--language-code",
    help="ISO 639 language code (e.g., 'eng', 'en')",
)
@click.option(
    "--tags",
    help="Comma-separated tags for categorization",
)
@click.pass_context
def generate_templates(
    ctx: click.Context,
    output_file: Path,
    pattern: str,
    name: str,
    slots: tuple[str, ...],
    description: str | None,
    language_code: str | None,
    tags: str | None,
) -> None:
    r"""Generate templates from pattern specifications.

    Builds a single ``Template`` from a pattern string with ``{slot_name}``
    placeholders and appends it, as one JSON line, to ``output_file``
    (creating the file and its parent directories when needed). When no
    ``--slot`` options are given, every placeholder found in the pattern
    becomes a required slot; otherwise only the explicitly listed slots are
    created. The command exits with status 1 on any error.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    output_file : Path
        Path to output template file (JSONL).
    pattern : str
        Template pattern with {slot_name} placeholders.
    name : str
        Template name.
    slots : tuple[str, ...]
        Slot specifications (name:required).
    description : str | None
        Template description.
    language_code : str | None
        ISO 639 language code.
    tags : str | None
        Comma-separated tags.

    Examples
    --------
    # Generate simple template (auto-detect slots)
    $ bead resources generate-templates template.jsonl \
        --pattern "{subject} {verb} {object}" \
        --name simple_transitive

    # With explicit slot specifications
    $ bead resources generate-templates template.jsonl \
        --pattern "{subject} {verb} {object}" \
        --name transitive \
        --slot subject:true \
        --slot verb:true \
        --slot object:false \
        --description "Transitive sentence template"

    # With language and tags
    $ bead resources generate-templates template.jsonl \
        --pattern "{subject} {verb} {object}" \
        --name transitive \
        --language-code eng \
        --tags "transitive,simple"
    """
    try:
        # Extract slot names from pattern
        slot_names_in_pattern = set(re.findall(r"\{(\w+)\}", pattern))

        if not slot_names_in_pattern:
            print_error(
                "No slot placeholders found in pattern.\n\n"
                "Pattern must contain {slot_name} placeholders.\n\n"
                "Example: '{subject} {verb} {object}'"
            )
            ctx.exit(1)

        # Build slot dictionary
        slot_dict: dict[str, Slot] = {}

        if slots:
            # Use explicit slot specifications
            for slot_spec in slots:
                if ":" not in slot_spec:
                    print_error(
                        f"Invalid slot specification: {slot_spec}\n\n"
                        f"Format: name:required (e.g., subject:true, object:false)"
                    )
                    ctx.exit(1)

                # Split on the first colon only; anything not in the truthy
                # set below is treated as "optional".
                slot_name, required_str = slot_spec.split(":", 1)
                required = required_str.lower() in ("true", "yes", "1", "t", "y")

                if slot_name not in slot_names_in_pattern:
                    print_error(
                        f"Slot '{slot_name}' not found in pattern.\n\n"
                        f"Available slots: {', '.join(sorted(slot_names_in_pattern))}"
                    )
                    ctx.exit(1)

                slot_dict[slot_name] = Slot(name=slot_name, required=required)
        else:
            # Auto-generate slots (all required)
            for slot_name in slot_names_in_pattern:
                slot_dict[slot_name] = Slot(name=slot_name, required=True)

        # Build template; optional fields are only passed when provided.
        template_data: dict[str, Any] = {
            "name": name,
            "template_string": pattern,
            "slots": slot_dict,
        }

        if description:
            template_data["description"] = description
        if language_code:
            template_data["language_code"] = language_code
        if tags:
            template_data["tags"] = [t.strip() for t in tags.split(",") if t.strip()]

        template = Template(**template_data)

        # Save to JSONL. Append mode creates the file when it is missing, so
        # no separate existence check is needed.
        output_file.parent.mkdir(parents=True, exist_ok=True)
        with open(output_file, "a", encoding="utf-8") as f:
            f.write(template.model_dump_json() + "\n")

        print_success(
            f"Created template '{name}' with {len(slot_dict)} slots: {output_file}"
        )

        # Show slot details
        console.print("\n[cyan]Slots:[/cyan]")
        for slot_name, slot in sorted(slot_dict.items()):
            requirement_label = (
                "[green]required[/green]"
                if slot.required
                else "[yellow]optional[/yellow]"
            )
            console.print(f" • {slot_name}: {requirement_label}")

    except click.exceptions.Exit:
        # ctx.exit() works by raising Exit (a RuntimeError subclass). Without
        # this clause the generic handler below would catch it and print a
        # second, misleading "Failed to generate template: 1" message.
        raise
    except ValidationError as e:
        print_error(f"Validation error: {e}")
        ctx.exit(1)
    except Exception as e:
        print_error(f"Failed to generate template: {e}")
        ctx.exit(1)
|
|
858
|
+
|
|
859
|
+
|
|
860
|
+
@resources.command()
@click.argument("base_template_file", type=click.Path(exists=True, path_type=Path))
@click.argument("output_file", type=click.Path(path_type=Path))
@click.option(
    "--slot-variants",
    help="JSON file with slot variant specs: {slot_name: [variant1, variant2]}",
    type=click.Path(exists=True, path_type=Path),
)
@click.option(
    "--name-pattern",
    default="{base_name}_variant_{index}",
    help="Pattern for variant names (default: {base_name}_variant_{index})",
)
@click.option(
    "--max-variants",
    type=int,
    help="Maximum number of variants to generate",
)
@click.pass_context
def generate_template_variants(
    ctx: click.Context,
    base_template_file: Path,
    output_file: Path,
    slot_variants: Path | None,
    name_pattern: str,
    max_variants: int | None,
) -> None:
    r"""Generate systematic variations of a base template.

    Only the first line of ``base_template_file`` is used as the base
    template. With ``--slot-variants``, one variant is produced per
    combination of replacement strings, each obtained by rewriting the
    ``{slot}`` placeholders of the base template string. Without it, copies
    of the base template that differ only in name and metadata are produced
    (``max_variants`` of them, or 3 by default). ``output_file`` is
    overwritten. The command exits with status 1 on any error.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    base_template_file : Path
        Path to base template file (JSONL).
    output_file : Path
        Path to output variants file (JSONL).
    slot_variants : Path | None
        JSON file with slot variant specifications.
    name_pattern : str
        Pattern for variant names.
    max_variants : int | None
        Maximum number of variants to generate.

    Examples
    --------
    # Generate variants with slot permutations
    $ bead resources generate-template-variants base.jsonl variants.jsonl \
        --slot-variants slot_variants.json \
        --max-variants 10

    Where slot_variants.json contains:
    {
        "subject": ["{subject}", "{object}"],
        "object": ["{object}", "{subject}"]
    }

    This creates templates with swapped subject/object positions.
    """
    try:
        print_info(f"Loading base template from {base_template_file}")

        # Load base template (first line of the JSONL file only)
        with open(base_template_file, encoding="utf-8") as f:
            first_line = f.readline().strip()

        if not first_line:
            print_error("Base template file is empty")
            ctx.exit(1)

        base_template_data = json.loads(first_line)
        base_template = Template(**base_template_data)

        variants: list[Template] = []
        base_name = base_template.name
        base_template_string = base_template.template_string

        if slot_variants:
            # Load slot variant specifications
            print_info(f"Loading slot variants from {slot_variants}")
            with open(slot_variants, encoding="utf-8") as f:
                variant_spec = json.load(f)

            # Generate all combinations of slot substitutions
            slot_names = list(variant_spec.keys())
            slot_options = [variant_spec[slot] for slot in slot_names]
            combinations = list(product(*slot_options))

            # Limit to max_variants if specified
            if max_variants and len(combinations) > max_variants:
                print_info(
                    f"Limiting to {max_variants} variants "
                    f"(out of {len(combinations)} possible)"
                )
                combinations = combinations[:max_variants]

            # One regex matching any "{slot}" placeholder named in the spec.
            # All placeholders are rewritten in a single pass so that one
            # replacement can never be re-replaced by a later one. Chained
            # str.replace calls would turn the documented subject/object swap
            # into "{subject} ... {subject}".
            placeholder_re = (
                re.compile("|".join(re.escape(f"{{{slot}}}") for slot in slot_names))
                if slot_names
                else None  # empty spec: nothing to substitute
            )

            for idx, combo in enumerate(combinations):
                # Create substitution map
                substitution_map = dict(zip(slot_names, combo, strict=False))

                # Apply substitutions to template_string
                if placeholder_re is None:
                    variant_template_string = base_template_string
                else:
                    variant_template_string = placeholder_re.sub(
                        # group(0) is "{name}"; strip the braces for the key
                        lambda match, subs=substitution_map: subs[
                            match.group(0)[1:-1]
                        ],
                        base_template_string,
                    )

                # Skip the first combination when it reproduces the base
                # template unchanged (only index 0 is checked).
                if idx == 0 and variant_template_string == base_template_string:
                    continue

                # Create variant template
                variant_name = name_pattern.format(base_name=base_name, index=idx)
                variant_data = base_template.model_dump()
                variant_data["name"] = variant_name
                variant_data["template_string"] = variant_template_string
                variant_data["metadata"] = {
                    **variant_data.get("metadata", {}),
                    "variant_index": idx,
                    "base_template": base_name,
                    "substitutions": substitution_map,
                }

                variant = Template(**variant_data)
                variants.append(variant)

            print_success(f"Generated {len(variants)} slot-based template variants")

        else:
            # Generate simple metadata-only variants
            print_info("No slot variants specified, generating metadata variants")
            num_variants = max_variants or 3

            for i in range(num_variants):
                variant_name = name_pattern.format(base_name=base_name, index=i)

                variant_data = base_template.model_dump()
                variant_data["name"] = variant_name
                variant_data["metadata"] = {
                    **variant_data.get("metadata", {}),
                    "variant_index": i,
                    "base_template": base_name,
                }

                variant = Template(**variant_data)
                variants.append(variant)

            print_success(f"Generated {len(variants)} metadata-only template variants")

        # Save variants (overwrites any existing output file)
        output_file.parent.mkdir(parents=True, exist_ok=True)
        with open(output_file, "w", encoding="utf-8") as f:
            for variant in variants:
                f.write(variant.model_dump_json() + "\n")

        print_success(f"Saved variants to {output_file}")

    except click.exceptions.Exit:
        # ctx.exit() works by raising Exit (a RuntimeError subclass). Let it
        # through so the generic handler below does not report it as
        # "Failed to generate template variants: 1".
        raise
    except ValidationError as e:
        print_error(f"Validation error: {e}")
        ctx.exit(1)
    except Exception as e:
        print_error(f"Failed to generate template variants: {e}")
        ctx.exit(1)
|
|
1027
|
+
|
|
1028
|
+
|
|
1029
|
+
# Register external resource loader commands under each command's own name.
# NOTE(review): the same four importers are also added to ``resources``
# earlier in this module with explicit hyphenated names — confirm whether this
# second registration is still needed.
for _loader_command in (
    import_verbnet,
    import_unimorph,
    import_propbank,
    import_framenet,
):
    resources.add_command(_loader_command)
# Drop the loop temporary so the module namespace is unchanged.
del _loader_command

# Register constraint builder command
resources.add_command(create_constraint)
|