bead 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bead/__init__.py +11 -0
- bead/__main__.py +11 -0
- bead/active_learning/__init__.py +15 -0
- bead/active_learning/config.py +231 -0
- bead/active_learning/loop.py +566 -0
- bead/active_learning/models/__init__.py +24 -0
- bead/active_learning/models/base.py +852 -0
- bead/active_learning/models/binary.py +910 -0
- bead/active_learning/models/categorical.py +943 -0
- bead/active_learning/models/cloze.py +862 -0
- bead/active_learning/models/forced_choice.py +956 -0
- bead/active_learning/models/free_text.py +773 -0
- bead/active_learning/models/lora.py +365 -0
- bead/active_learning/models/magnitude.py +835 -0
- bead/active_learning/models/multi_select.py +795 -0
- bead/active_learning/models/ordinal_scale.py +811 -0
- bead/active_learning/models/peft_adapter.py +155 -0
- bead/active_learning/models/random_effects.py +639 -0
- bead/active_learning/selection.py +354 -0
- bead/active_learning/strategies.py +391 -0
- bead/active_learning/trainers/__init__.py +26 -0
- bead/active_learning/trainers/base.py +210 -0
- bead/active_learning/trainers/data_collator.py +172 -0
- bead/active_learning/trainers/dataset_utils.py +261 -0
- bead/active_learning/trainers/huggingface.py +304 -0
- bead/active_learning/trainers/lightning.py +324 -0
- bead/active_learning/trainers/metrics.py +424 -0
- bead/active_learning/trainers/mixed_effects.py +551 -0
- bead/active_learning/trainers/model_wrapper.py +509 -0
- bead/active_learning/trainers/registry.py +104 -0
- bead/adapters/__init__.py +11 -0
- bead/adapters/huggingface.py +61 -0
- bead/behavioral/__init__.py +116 -0
- bead/behavioral/analytics.py +646 -0
- bead/behavioral/extraction.py +343 -0
- bead/behavioral/merging.py +343 -0
- bead/cli/__init__.py +11 -0
- bead/cli/active_learning.py +513 -0
- bead/cli/active_learning_commands.py +779 -0
- bead/cli/completion.py +359 -0
- bead/cli/config.py +624 -0
- bead/cli/constraint_builders.py +286 -0
- bead/cli/deployment.py +859 -0
- bead/cli/deployment_trials.py +493 -0
- bead/cli/deployment_ui.py +332 -0
- bead/cli/display.py +378 -0
- bead/cli/items.py +960 -0
- bead/cli/items_factories.py +776 -0
- bead/cli/list_constraints.py +714 -0
- bead/cli/lists.py +490 -0
- bead/cli/main.py +430 -0
- bead/cli/models.py +877 -0
- bead/cli/resource_loaders.py +621 -0
- bead/cli/resources.py +1036 -0
- bead/cli/shell.py +356 -0
- bead/cli/simulate.py +840 -0
- bead/cli/templates.py +1158 -0
- bead/cli/training.py +1080 -0
- bead/cli/utils.py +614 -0
- bead/cli/workflow.py +1273 -0
- bead/config/__init__.py +68 -0
- bead/config/active_learning.py +1009 -0
- bead/config/config.py +192 -0
- bead/config/defaults.py +118 -0
- bead/config/deployment.py +217 -0
- bead/config/env.py +147 -0
- bead/config/item.py +45 -0
- bead/config/list.py +193 -0
- bead/config/loader.py +149 -0
- bead/config/logging.py +42 -0
- bead/config/model.py +49 -0
- bead/config/paths.py +46 -0
- bead/config/profiles.py +320 -0
- bead/config/resources.py +47 -0
- bead/config/serialization.py +210 -0
- bead/config/simulation.py +206 -0
- bead/config/template.py +238 -0
- bead/config/validation.py +267 -0
- bead/data/__init__.py +65 -0
- bead/data/base.py +87 -0
- bead/data/identifiers.py +97 -0
- bead/data/language_codes.py +61 -0
- bead/data/metadata.py +270 -0
- bead/data/range.py +123 -0
- bead/data/repository.py +358 -0
- bead/data/serialization.py +249 -0
- bead/data/timestamps.py +89 -0
- bead/data/validation.py +349 -0
- bead/data_collection/__init__.py +11 -0
- bead/data_collection/jatos.py +223 -0
- bead/data_collection/merger.py +154 -0
- bead/data_collection/prolific.py +198 -0
- bead/deployment/__init__.py +5 -0
- bead/deployment/distribution.py +402 -0
- bead/deployment/jatos/__init__.py +1 -0
- bead/deployment/jatos/api.py +200 -0
- bead/deployment/jatos/exporter.py +210 -0
- bead/deployment/jspsych/__init__.py +9 -0
- bead/deployment/jspsych/biome.json +44 -0
- bead/deployment/jspsych/config.py +411 -0
- bead/deployment/jspsych/generator.py +598 -0
- bead/deployment/jspsych/package.json +51 -0
- bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
- bead/deployment/jspsych/randomizer.py +299 -0
- bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
- bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
- bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
- bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
- bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
- bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
- bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
- bead/deployment/jspsych/src/plugins/rating.ts +248 -0
- bead/deployment/jspsych/src/slopit/index.ts +9 -0
- bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
- bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
- bead/deployment/jspsych/templates/experiment.css +1 -0
- bead/deployment/jspsych/templates/experiment.js.template +289 -0
- bead/deployment/jspsych/templates/index.html +51 -0
- bead/deployment/jspsych/templates/randomizer.js +241 -0
- bead/deployment/jspsych/templates/randomizer.js.template +313 -0
- bead/deployment/jspsych/trials.py +723 -0
- bead/deployment/jspsych/tsconfig.json +23 -0
- bead/deployment/jspsych/tsup.config.ts +30 -0
- bead/deployment/jspsych/ui/__init__.py +1 -0
- bead/deployment/jspsych/ui/components.py +383 -0
- bead/deployment/jspsych/ui/styles.py +411 -0
- bead/dsl/__init__.py +80 -0
- bead/dsl/ast.py +168 -0
- bead/dsl/context.py +178 -0
- bead/dsl/errors.py +71 -0
- bead/dsl/evaluator.py +570 -0
- bead/dsl/grammar.lark +81 -0
- bead/dsl/parser.py +231 -0
- bead/dsl/stdlib.py +929 -0
- bead/evaluation/__init__.py +13 -0
- bead/evaluation/convergence.py +485 -0
- bead/evaluation/interannotator.py +398 -0
- bead/items/__init__.py +40 -0
- bead/items/adapters/__init__.py +70 -0
- bead/items/adapters/anthropic.py +224 -0
- bead/items/adapters/api_utils.py +167 -0
- bead/items/adapters/base.py +216 -0
- bead/items/adapters/google.py +259 -0
- bead/items/adapters/huggingface.py +1074 -0
- bead/items/adapters/openai.py +323 -0
- bead/items/adapters/registry.py +202 -0
- bead/items/adapters/sentence_transformers.py +224 -0
- bead/items/adapters/togetherai.py +309 -0
- bead/items/binary.py +515 -0
- bead/items/cache.py +558 -0
- bead/items/categorical.py +593 -0
- bead/items/cloze.py +757 -0
- bead/items/constructor.py +784 -0
- bead/items/forced_choice.py +413 -0
- bead/items/free_text.py +681 -0
- bead/items/generation.py +432 -0
- bead/items/item.py +396 -0
- bead/items/item_template.py +787 -0
- bead/items/magnitude.py +573 -0
- bead/items/multi_select.py +621 -0
- bead/items/ordinal_scale.py +569 -0
- bead/items/scoring.py +448 -0
- bead/items/validation.py +723 -0
- bead/lists/__init__.py +30 -0
- bead/lists/balancer.py +263 -0
- bead/lists/constraints.py +1067 -0
- bead/lists/experiment_list.py +286 -0
- bead/lists/list_collection.py +378 -0
- bead/lists/partitioner.py +1141 -0
- bead/lists/stratification.py +254 -0
- bead/participants/__init__.py +73 -0
- bead/participants/collection.py +699 -0
- bead/participants/merging.py +312 -0
- bead/participants/metadata_spec.py +491 -0
- bead/participants/models.py +276 -0
- bead/resources/__init__.py +29 -0
- bead/resources/adapters/__init__.py +19 -0
- bead/resources/adapters/base.py +104 -0
- bead/resources/adapters/cache.py +128 -0
- bead/resources/adapters/glazing.py +508 -0
- bead/resources/adapters/registry.py +117 -0
- bead/resources/adapters/unimorph.py +796 -0
- bead/resources/classification.py +856 -0
- bead/resources/constraint_builders.py +329 -0
- bead/resources/constraints.py +165 -0
- bead/resources/lexical_item.py +223 -0
- bead/resources/lexicon.py +744 -0
- bead/resources/loaders.py +209 -0
- bead/resources/template.py +441 -0
- bead/resources/template_collection.py +707 -0
- bead/resources/template_generation.py +349 -0
- bead/simulation/__init__.py +29 -0
- bead/simulation/annotators/__init__.py +15 -0
- bead/simulation/annotators/base.py +175 -0
- bead/simulation/annotators/distance_based.py +135 -0
- bead/simulation/annotators/lm_based.py +114 -0
- bead/simulation/annotators/oracle.py +182 -0
- bead/simulation/annotators/random.py +181 -0
- bead/simulation/dsl_extension/__init__.py +3 -0
- bead/simulation/noise_models/__init__.py +13 -0
- bead/simulation/noise_models/base.py +42 -0
- bead/simulation/noise_models/random_noise.py +82 -0
- bead/simulation/noise_models/systematic.py +132 -0
- bead/simulation/noise_models/temperature.py +86 -0
- bead/simulation/runner.py +144 -0
- bead/simulation/strategies/__init__.py +23 -0
- bead/simulation/strategies/base.py +123 -0
- bead/simulation/strategies/binary.py +103 -0
- bead/simulation/strategies/categorical.py +123 -0
- bead/simulation/strategies/cloze.py +224 -0
- bead/simulation/strategies/forced_choice.py +127 -0
- bead/simulation/strategies/free_text.py +105 -0
- bead/simulation/strategies/magnitude.py +116 -0
- bead/simulation/strategies/multi_select.py +129 -0
- bead/simulation/strategies/ordinal_scale.py +131 -0
- bead/templates/__init__.py +27 -0
- bead/templates/adapters/__init__.py +17 -0
- bead/templates/adapters/base.py +128 -0
- bead/templates/adapters/cache.py +178 -0
- bead/templates/adapters/huggingface.py +312 -0
- bead/templates/combinatorics.py +103 -0
- bead/templates/filler.py +605 -0
- bead/templates/renderers.py +177 -0
- bead/templates/resolver.py +178 -0
- bead/templates/strategies.py +1806 -0
- bead/templates/streaming.py +195 -0
- bead-0.1.0.dist-info/METADATA +212 -0
- bead-0.1.0.dist-info/RECORD +231 -0
- bead-0.1.0.dist-info/WHEEL +4 -0
- bead-0.1.0.dist-info/entry_points.txt +2 -0
- bead-0.1.0.dist-info/licenses/LICENSE +21 -0
bead/cli/templates.py
ADDED
|
@@ -0,0 +1,1158 @@
|
|
|
1
|
+
"""Template filling commands for bead CLI.
|
|
2
|
+
|
|
3
|
+
This module provides commands for filling templates with lexical items
|
|
4
|
+
(Stage 2 of the bead pipeline).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import csv as csv_module
|
|
10
|
+
import json
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
import click
|
|
14
|
+
from pydantic import ValidationError
|
|
15
|
+
from rich.console import Console
|
|
16
|
+
from rich.progress import Progress, SpinnerColumn, TextColumn
|
|
17
|
+
from rich.table import Table
|
|
18
|
+
|
|
19
|
+
from bead.cli.utils import print_error, print_info, print_success
|
|
20
|
+
from bead.data.base import JsonValue
|
|
21
|
+
from bead.dsl.evaluator import DSLEvaluator
|
|
22
|
+
from bead.dsl.parser import parse
|
|
23
|
+
from bead.resources.constraints import Constraint
|
|
24
|
+
from bead.resources.lexicon import Lexicon
|
|
25
|
+
from bead.resources.template_collection import TemplateCollection
|
|
26
|
+
from bead.templates.combinatorics import count_combinations
|
|
27
|
+
from bead.templates.filler import FilledTemplate
|
|
28
|
+
from bead.templates.strategies import (
|
|
29
|
+
ExhaustiveStrategy,
|
|
30
|
+
RandomStrategy,
|
|
31
|
+
StrategyFiller,
|
|
32
|
+
StratifiedStrategy,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
console = Console()
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# NOTE: the docstring below is rendered by click as the group's ``--help``
# text and is deliberately NOT a raw string. Click's "do not rewrap this
# paragraph" marker is the backspace character, which only a non-raw ``\b``
# produces; likewise ``\\`` must collapse to a single shell line-continuation
# backslash. With an ``r`` prefix the help output shows a literal ``\b`` and
# doubled backslashes, and the example block gets rewrapped.
@click.group()
def templates() -> None:
    """Template filling commands (Stage 2).

    Commands for filling templates with lexical items using various strategies.

    \b
    Examples:
        $ bead templates fill template.jsonl lexicon.jsonl filled.jsonl \\
            --strategy exhaustive
        $ bead templates fill template.jsonl lexicon.jsonl filled.jsonl \\
            --strategy random --max-combinations 100
        $ bead templates list-filled filled.jsonl
        $ bead templates validate-filled filled.jsonl
        $ bead templates show-stats filled.jsonl
    """  # noqa: D301
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@click.command()
@click.argument("template_file", type=click.Path(exists=True, path_type=Path))
@click.argument(
    "lexicon_files",
    nargs=-1,
    type=click.Path(exists=True, path_type=Path),
    required=True,
)
@click.argument("output_file", type=click.Path(path_type=Path))
@click.option(
    "--strategy",
    type=click.Choice(["exhaustive", "random", "stratified"]),
    default="exhaustive",
    help="Filling strategy to use",
)
@click.option(
    "--max-combinations",
    type=int,
    help="Maximum combinations for random/stratified strategies",
)
@click.option(
    "--random-seed",
    type=int,
    help="Random seed for reproducibility",
)
@click.option(
    "--grouping-property",
    help="Property for stratified strategy (e.g., 'pos', 'features.tense')",
)
@click.option(
    "--language-code",
    help="ISO 639 language code to filter items",
)
@click.option(
    "--constraints",
    type=click.Path(exists=True, path_type=Path),
    help="Path to constraints file (JSONL) to apply during filling",
)
@click.pass_context
def fill(
    ctx: click.Context,
    template_file: Path,
    lexicon_files: tuple[Path, ...],
    output_file: Path,
    strategy: str,
    max_combinations: int | None,
    random_seed: int | None,
    grouping_property: str | None,
    language_code: str | None,
    constraints: Path | None,
) -> None:
    """Fill templates with lexical items.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    template_file : Path
        Path to template file.
    lexicon_files : tuple[Path, ...]
        Paths to one or more lexicon files to merge.
    output_file : Path
        Path to output filled templates file.
    strategy : str
        Filling strategy name.
    max_combinations : int | None
        Maximum number of combinations.
    random_seed : int | None
        Random seed for reproducibility.
    grouping_property : str | None
        Property for stratified sampling.
    language_code : str | None
        ISO 639 language code filter.
    constraints : Path | None
        Path to constraints file (JSONL) to apply.

    Examples
    --------
    # Exhaustive filling with single lexicon
    $ bead templates fill template.jsonl lexicon.jsonl filled.jsonl \\
        --strategy exhaustive

    # Multiple lexicons
    $ bead templates fill tpl.jsonl nouns.jsonl verbs.jsonl filled.jsonl \\
        --strategy exhaustive

    # Random sampling
    $ bead templates fill template.jsonl lexicon.jsonl filled.jsonl \\
        --strategy random --max-combinations 100 --random-seed 42

    # Stratified sampling
    $ bead templates fill template.jsonl lexicon.jsonl filled.jsonl \\
        --strategy stratified --max-combinations 100 --grouping-property pos

    # With constraints
    $ bead templates fill template.jsonl lexicon.jsonl filled.jsonl \\
        --strategy exhaustive --constraints constraints.jsonl
    """  # noqa: D301
    # The docstring above is intentionally non-raw: click shows it as help
    # text, and ``\\`` must collapse to a single line-continuation backslash.
    try:
        # Validate strategy-specific options
        if strategy in ("random", "stratified") and max_combinations is None:
            print_error(f"--max-combinations required for {strategy} strategy")
            ctx.exit(1)

        if strategy == "stratified" and grouping_property is None:
            print_error("--grouping-property required for stratified strategy")
            ctx.exit(1)

        # Load and merge lexicons
        if not lexicon_files:
            print_error("At least one lexicon file is required")
            ctx.exit(1)

        print_info(f"Loading {len(lexicon_files)} lexicon(s)")
        merged_lexicon = Lexicon(name="merged", items={})

        for lex_file in lexicon_files:
            lex = Lexicon.from_jsonl(str(lex_file), lex_file.stem)
            print_info(f"  Loaded {len(lex)} items from {lex_file.name}")
            # Merge items; entries from later files replace same-key entries
            # from earlier ones (plain dict.update semantics).
            merged_lexicon.items.update(lex.items)

        print_info(f"Total merged lexicon: {len(merged_lexicon)} items")
        lexicon = merged_lexicon

        # Load templates
        print_info(f"Loading templates from {template_file}")
        template_collection = TemplateCollection.from_jsonl(
            str(template_file), "templates"
        )
        print_info(f"Loaded {len(template_collection)} templates")

        # Load and apply constraints if provided
        if constraints:
            print_info(f"Loading constraints from {constraints}")
            loaded_constraints: list[Constraint] = []

            with open(constraints, encoding="utf-8") as f:
                for line_num, line in enumerate(f, start=1):
                    line = line.strip()
                    if not line:
                        continue

                    try:
                        constraint_data = json.loads(line)
                        constraint = Constraint(**constraint_data)
                        loaded_constraints.append(constraint)
                    except json.JSONDecodeError as e:
                        print_error(f"Invalid JSON on line {line_num}: {e}")
                        ctx.exit(1)
                    except ValidationError as e:
                        print_error(f"Invalid constraint on line {line_num}: {e}")
                        ctx.exit(1)

            print_info(f"Loaded {len(loaded_constraints)} constraints")

            # Apply constraints to all templates
            for template in template_collection:
                template.constraints.extend(loaded_constraints)

            print_info(f"Applied constraints to {len(template_collection)} templates")

        # Create strategy
        filling_strategy: ExhaustiveStrategy | RandomStrategy | StratifiedStrategy
        if strategy == "exhaustive":
            filling_strategy = ExhaustiveStrategy()
        elif strategy == "random":
            # Already validated above; the assert only narrows the type.
            assert max_combinations is not None
            filling_strategy = RandomStrategy(
                n_samples=max_combinations,
                seed=random_seed,
            )
        elif strategy == "stratified":
            # Already validated above; the asserts only narrow the types.
            assert max_combinations is not None
            assert grouping_property is not None
            filling_strategy = StratifiedStrategy(
                n_samples=max_combinations,
                grouping_property=grouping_property,
                seed=random_seed,
            )
        else:
            print_error(f"Unknown strategy: {strategy}")
            ctx.exit(1)

        # Create filler
        filler = StrategyFiller(lexicon=lexicon, strategy=filling_strategy)

        # Fill templates with progress
        all_filled: list[FilledTemplate] = []

        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
        ) as progress:
            task = progress.add_task(
                f"Filling {len(template_collection)} templates...",
                total=len(template_collection),
            )

            for template in template_collection:
                try:
                    filled_templates = filler.fill(template, language_code)
                    all_filled.extend(filled_templates)
                    progress.advance(task)
                except ValueError as e:
                    # Best effort: report the failing template and keep going.
                    print_error(f"Failed to fill template '{template.name}': {e}")
                    continue

        # Save filled templates
        output_file.parent.mkdir(parents=True, exist_ok=True)
        with open(output_file, "w", encoding="utf-8") as f:
            for filled in all_filled:
                f.write(filled.model_dump_json() + "\n")

        print_success(
            f"Created {len(all_filled)} filled templates from "
            f"{len(template_collection)} templates: {output_file}"
        )

    except click.exceptions.Exit:
        # ctx.exit() signals termination by raising click.exceptions.Exit,
        # which is a RuntimeError subclass. Re-raise it untouched so the
        # generic handler below does not report it as a bogus
        # "Failed to fill templates: 1" error.
        raise
    except ValidationError as e:
        print_error(f"Validation error: {e}")
        ctx.exit(1)
    except Exception as e:
        print_error(f"Failed to fill templates: {e}")
        ctx.exit(1)
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
@click.command()
@click.option(
    "--directory",
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    default=Path.cwd(),
    help="Directory to search for filled template files",
)
@click.option(
    "--pattern",
    default="*.jsonl",
    help="File pattern to match (default: *.jsonl)",
)
@click.option(
    "--filter",
    "filter_expr",
    help="DSL expression to filter (e.g., 'slot_fillers.noun.lemma == \"cat\"')",
)
@click.pass_context
def list_filled(
    ctx: click.Context,
    directory: Path,
    pattern: str,
    filter_expr: str | None,
) -> None:
    """List filled template files in a directory.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    directory : Path
        Directory to search.
    pattern : str
        File pattern to match.
    filter_expr : str | None
        DSL expression to filter filled templates.

    Examples
    --------
    $ bead templates list-filled
    $ bead templates list-filled --directory filled_templates/
    $ bead templates list-filled --pattern "filled_*.jsonl"
    $ bead templates list-filled --filter "slot_fillers.noun.lemma == 'cat'"
    $ bead templates list-filled --filter "len(slot_fillers) > 2"
    """
    try:
        files = list(directory.glob(pattern))

        if not files:
            print_info(f"No files found in {directory} matching {pattern}")
            return

        # Parse filter expression if provided
        filter_ast = None
        evaluator = None
        if filter_expr:
            try:
                filter_ast = parse(filter_expr)
                evaluator = DSLEvaluator()
                print_info(f"Filtering with expression: {filter_expr}")
            except Exception as e:
                print_error(f"Invalid filter expression: {e}")
                ctx.exit(1)

        table = Table(title=f"Filled Templates in {directory}")
        table.add_column("File", style="cyan")
        table.add_column("Count", justify="right", style="yellow")
        table.add_column("Filtered", justify="right", style="magenta")
        table.add_column("Strategy", style="green")
        table.add_column("Sample", style="white")

        for file_path in sorted(files):
            try:
                # Count filled templates and get metadata
                with open(file_path, encoding="utf-8") as f:
                    lines = [line.strip() for line in f if line.strip()]

                if not lines:
                    continue

                # Apply filter if provided
                filtered_count = 0
                if filter_ast and evaluator:
                    for line in lines:
                        try:
                            filled_data = json.loads(line)
                            filled_template = FilledTemplate(**filled_data)
                            # Create evaluation context
                            context = {"self": filled_template}
                            # Evaluate filter
                            if evaluator.evaluate(filter_ast, context):
                                filtered_count += 1
                        except Exception:
                            # Lines that fail to parse or evaluate simply do
                            # not count as matches.
                            continue
                else:
                    filtered_count = len(lines)

                # Files with no matching entries are left out of the table.
                if filtered_count == 0:
                    continue

                # Parse first filled template for metadata. Note this is the
                # first line of the file, not the first line that matched.
                first_data = json.loads(lines[0])
                strategy_name = first_data.get("strategy_name", "N/A")
                rendered = first_data.get("rendered_text", "N/A")

                # Truncate long rendered text
                if len(rendered) > 40:
                    rendered = rendered[:37] + "..."

                table.add_row(
                    str(file_path.name),
                    str(len(lines)),
                    str(filtered_count) if filter_expr else "N/A",
                    strategy_name,
                    rendered,
                )
            except Exception:
                # Skip files that can't be parsed
                continue

        console.print(table)

    except click.exceptions.Exit:
        # ctx.exit() signals termination by raising click.exceptions.Exit,
        # which is a RuntimeError subclass. Re-raise it untouched so an
        # invalid --filter is not additionally reported as
        # "Failed to list filled templates: 1" by the handler below.
        raise
    except Exception as e:
        print_error(f"Failed to list filled templates: {e}")
        ctx.exit(1)
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
@click.command()
@click.argument("filled_file", type=click.Path(exists=True, path_type=Path))
@click.pass_context
def validate_filled(ctx: click.Context, filled_file: Path) -> None:
    """Validate a filled templates file.

    Checks that all filled templates are properly formatted.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    filled_file : Path
        Path to filled templates file.

    Examples
    --------
    $ bead templates validate-filled filled.jsonl
    """
    try:
        print_info(f"Validating filled templates: {filled_file}")

        count = 0
        errors: list[str] = []

        with open(filled_file, encoding="utf-8") as f:
            for line_num, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    continue

                try:
                    filled_data = json.loads(line)
                    # Constructing the model is the validation step; the
                    # instance itself is not needed.
                    FilledTemplate(**filled_data)
                    count += 1
                except json.JSONDecodeError as e:
                    errors.append(f"Line {line_num}: Invalid JSON - {e}")
                except ValidationError as e:
                    errors.append(f"Line {line_num}: Validation error - {e}")

        if errors:
            print_error(f"Validation failed with {len(errors)} errors:")
            # Only the first ten problems are listed to keep output readable.
            for error in errors[:10]:
                console.print(f"  [red]✗[/red] {error}")
            if len(errors) > 10:
                console.print(f"  ... and {len(errors) - 10} more errors")
            ctx.exit(1)
        else:
            print_success(f"Filled templates file is valid: {count} filled templates")

    except click.exceptions.Exit:
        # ctx.exit() signals termination by raising click.exceptions.Exit,
        # which is a RuntimeError subclass. Re-raise it untouched; otherwise
        # every failed validation would also print a misleading
        # "Failed to validate filled templates: 1" from the handler below.
        raise
    except Exception as e:
        print_error(f"Failed to validate filled templates: {e}")
        ctx.exit(1)
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
@click.command()
@click.argument("filled_file", type=click.Path(exists=True, path_type=Path))
@click.pass_context
def show_stats(ctx: click.Context, filled_file: Path) -> None:
    """Show statistics about filled templates.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    filled_file : Path
        Path to filled templates file.

    Examples
    --------
    $ bead templates show-stats filled.jsonl
    """
    try:
        print_info(f"Analyzing filled templates: {filled_file}")

        # Collect statistics
        total_count = 0
        templates_seen: set[str] = set()
        strategies_used: dict[str, int] = {}
        text_lengths: list[int] = []

        with open(filled_file, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue

                try:
                    filled_data = json.loads(line)
                    filled = FilledTemplate(**filled_data)

                    total_count += 1
                    templates_seen.add(filled.template_name)
                    strategies_used[filled.strategy_name] = (
                        strategies_used.get(filled.strategy_name, 0) + 1
                    )
                    text_lengths.append(len(filled.rendered_text))

                except Exception:
                    # Best effort: lines that cannot be parsed or validated
                    # are left out of the statistics.
                    continue

        if total_count == 0:
            print_error("No valid filled templates found")
            ctx.exit(1)

        # Calculate statistics
        avg_length = sum(text_lengths) / len(text_lengths) if text_lengths else 0
        min_length = min(text_lengths) if text_lengths else 0
        max_length = max(text_lengths) if text_lengths else 0

        # Display statistics table
        table = Table(title="Filled Template Statistics")
        table.add_column("Metric", style="cyan")
        table.add_column("Value", style="green", justify="right")

        table.add_row("Total Filled Templates", str(total_count))
        table.add_row("Unique Template Names", str(len(templates_seen)))
        table.add_row("", "")  # Separator

        for strategy, count in sorted(strategies_used.items()):
            table.add_row(f"Strategy: {strategy}", str(count))

        table.add_row("", "")  # Separator
        table.add_row("Avg Text Length", f"{avg_length:.1f}")
        table.add_row("Min Text Length", str(min_length))
        table.add_row("Max Text Length", str(max_length))

        console.print(table)

        # Show sample templates
        if templates_seen:
            console.print("\n[cyan]Sample Template Names:[/cyan]")
            for name in sorted(templates_seen)[:5]:
                console.print(f"  • {name}")
            if len(templates_seen) > 5:
                console.print(f"  ... and {len(templates_seen) - 5} more")

    except click.exceptions.Exit:
        # ctx.exit() signals termination by raising click.exceptions.Exit,
        # which is a RuntimeError subclass. Re-raise it untouched so the
        # "No valid filled templates found" path is not followed by a bogus
        # "Failed to show statistics: 1" from the handler below.
        raise
    except Exception as e:
        print_error(f"Failed to show statistics: {e}")
        ctx.exit(1)
|
|
551
|
+
|
|
552
|
+
|
|
553
|
+
@click.command()
@click.argument("template_file", type=click.Path(exists=True, path_type=Path))
@click.argument(
    "lexicon_files",
    nargs=-1,
    type=click.Path(exists=True, path_type=Path),
    required=True,
)
@click.option(
    "--language-code",
    help="ISO 639 language code to filter items",
)
@click.pass_context
def estimate(
    ctx: click.Context,
    template_file: Path,
    lexicon_files: tuple[Path, ...],
    language_code: str | None,
) -> None:
    r"""Estimate total combinations for exhaustive filling.

    Calculates the total number of combinations that would be generated
    by exhaustive template filling without actually generating them.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    template_file : Path
        Path to template file.
    lexicon_files : tuple[Path, ...]
        Paths to one or more lexicon files to merge.
    language_code : str | None
        ISO 639 language code filter.

    Examples
    --------
    # Estimate combinations with single lexicon
    $ bead templates estimate-combinations template.jsonl lexicon.jsonl

    # With multiple lexicons
    $ bead templates estimate-combinations template.jsonl nouns.jsonl verbs.jsonl

    # With language filter
    $ bead templates estimate-combinations template.jsonl lexicon.jsonl --language-code eng
    """
    try:
        # Load and merge lexicons
        if not lexicon_files:
            # Defensive only: the argument is declared with required=True.
            print_error("At least one lexicon file is required")
            ctx.exit(1)

        print_info(f"Loading {len(lexicon_files)} lexicon(s)")
        merged_lexicon = Lexicon(name="merged", items={})

        for lex_file in lexicon_files:
            lex = Lexicon.from_jsonl(str(lex_file), lex_file.stem)
            merged_lexicon.items.update(lex.items)

        print_info(f"Total merged lexicon: {len(merged_lexicon)} items")

        # Load templates
        print_info(f"Loading templates from {template_file}")
        template_collection = TemplateCollection.from_jsonl(
            str(template_file), "templates"
        )

        # Calculate estimates for each template
        table = Table(title="Combination Estimates")
        table.add_column("Template", style="cyan")
        table.add_column("Slots", justify="right", style="yellow")
        table.add_column("Combinations", justify="right", style="green")

        # Every slot is offered the same language-filtered lemma list (the slot
        # itself is never consulted), so build that list once instead of once
        # per slot per template.
        # NOTE(review): slot-specific constraints are not applied here, so the
        # figure is presumably an upper bound on what filling produces - confirm.
        candidate_lemmas: list[str] = [
            item.lemma
            for item in merged_lexicon
            if language_code is None or item.language_code == language_code
        ]

        total_combinations = 0

        for template in template_collection:
            # One candidate list per slot of this template
            slot_lists: list[list[str]] = [candidate_lemmas for _ in template.slots]

            # Estimate combinations
            num_combos = count_combinations(*slot_lists)
            total_combinations += num_combos

            table.add_row(
                template.name,
                str(len(template.slots)),
                f"{num_combos:,}",
            )

        # Add total row
        table.add_section()
        table.add_row(
            "[bold]TOTAL[/bold]",
            "",
            f"[bold]{total_combinations:,}[/bold]",
        )

        console.print(table)

        # Warn if combinations are very large
        if total_combinations > 1_000_000:
            print_info(
                "\n⚠️ Warning: Exhaustive filling will generate over 1 million "
                "combinations. Consider using random or stratified strategies instead."
            )
        elif total_combinations > 100_000:
            print_info(
                "\n⚠️ Warning: Exhaustive filling will generate over 100K "
                "combinations. This may take significant time."
            )

    except click.exceptions.Exit:
        # ctx.exit() terminates by raising Exit (a RuntimeError subclass); let
        # it through so it is not reported as a second, bogus failure below.
        raise
    except Exception as e:
        print_error(f"Failed to estimate combinations: {e}")
        ctx.exit(1)
|
|
675
|
+
|
|
676
|
+
|
|
677
|
+
@click.command()
@click.argument("filled_file", type=click.Path(exists=True, path_type=Path))
@click.argument("output_file", type=click.Path(path_type=Path))
@click.option(
    "--expression",
    help="Filter expression (DSL) to apply to filled templates",
)
@click.option(
    "--min-length",
    type=int,
    help="Minimum text length",
)
@click.option(
    "--max-length",
    type=int,
    help="Maximum text length",
)
@click.option(
    "--template-name",
    help="Filter by template name (exact match)",
)
@click.option(
    "--strategy",
    help="Filter by strategy name",
)
@click.pass_context
def filter_filled(
    ctx: click.Context,
    filled_file: Path,
    output_file: Path,
    expression: str | None,
    min_length: int | None,
    max_length: int | None,
    template_name: str | None,
    strategy: str | None,
) -> None:
    """Filter filled templates by various criteria.

    Reads the JSONL input line by line and copies each record that passes
    every supplied filter to the output file unchanged. Lines that fail to
    parse or validate are reported and skipped.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    filled_file : Path
        Path to filled templates file.
    output_file : Path
        Path to output filtered file.
    expression : str | None
        DSL expression for filtering (not yet implemented; ignored).
    min_length : int | None
        Minimum text length.
    max_length : int | None
        Maximum text length.
    template_name : str | None
        Template name filter.
    strategy : str | None
        Strategy name filter.

    Examples
    --------
    $ bead templates filter-filled filled.jsonl filtered.jsonl --min-length 10
    $ bead templates filter-filled filled.jsonl filtered.jsonl --template-name active
    """
    try:
        print_info(f"Filtering filled templates from: {filled_file}")

        # DSL expression filtering would go here
        if expression:
            # Say so once up front instead of once per matching record.
            print_info("DSL expression filtering not yet implemented, skipping")

        filtered_count = 0
        total_count = 0

        output_file.parent.mkdir(parents=True, exist_ok=True)
        with open(output_file, "w", encoding="utf-8") as out_f:
            with open(filled_file, encoding="utf-8") as in_f:
                for line in in_f:
                    line = line.strip()
                    if not line:
                        continue

                    # Counts every non-blank line, including ones that fail
                    # to parse below.
                    total_count += 1

                    try:
                        filled_data = json.loads(line)
                        filled = FilledTemplate(**filled_data)

                        # Apply filters. Length bounds are compared against
                        # None so that an explicit 0 is honoured.
                        text_length = len(filled.rendered_text)
                        if min_length is not None and text_length < min_length:
                            continue
                        if max_length is not None and text_length > max_length:
                            continue
                        if template_name and filled.template_name != template_name:
                            continue
                        if strategy and filled.strategy_name != strategy:
                            continue

                        # Passed all filters; write the original line back out
                        out_f.write(line + "\n")
                        filtered_count += 1

                    except Exception as e:
                        print_error(f"Error processing line: {e}")
                        continue

        print_success(
            f"Filtered {filtered_count} of {total_count} templates: {output_file}"
        )

    except Exception as e:
        print_error(f"Failed to filter filled templates: {e}")
        ctx.exit(1)
|
|
790
|
+
|
|
791
|
+
|
|
792
|
+
@click.command()
@click.argument("input_files", nargs=-1, type=click.Path(exists=True, path_type=Path))
@click.argument("output_file", type=click.Path(path_type=Path))
@click.option(
    "--deduplicate",
    is_flag=True,
    help="Remove duplicates by UUID",
)
@click.pass_context
def merge_filled(
    ctx: click.Context,
    input_files: tuple[Path, ...],
    output_file: Path,
    deduplicate: bool,
) -> None:
    """Merge multiple filled template files.

    Concatenates the valid records of every input file, in the order given,
    into a single JSONL output file. Lines that fail to parse or validate
    are reported and skipped.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    input_files : tuple[Path, ...]
        Input filled template files.
    output_file : Path
        Output merged file.
    deduplicate : bool
        Remove duplicates by UUID.

    Examples
    --------
    $ bead templates merge-filled file1.jsonl file2.jsonl merged.jsonl
    $ bead templates merge-filled *.jsonl merged.jsonl --deduplicate
    """
    try:
        if not input_files:
            print_error("No input files provided")
            ctx.exit(1)

        print_info(f"Merging {len(input_files)} filled template files")

        seen_ids: set[str] = set()
        merged_count = 0
        duplicate_count = 0

        output_file.parent.mkdir(parents=True, exist_ok=True)
        with open(output_file, "w", encoding="utf-8") as out_f:
            for input_file in input_files:
                print_info(f"  Processing: {input_file}")
                with open(input_file, encoding="utf-8") as in_f:
                    for line in in_f:
                        line = line.strip()
                        if not line:
                            continue

                        try:
                            filled_data = json.loads(line)
                            filled = FilledTemplate(**filled_data)

                            if deduplicate:
                                # First occurrence of an id wins; later ones
                                # are counted and dropped.
                                filled_id = str(filled.id)
                                if filled_id in seen_ids:
                                    duplicate_count += 1
                                    continue
                                seen_ids.add(filled_id)

                            # Write the original line back out
                            out_f.write(line + "\n")
                            merged_count += 1

                        except Exception as e:
                            print_error(f"Error processing line from {input_file}: {e}")
                            continue

        print_success(f"Merged {merged_count} filled templates: {output_file}")
        if deduplicate and duplicate_count > 0:
            print_info(f"Removed {duplicate_count} duplicates")

    except click.exceptions.Exit:
        # ctx.exit() terminates by raising Exit (a RuntimeError subclass); let
        # it through so "No input files provided" is not followed by a second,
        # misleading "Failed to merge" message.
        raise
    except Exception as e:
        print_error(f"Failed to merge filled templates: {e}")
        ctx.exit(1)
|
|
870
|
+
|
|
871
|
+
|
|
872
|
+
@click.command()
@click.argument("filled_file", type=click.Path(exists=True, path_type=Path))
@click.argument("output_file", type=click.Path(path_type=Path))
@click.pass_context
def export_csv(
    ctx: click.Context,
    filled_file: Path,
    output_file: Path,
) -> None:
    """Export filled templates to CSV format.

    Writes one row per valid record with the columns ``id``, ``template_id``,
    ``template_name``, ``rendered_text``, ``strategy_name`` and ``slot_count``.
    Lines that fail to parse or validate are reported and skipped.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    filled_file : Path
        Input filled templates file (JSONL).
    output_file : Path
        Output CSV file.

    Examples
    --------
    $ bead templates export-csv filled.jsonl filled.csv
    """
    try:
        print_info(f"Exporting filled templates to CSV: {output_file}")

        filled_templates: list[FilledTemplate] = []

        with open(filled_file, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue

                try:
                    filled_data = json.loads(line)
                    filled = FilledTemplate(**filled_data)
                    filled_templates.append(filled)
                except Exception as e:
                    # Still best-effort, but report the bad line (as the
                    # filter/merge commands do) rather than dropping it silently.
                    print_error(f"Error processing line: {e}")
                    continue

        if not filled_templates:
            print_error("No valid filled templates found")
            ctx.exit(1)

        # Write to CSV
        output_file.parent.mkdir(parents=True, exist_ok=True)
        # newline="" is what the csv module expects of files it writes to
        with open(output_file, "w", newline="", encoding="utf-8") as f:
            writer = csv_module.writer(f)

            # Header
            writer.writerow(
                [
                    "id",
                    "template_id",
                    "template_name",
                    "rendered_text",
                    "strategy_name",
                    "slot_count",
                ]
            )

            # Data
            for filled in filled_templates:
                writer.writerow(
                    [
                        str(filled.id),
                        str(filled.template_id),
                        filled.template_name,
                        filled.rendered_text,
                        filled.strategy_name,
                        len(filled.slot_fillers),
                    ]
                )

        print_success(
            f"Exported {len(filled_templates)} filled templates to CSV: {output_file}"
        )

    except click.exceptions.Exit:
        # ctx.exit() terminates by raising Exit (a RuntimeError subclass); let
        # it through so it is not reported again as an export failure.
        raise
    except Exception as e:
        print_error(f"Failed to export to CSV: {e}")
        ctx.exit(1)
|
|
955
|
+
|
|
956
|
+
|
|
957
|
+
@click.command()
@click.argument("filled_file", type=click.Path(exists=True, path_type=Path))
@click.argument("output_file", type=click.Path(path_type=Path))
@click.option(
    "--pretty",
    is_flag=True,
    help="Pretty-print JSON with indentation",
)
@click.pass_context
def export_json(
    ctx: click.Context,
    filled_file: Path,
    output_file: Path,
    pretty: bool,
) -> None:
    """Export filled templates to JSON array format.

    Each valid JSONL record is validated and then written, as originally
    parsed, into a single JSON array. Lines that fail to parse or validate
    are reported and skipped.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    filled_file : Path
        Input filled templates file (JSONL).
    output_file : Path
        Output JSON file.
    pretty : bool
        Pretty-print with indentation.

    Examples
    --------
    $ bead templates export-json filled.jsonl filled.json
    $ bead templates export-json filled.jsonl filled.json --pretty
    """
    try:
        print_info(f"Exporting filled templates to JSON: {output_file}")

        filled_templates: list[dict[str, JsonValue]] = []

        with open(filled_file, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue

                try:
                    filled_data = json.loads(line)
                    FilledTemplate(**filled_data)  # Validate
                    # Keep the raw parsed dict, not the validated model
                    filled_templates.append(filled_data)
                except Exception as e:
                    # Still best-effort, but report the bad line (as the
                    # filter/merge commands do) rather than dropping it silently.
                    print_error(f"Error processing line: {e}")
                    continue

        if not filled_templates:
            print_error("No valid filled templates found")
            ctx.exit(1)

        # Write to JSON
        output_file.parent.mkdir(parents=True, exist_ok=True)
        with open(output_file, "w", encoding="utf-8") as f:
            # indent=None is json.dump's default compact layout
            json.dump(
                filled_templates,
                f,
                indent=2 if pretty else None,
                ensure_ascii=False,
            )

        print_success(
            f"Exported {len(filled_templates)} filled templates to JSON: {output_file}"
        )

    except click.exceptions.Exit:
        # ctx.exit() terminates by raising Exit (a RuntimeError subclass); let
        # it through so it is not reported again as an export failure.
        raise
    except Exception as e:
        print_error(f"Failed to export to JSON: {e}")
        ctx.exit(1)
|
|
1027
|
+
|
|
1028
|
+
|
|
1029
|
+
@click.command()
@click.argument("template_file", type=click.Path(exists=True, path_type=Path))
@click.argument(
    "lexicon_files",
    nargs=-1,
    type=click.Path(exists=True, path_type=Path),
    required=True,
)
@click.argument("output_file", type=click.Path(path_type=Path))
@click.option(
    "--n-samples",
    type=int,
    required=True,
    help="Number of samples to generate",
)
@click.option(
    "--seed",
    type=int,
    help="Random seed for reproducibility",
)
@click.option(
    "--language-code",
    help="ISO 639 language code to filter items",
)
@click.pass_context
def sample_combinations(
    ctx: click.Context,
    template_file: Path,
    lexicon_files: tuple[Path, ...],
    output_file: Path,
    n_samples: int,
    seed: int | None,
    language_code: str | None,
) -> None:
    r"""Sample template-lexicon combinations with stratified sampling.

    Uses stratified sampling to ensure diverse coverage of the combination space
    without exhaustive generation.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    template_file : Path
        Path to template file.
    lexicon_files : tuple[Path, ...]
        Paths to one or more lexicon files to merge.
    output_file : Path
        Path to output sampled combinations.
    n_samples : int
        Number of samples to generate.
    seed : int | None
        Random seed.
    language_code : str | None
        Language code filter.

    Examples
    --------
    # Single lexicon
    $ bead templates sample-combinations template.jsonl lexicon.jsonl samples.jsonl \\
    --n-samples 1000 --seed 42

    # Multiple lexicons
    $ bead templates sample-combinations tpl.jsonl nouns.jsonl verbs.jsonl out.jsonl \\
    --n-samples 1000 --seed 42
    """
    try:
        # Load and merge lexicons
        if not lexicon_files:
            # Defensive only: the argument is declared with required=True.
            print_error("At least one lexicon file is required")
            ctx.exit(1)

        print_info(f"Loading {len(lexicon_files)} lexicon(s)")
        merged_lexicon = Lexicon(name="merged", items={})

        for lex_file in lexicon_files:
            lex = Lexicon.from_jsonl(str(lex_file), lex_file.stem)
            print_info(f"  Loaded {len(lex)} items from {lex_file.name}")
            merged_lexicon.items.update(lex.items)

        print_info(f"Total merged lexicon: {len(merged_lexicon)} items")

        # Load templates
        print_info(f"Loading templates from {template_file}")
        template_collection = TemplateCollection.from_jsonl(
            str(template_file), "templates"
        )

        # Use random strategy for sampling
        # NOTE(review): the help text and progress message say "stratified",
        # but the strategy constructed here is RandomStrategy - confirm which
        # one is intended.
        print_info(f"Generating {n_samples} stratified samples")
        strategy = RandomStrategy(n_samples=n_samples, seed=seed)
        filler = StrategyFiller(lexicon=merged_lexicon, strategy=strategy)

        # Fill templates; a template that cannot be filled is reported and
        # skipped so the remaining templates are still processed.
        all_filled: list[FilledTemplate] = []
        for template in template_collection:
            try:
                filled_templates = filler.fill(template, language_code)
                all_filled.extend(filled_templates)
            except ValueError as e:
                print_error(f"Failed to fill template '{template.name}': {e}")
                continue

        # Save sampled combinations
        output_file.parent.mkdir(parents=True, exist_ok=True)
        with open(output_file, "w", encoding="utf-8") as f:
            for filled in all_filled:
                f.write(filled.model_dump_json() + "\n")

        print_success(
            f"Generated {len(all_filled)} sampled combinations: {output_file}"
        )

    except click.exceptions.Exit:
        # ctx.exit() terminates by raising Exit (a RuntimeError subclass); let
        # it through so it is not reported again as a sampling failure.
        raise
    except Exception as e:
        print_error(f"Failed to sample combinations: {e}")
        ctx.exit(1)
|
|
1146
|
+
|
|
1147
|
+
|
|
1148
|
+
# Register commands
# Each entry is (command object, explicit CLI name). ``None`` passes no
# explicit name, which is the same as calling ``add_command(cmd)`` directly.
for _cmd, _cli_name in (
    (fill, None),
    (list_filled, None),
    (validate_filled, None),
    (show_stats, None),
    (estimate, "estimate-combinations"),
    (filter_filled, None),
    (merge_filled, None),
    (export_csv, None),
    (export_json, None),
    (sample_combinations, None),
):
    templates.add_command(_cmd, name=_cli_name)
del _cmd, _cli_name  # do not leave loop variables in the module namespace
|