bead 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. bead/__init__.py +11 -0
  2. bead/__main__.py +11 -0
  3. bead/active_learning/__init__.py +15 -0
  4. bead/active_learning/config.py +231 -0
  5. bead/active_learning/loop.py +566 -0
  6. bead/active_learning/models/__init__.py +24 -0
  7. bead/active_learning/models/base.py +852 -0
  8. bead/active_learning/models/binary.py +910 -0
  9. bead/active_learning/models/categorical.py +943 -0
  10. bead/active_learning/models/cloze.py +862 -0
  11. bead/active_learning/models/forced_choice.py +956 -0
  12. bead/active_learning/models/free_text.py +773 -0
  13. bead/active_learning/models/lora.py +365 -0
  14. bead/active_learning/models/magnitude.py +835 -0
  15. bead/active_learning/models/multi_select.py +795 -0
  16. bead/active_learning/models/ordinal_scale.py +811 -0
  17. bead/active_learning/models/peft_adapter.py +155 -0
  18. bead/active_learning/models/random_effects.py +639 -0
  19. bead/active_learning/selection.py +354 -0
  20. bead/active_learning/strategies.py +391 -0
  21. bead/active_learning/trainers/__init__.py +26 -0
  22. bead/active_learning/trainers/base.py +210 -0
  23. bead/active_learning/trainers/data_collator.py +172 -0
  24. bead/active_learning/trainers/dataset_utils.py +261 -0
  25. bead/active_learning/trainers/huggingface.py +304 -0
  26. bead/active_learning/trainers/lightning.py +324 -0
  27. bead/active_learning/trainers/metrics.py +424 -0
  28. bead/active_learning/trainers/mixed_effects.py +551 -0
  29. bead/active_learning/trainers/model_wrapper.py +509 -0
  30. bead/active_learning/trainers/registry.py +104 -0
  31. bead/adapters/__init__.py +11 -0
  32. bead/adapters/huggingface.py +61 -0
  33. bead/behavioral/__init__.py +116 -0
  34. bead/behavioral/analytics.py +646 -0
  35. bead/behavioral/extraction.py +343 -0
  36. bead/behavioral/merging.py +343 -0
  37. bead/cli/__init__.py +11 -0
  38. bead/cli/active_learning.py +513 -0
  39. bead/cli/active_learning_commands.py +779 -0
  40. bead/cli/completion.py +359 -0
  41. bead/cli/config.py +624 -0
  42. bead/cli/constraint_builders.py +286 -0
  43. bead/cli/deployment.py +859 -0
  44. bead/cli/deployment_trials.py +493 -0
  45. bead/cli/deployment_ui.py +332 -0
  46. bead/cli/display.py +378 -0
  47. bead/cli/items.py +960 -0
  48. bead/cli/items_factories.py +776 -0
  49. bead/cli/list_constraints.py +714 -0
  50. bead/cli/lists.py +490 -0
  51. bead/cli/main.py +430 -0
  52. bead/cli/models.py +877 -0
  53. bead/cli/resource_loaders.py +621 -0
  54. bead/cli/resources.py +1036 -0
  55. bead/cli/shell.py +356 -0
  56. bead/cli/simulate.py +840 -0
  57. bead/cli/templates.py +1158 -0
  58. bead/cli/training.py +1080 -0
  59. bead/cli/utils.py +614 -0
  60. bead/cli/workflow.py +1273 -0
  61. bead/config/__init__.py +68 -0
  62. bead/config/active_learning.py +1009 -0
  63. bead/config/config.py +192 -0
  64. bead/config/defaults.py +118 -0
  65. bead/config/deployment.py +217 -0
  66. bead/config/env.py +147 -0
  67. bead/config/item.py +45 -0
  68. bead/config/list.py +193 -0
  69. bead/config/loader.py +149 -0
  70. bead/config/logging.py +42 -0
  71. bead/config/model.py +49 -0
  72. bead/config/paths.py +46 -0
  73. bead/config/profiles.py +320 -0
  74. bead/config/resources.py +47 -0
  75. bead/config/serialization.py +210 -0
  76. bead/config/simulation.py +206 -0
  77. bead/config/template.py +238 -0
  78. bead/config/validation.py +267 -0
  79. bead/data/__init__.py +65 -0
  80. bead/data/base.py +87 -0
  81. bead/data/identifiers.py +97 -0
  82. bead/data/language_codes.py +61 -0
  83. bead/data/metadata.py +270 -0
  84. bead/data/range.py +123 -0
  85. bead/data/repository.py +358 -0
  86. bead/data/serialization.py +249 -0
  87. bead/data/timestamps.py +89 -0
  88. bead/data/validation.py +349 -0
  89. bead/data_collection/__init__.py +11 -0
  90. bead/data_collection/jatos.py +223 -0
  91. bead/data_collection/merger.py +154 -0
  92. bead/data_collection/prolific.py +198 -0
  93. bead/deployment/__init__.py +5 -0
  94. bead/deployment/distribution.py +402 -0
  95. bead/deployment/jatos/__init__.py +1 -0
  96. bead/deployment/jatos/api.py +200 -0
  97. bead/deployment/jatos/exporter.py +210 -0
  98. bead/deployment/jspsych/__init__.py +9 -0
  99. bead/deployment/jspsych/biome.json +44 -0
  100. bead/deployment/jspsych/config.py +411 -0
  101. bead/deployment/jspsych/generator.py +598 -0
  102. bead/deployment/jspsych/package.json +51 -0
  103. bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
  104. bead/deployment/jspsych/randomizer.py +299 -0
  105. bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
  106. bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
  107. bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
  108. bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
  109. bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
  110. bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
  111. bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
  112. bead/deployment/jspsych/src/plugins/rating.ts +248 -0
  113. bead/deployment/jspsych/src/slopit/index.ts +9 -0
  114. bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
  115. bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
  116. bead/deployment/jspsych/templates/experiment.css +1 -0
  117. bead/deployment/jspsych/templates/experiment.js.template +289 -0
  118. bead/deployment/jspsych/templates/index.html +51 -0
  119. bead/deployment/jspsych/templates/randomizer.js +241 -0
  120. bead/deployment/jspsych/templates/randomizer.js.template +313 -0
  121. bead/deployment/jspsych/trials.py +723 -0
  122. bead/deployment/jspsych/tsconfig.json +23 -0
  123. bead/deployment/jspsych/tsup.config.ts +30 -0
  124. bead/deployment/jspsych/ui/__init__.py +1 -0
  125. bead/deployment/jspsych/ui/components.py +383 -0
  126. bead/deployment/jspsych/ui/styles.py +411 -0
  127. bead/dsl/__init__.py +80 -0
  128. bead/dsl/ast.py +168 -0
  129. bead/dsl/context.py +178 -0
  130. bead/dsl/errors.py +71 -0
  131. bead/dsl/evaluator.py +570 -0
  132. bead/dsl/grammar.lark +81 -0
  133. bead/dsl/parser.py +231 -0
  134. bead/dsl/stdlib.py +929 -0
  135. bead/evaluation/__init__.py +13 -0
  136. bead/evaluation/convergence.py +485 -0
  137. bead/evaluation/interannotator.py +398 -0
  138. bead/items/__init__.py +40 -0
  139. bead/items/adapters/__init__.py +70 -0
  140. bead/items/adapters/anthropic.py +224 -0
  141. bead/items/adapters/api_utils.py +167 -0
  142. bead/items/adapters/base.py +216 -0
  143. bead/items/adapters/google.py +259 -0
  144. bead/items/adapters/huggingface.py +1074 -0
  145. bead/items/adapters/openai.py +323 -0
  146. bead/items/adapters/registry.py +202 -0
  147. bead/items/adapters/sentence_transformers.py +224 -0
  148. bead/items/adapters/togetherai.py +309 -0
  149. bead/items/binary.py +515 -0
  150. bead/items/cache.py +558 -0
  151. bead/items/categorical.py +593 -0
  152. bead/items/cloze.py +757 -0
  153. bead/items/constructor.py +784 -0
  154. bead/items/forced_choice.py +413 -0
  155. bead/items/free_text.py +681 -0
  156. bead/items/generation.py +432 -0
  157. bead/items/item.py +396 -0
  158. bead/items/item_template.py +787 -0
  159. bead/items/magnitude.py +573 -0
  160. bead/items/multi_select.py +621 -0
  161. bead/items/ordinal_scale.py +569 -0
  162. bead/items/scoring.py +448 -0
  163. bead/items/validation.py +723 -0
  164. bead/lists/__init__.py +30 -0
  165. bead/lists/balancer.py +263 -0
  166. bead/lists/constraints.py +1067 -0
  167. bead/lists/experiment_list.py +286 -0
  168. bead/lists/list_collection.py +378 -0
  169. bead/lists/partitioner.py +1141 -0
  170. bead/lists/stratification.py +254 -0
  171. bead/participants/__init__.py +73 -0
  172. bead/participants/collection.py +699 -0
  173. bead/participants/merging.py +312 -0
  174. bead/participants/metadata_spec.py +491 -0
  175. bead/participants/models.py +276 -0
  176. bead/resources/__init__.py +29 -0
  177. bead/resources/adapters/__init__.py +19 -0
  178. bead/resources/adapters/base.py +104 -0
  179. bead/resources/adapters/cache.py +128 -0
  180. bead/resources/adapters/glazing.py +508 -0
  181. bead/resources/adapters/registry.py +117 -0
  182. bead/resources/adapters/unimorph.py +796 -0
  183. bead/resources/classification.py +856 -0
  184. bead/resources/constraint_builders.py +329 -0
  185. bead/resources/constraints.py +165 -0
  186. bead/resources/lexical_item.py +223 -0
  187. bead/resources/lexicon.py +744 -0
  188. bead/resources/loaders.py +209 -0
  189. bead/resources/template.py +441 -0
  190. bead/resources/template_collection.py +707 -0
  191. bead/resources/template_generation.py +349 -0
  192. bead/simulation/__init__.py +29 -0
  193. bead/simulation/annotators/__init__.py +15 -0
  194. bead/simulation/annotators/base.py +175 -0
  195. bead/simulation/annotators/distance_based.py +135 -0
  196. bead/simulation/annotators/lm_based.py +114 -0
  197. bead/simulation/annotators/oracle.py +182 -0
  198. bead/simulation/annotators/random.py +181 -0
  199. bead/simulation/dsl_extension/__init__.py +3 -0
  200. bead/simulation/noise_models/__init__.py +13 -0
  201. bead/simulation/noise_models/base.py +42 -0
  202. bead/simulation/noise_models/random_noise.py +82 -0
  203. bead/simulation/noise_models/systematic.py +132 -0
  204. bead/simulation/noise_models/temperature.py +86 -0
  205. bead/simulation/runner.py +144 -0
  206. bead/simulation/strategies/__init__.py +23 -0
  207. bead/simulation/strategies/base.py +123 -0
  208. bead/simulation/strategies/binary.py +103 -0
  209. bead/simulation/strategies/categorical.py +123 -0
  210. bead/simulation/strategies/cloze.py +224 -0
  211. bead/simulation/strategies/forced_choice.py +127 -0
  212. bead/simulation/strategies/free_text.py +105 -0
  213. bead/simulation/strategies/magnitude.py +116 -0
  214. bead/simulation/strategies/multi_select.py +129 -0
  215. bead/simulation/strategies/ordinal_scale.py +131 -0
  216. bead/templates/__init__.py +27 -0
  217. bead/templates/adapters/__init__.py +17 -0
  218. bead/templates/adapters/base.py +128 -0
  219. bead/templates/adapters/cache.py +178 -0
  220. bead/templates/adapters/huggingface.py +312 -0
  221. bead/templates/combinatorics.py +103 -0
  222. bead/templates/filler.py +605 -0
  223. bead/templates/renderers.py +177 -0
  224. bead/templates/resolver.py +178 -0
  225. bead/templates/strategies.py +1806 -0
  226. bead/templates/streaming.py +195 -0
  227. bead-0.1.0.dist-info/METADATA +212 -0
  228. bead-0.1.0.dist-info/RECORD +231 -0
  229. bead-0.1.0.dist-info/WHEEL +4 -0
  230. bead-0.1.0.dist-info/entry_points.txt +2 -0
  231. bead-0.1.0.dist-info/licenses/LICENSE +21 -0
bead/cli/templates.py ADDED
@@ -0,0 +1,1158 @@
1
+ """Template filling commands for bead CLI.
2
+
3
+ This module provides commands for filling templates with lexical items
4
+ (Stage 2 of the bead pipeline).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import csv as csv_module
10
+ import json
11
+ from pathlib import Path
12
+
13
+ import click
14
+ from pydantic import ValidationError
15
+ from rich.console import Console
16
+ from rich.progress import Progress, SpinnerColumn, TextColumn
17
+ from rich.table import Table
18
+
19
+ from bead.cli.utils import print_error, print_info, print_success
20
+ from bead.data.base import JsonValue
21
+ from bead.dsl.evaluator import DSLEvaluator
22
+ from bead.dsl.parser import parse
23
+ from bead.resources.constraints import Constraint
24
+ from bead.resources.lexicon import Lexicon
25
+ from bead.resources.template_collection import TemplateCollection
26
+ from bead.templates.combinatorics import count_combinations
27
+ from bead.templates.filler import FilledTemplate
28
+ from bead.templates.strategies import (
29
+ ExhaustiveStrategy,
30
+ RandomStrategy,
31
+ StrategyFiller,
32
+ StratifiedStrategy,
33
+ )
34
+
35
# Module-level Rich console shared by the commands below for tables and progress output.
console = Console()
36
+
37
+
38
# NOTE: this docstring is deliberately NOT a raw string. Click's "do not rewrap
# this paragraph" marker is a line containing only the backspace character, which
# is what "\b" produces in a normal string literal; in a raw string it would be a
# literal backslash followed by "b" and be printed verbatim in --help. For the
# same reason "\\" below renders as a single shell line-continuation backslash.
@click.group()
def templates() -> None:
    """Template filling commands (Stage 2).

    Commands for filling templates with lexical items using various strategies.

    \b
    Examples:
        $ bead templates fill template.jsonl lexicon.jsonl filled.jsonl \\
            --strategy exhaustive
        $ bead templates fill template.jsonl lexicon.jsonl filled.jsonl \\
            --strategy random --max-combinations 100
        $ bead templates list-filled --directory filled_templates/
        $ bead templates validate-filled filled.jsonl
        $ bead templates show-stats filled.jsonl
    """  # noqa: D301
54
+
55
+
56
@click.command()
@click.argument("template_file", type=click.Path(exists=True, path_type=Path))
@click.argument(
    "lexicon_files",
    nargs=-1,
    type=click.Path(exists=True, path_type=Path),
    required=True,
)
@click.argument("output_file", type=click.Path(path_type=Path))
@click.option(
    "--strategy",
    type=click.Choice(["exhaustive", "random", "stratified"]),
    default="exhaustive",
    help="Filling strategy to use",
)
@click.option(
    "--max-combinations",
    type=int,
    help="Maximum combinations for random/stratified strategies",
)
@click.option(
    "--random-seed",
    type=int,
    help="Random seed for reproducibility",
)
@click.option(
    "--grouping-property",
    help="Property for stratified strategy (e.g., 'pos', 'features.tense')",
)
@click.option(
    "--language-code",
    help="ISO 639 language code to filter items",
)
@click.option(
    "--constraints",
    type=click.Path(exists=True, path_type=Path),
    help="Path to constraints file (JSONL) to apply during filling",
)
@click.pass_context
def fill(
    ctx: click.Context,
    template_file: Path,
    lexicon_files: tuple[Path, ...],
    output_file: Path,
    strategy: str,
    max_combinations: int | None,
    random_seed: int | None,
    grouping_property: str | None,
    language_code: str | None,
    constraints: Path | None,
) -> None:
    r"""Fill templates with lexical items.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    template_file : Path
        Path to template file.
    lexicon_files : tuple[Path, ...]
        Paths to one or more lexicon files to merge.
    output_file : Path
        Path to output filled templates file.
    strategy : str
        Filling strategy name.
    max_combinations : int | None
        Maximum number of combinations.
    random_seed : int | None
        Random seed for reproducibility.
    grouping_property : str | None
        Property for stratified sampling.
    language_code : str | None
        ISO 639 language code filter.
    constraints : Path | None
        Path to constraints file (JSONL) to apply.

    Examples
    --------
    # Exhaustive filling with single lexicon
    $ bead templates fill template.jsonl lexicon.jsonl filled.jsonl \\
        --strategy exhaustive

    # Multiple lexicons
    $ bead templates fill tpl.jsonl nouns.jsonl verbs.jsonl filled.jsonl \\
        --strategy exhaustive

    # Random sampling
    $ bead templates fill template.jsonl lexicon.jsonl filled.jsonl \\
        --strategy random --max-combinations 100 --random-seed 42

    # Stratified sampling
    $ bead templates fill template.jsonl lexicon.jsonl filled.jsonl \\
        --strategy stratified --max-combinations 100 --grouping-property pos

    # With constraints
    $ bead templates fill template.jsonl lexicon.jsonl filled.jsonl \\
        --strategy exhaustive --constraints constraints.jsonl
    """
    try:
        # Validate strategy-specific options
        if strategy in ("random", "stratified") and max_combinations is None:
            print_error(f"--max-combinations required for {strategy} strategy")
            ctx.exit(1)

        if strategy == "stratified" and grouping_property is None:
            print_error("--grouping-property required for stratified strategy")
            ctx.exit(1)

        # Load and merge lexicons
        if not lexicon_files:
            print_error("At least one lexicon file is required")
            ctx.exit(1)

        print_info(f"Loading {len(lexicon_files)} lexicon(s)")
        merged_lexicon = Lexicon(name="merged", items={})

        for lex_file in lexicon_files:
            lex = Lexicon.from_jsonl(str(lex_file), lex_file.stem)
            print_info(f"  Loaded {len(lex)} items from {lex_file.name}")
            # Mapping update: an entry from a later file replaces an earlier
            # entry stored under the same key.
            merged_lexicon.items.update(lex.items)

        print_info(f"Total merged lexicon: {len(merged_lexicon)} items")
        lexicon = merged_lexicon

        # Load templates
        print_info(f"Loading templates from {template_file}")
        template_collection = TemplateCollection.from_jsonl(
            str(template_file), "templates"
        )
        print_info(f"Loaded {len(template_collection)} templates")

        # Load and apply constraints if provided
        if constraints:
            print_info(f"Loading constraints from {constraints}")
            loaded_constraints: list[Constraint] = []

            with open(constraints, encoding="utf-8") as f:
                for line_num, line in enumerate(f, start=1):
                    line = line.strip()
                    if not line:
                        continue

                    # The first malformed line aborts the whole command.
                    try:
                        constraint_data = json.loads(line)
                        constraint = Constraint(**constraint_data)
                        loaded_constraints.append(constraint)
                    except json.JSONDecodeError as e:
                        print_error(f"Invalid JSON on line {line_num}: {e}")
                        ctx.exit(1)
                    except ValidationError as e:
                        print_error(f"Invalid constraint on line {line_num}: {e}")
                        ctx.exit(1)

            print_info(f"Loaded {len(loaded_constraints)} constraints")

            # Apply constraints to all templates (the same constraint objects
            # are appended to every template's existing constraint list).
            for template in template_collection:
                template.constraints.extend(loaded_constraints)

            print_info(f"Applied constraints to {len(template_collection)} templates")

        # Create strategy
        filling_strategy: ExhaustiveStrategy | RandomStrategy | StratifiedStrategy
        if strategy == "exhaustive":
            filling_strategy = ExhaustiveStrategy()
        elif strategy == "random":
            # Already validated above; the assert only narrows the type.
            assert max_combinations is not None
            filling_strategy = RandomStrategy(
                n_samples=max_combinations,
                seed=random_seed,
            )
        elif strategy == "stratified":
            assert max_combinations is not None
            assert grouping_property is not None
            filling_strategy = StratifiedStrategy(
                n_samples=max_combinations,
                grouping_property=grouping_property,
                seed=random_seed,
            )
        else:
            # Defensive: click.Choice should already have rejected other values.
            print_error(f"Unknown strategy: {strategy}")
            ctx.exit(1)

        # Create filler
        filler = StrategyFiller(lexicon=lexicon, strategy=filling_strategy)

        # Fill templates with progress
        all_filled: list[FilledTemplate] = []

        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console,
        ) as progress:
            task = progress.add_task(
                f"Filling {len(template_collection)} templates...",
                total=len(template_collection),
            )

            for template in template_collection:
                try:
                    filled_templates = filler.fill(template, language_code)
                    all_filled.extend(filled_templates)
                    progress.advance(task)
                except ValueError as e:
                    # Best effort: report the failing template and keep going.
                    print_error(f"Failed to fill template '{template.name}': {e}")
                    continue

        # Save filled templates
        output_file.parent.mkdir(parents=True, exist_ok=True)
        with open(output_file, "w", encoding="utf-8") as f:
            for filled in all_filled:
                f.write(filled.model_dump_json() + "\n")

        print_success(
            f"Created {len(all_filled)} filled templates from "
            f"{len(template_collection)} templates: {output_file}"
        )

    except click.exceptions.Exit:
        # ctx.exit() works by raising Exit, which subclasses RuntimeError. It
        # must pass through untouched; otherwise the generic handler below
        # reports a bogus "Failed to fill templates: 1" after the real error.
        raise
    except ValidationError as e:
        print_error(f"Validation error: {e}")
        ctx.exit(1)
    except Exception as e:
        print_error(f"Failed to fill templates: {e}")
        ctx.exit(1)
282
+
283
+
284
@click.command()
@click.option(
    "--directory",
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    default=Path.cwd(),
    help="Directory to search for filled template files",
)
@click.option(
    "--pattern",
    default="*.jsonl",
    help="File pattern to match (default: *.jsonl)",
)
@click.option(
    "--filter",
    "filter_expr",
    help="DSL expression to filter (e.g., 'slot_fillers.noun.lemma == \"cat\"')",
)
@click.pass_context
def list_filled(
    ctx: click.Context,
    directory: Path,
    pattern: str,
    filter_expr: str | None,
) -> None:
    """List filled template files in a directory.

    Prints one table row per matching file: the number of non-blank lines,
    the number of records matching the filter (or "N/A" without a filter),
    and the strategy name and rendered text of the file's first record.
    Empty files, files with no matching records, and files that cannot be
    parsed are left out of the table.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    directory : Path
        Directory to search.
    pattern : str
        File pattern to match.
    filter_expr : str | None
        DSL expression to filter filled templates.

    Examples
    --------
    $ bead templates list-filled
    $ bead templates list-filled --directory filled_templates/
    $ bead templates list-filled --pattern "filled_*.jsonl"
    $ bead templates list-filled --filter "slot_fillers.noun.lemma == 'cat'"
    $ bead templates list-filled --filter "len(slot_fillers) > 2"
    """
    try:
        files = list(directory.glob(pattern))

        if not files:
            print_info(f"No files found in {directory} matching {pattern}")
            return

        # Parse filter expression if provided
        filter_ast = None
        evaluator = None
        if filter_expr:
            try:
                filter_ast = parse(filter_expr)
                evaluator = DSLEvaluator()
                print_info(f"Filtering with expression: {filter_expr}")
            except Exception as e:
                print_error(f"Invalid filter expression: {e}")
                ctx.exit(1)

        table = Table(title=f"Filled Templates in {directory}")
        table.add_column("File", style="cyan")
        table.add_column("Count", justify="right", style="yellow")
        table.add_column("Filtered", justify="right", style="magenta")
        table.add_column("Strategy", style="green")
        table.add_column("Sample", style="white")

        for file_path in sorted(files):
            try:
                # Count filled templates and get metadata
                with open(file_path, encoding="utf-8") as f:
                    lines = [line.strip() for line in f if line.strip()]

                if not lines:
                    continue

                # Apply filter if provided. Compare against None explicitly so
                # the decision never depends on the truthiness of the parsed
                # AST or evaluator objects.
                filtered_count = 0
                if filter_ast is not None and evaluator is not None:
                    for line in lines:
                        try:
                            filled_data = json.loads(line)
                            filled_template = FilledTemplate(**filled_data)
                            # Create evaluation context
                            # NOTE(review): a plain dict is passed as the
                            # context; confirm DSLEvaluator.evaluate accepts it.
                            # Any failure here is silently counted as a
                            # non-match by the handler below.
                            context = {"self": filled_template}
                            # Evaluate filter
                            if evaluator.evaluate(filter_ast, context):
                                filtered_count += 1
                        except Exception:
                            continue
                else:
                    filtered_count = len(lines)

                if filtered_count == 0:
                    continue

                # Parse first filled template for metadata (the first record in
                # the file, not necessarily one that matched the filter).
                first_data = json.loads(lines[0])
                strategy_name = first_data.get("strategy_name", "N/A")
                rendered = first_data.get("rendered_text", "N/A")

                # Truncate long rendered text
                if len(rendered) > 40:
                    rendered = rendered[:37] + "..."

                table.add_row(
                    str(file_path.name),
                    str(len(lines)),
                    str(filtered_count) if filter_expr else "N/A",
                    strategy_name,
                    rendered,
                )
            except Exception:
                # Skip files that can't be parsed
                continue

        console.print(table)

    except click.exceptions.Exit:
        # ctx.exit() raises Exit (a RuntimeError subclass). Re-raise it so the
        # generic handler below does not print a second, misleading
        # "Failed to list filled templates: 1" message.
        raise
    except Exception as e:
        print_error(f"Failed to list filled templates: {e}")
        ctx.exit(1)
409
+
410
+
411
@click.command()
@click.argument("filled_file", type=click.Path(exists=True, path_type=Path))
@click.pass_context
def validate_filled(ctx: click.Context, filled_file: Path) -> None:
    """Validate a filled templates file.

    Checks that all filled templates are properly formatted. Every non-blank
    line must be a JSON object accepted by ``FilledTemplate``. All lines are
    checked before reporting; at most the first 10 problems are printed and
    the command exits with status 1 if any were found.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    filled_file : Path
        Path to filled templates file.

    Examples
    --------
    $ bead templates validate-filled filled.jsonl
    """
    try:
        print_info(f"Validating filled templates: {filled_file}")

        count = 0
        errors: list[str] = []

        with open(filled_file, encoding="utf-8") as f:
            for line_num, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    continue

                try:
                    filled_data = json.loads(line)
                    FilledTemplate(**filled_data)
                    count += 1
                except json.JSONDecodeError as e:
                    errors.append(f"Line {line_num}: Invalid JSON - {e}")
                except ValidationError as e:
                    errors.append(f"Line {line_num}: Validation error - {e}")
                except TypeError as e:
                    # Valid JSON that is not an object (e.g. a list or number)
                    # cannot be unpacked with **; report it for this line
                    # instead of aborting the whole validation run.
                    errors.append(f"Line {line_num}: Validation error - {e}")

        if errors:
            print_error(f"Validation failed with {len(errors)} errors:")
            for error in errors[:10]:
                console.print(f"  [red]✗[/red] {error}")
            if len(errors) > 10:
                console.print(f"  ... and {len(errors) - 10} more errors")
            ctx.exit(1)
        else:
            print_success(f"Filled templates file is valid: {count} filled templates")

    except click.exceptions.Exit:
        # ctx.exit() raises Exit (a RuntimeError subclass). Let it propagate so
        # the failure summary above is not followed by a spurious
        # "Failed to validate filled templates: 1" message.
        raise
    except Exception as e:
        print_error(f"Failed to validate filled templates: {e}")
        ctx.exit(1)
464
+
465
+
466
@click.command()
@click.argument("filled_file", type=click.Path(exists=True, path_type=Path))
@click.pass_context
def show_stats(ctx: click.Context, filled_file: Path) -> None:
    """Show statistics about filled templates.

    Reports the number of valid records, the number of distinct template
    names, a per-strategy record count, and the average/min/max length of the
    rendered text, followed by up to five template names. Lines that fail to
    parse or validate are ignored; the command exits with status 1 when no
    valid record is found.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    filled_file : Path
        Path to filled templates file.

    Examples
    --------
    $ bead templates show-stats filled.jsonl
    """
    try:
        print_info(f"Analyzing filled templates: {filled_file}")

        # Collect statistics
        total_count = 0
        templates_seen: set[str] = set()
        strategies_used: dict[str, int] = {}
        text_lengths: list[int] = []

        with open(filled_file, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue

                try:
                    filled_data = json.loads(line)
                    filled = FilledTemplate(**filled_data)

                    total_count += 1
                    templates_seen.add(filled.template_name)
                    strategies_used[filled.strategy_name] = (
                        strategies_used.get(filled.strategy_name, 0) + 1
                    )
                    text_lengths.append(len(filled.rendered_text))

                except Exception:
                    # Best effort: unreadable records are left out of the stats.
                    continue

        if total_count == 0:
            print_error("No valid filled templates found")
            ctx.exit(1)

        # Calculate statistics
        avg_length = sum(text_lengths) / len(text_lengths) if text_lengths else 0
        min_length = min(text_lengths) if text_lengths else 0
        max_length = max(text_lengths) if text_lengths else 0

        # Display statistics table
        table = Table(title="Filled Template Statistics")
        table.add_column("Metric", style="cyan")
        table.add_column("Value", style="green", justify="right")

        table.add_row("Total Filled Templates", str(total_count))
        table.add_row("Unique Template Names", str(len(templates_seen)))
        table.add_row("", "")  # Separator

        for strategy, count in sorted(strategies_used.items()):
            table.add_row(f"Strategy: {strategy}", str(count))

        table.add_row("", "")  # Separator
        table.add_row("Avg Text Length", f"{avg_length:.1f}")
        table.add_row("Min Text Length", str(min_length))
        table.add_row("Max Text Length", str(max_length))

        console.print(table)

        # Show sample templates
        if templates_seen:
            console.print("\n[cyan]Sample Template Names:[/cyan]")
            for name in sorted(templates_seen)[:5]:
                console.print(f"  • {name}")
            if len(templates_seen) > 5:
                console.print(f"  ... and {len(templates_seen) - 5} more")

    except click.exceptions.Exit:
        # ctx.exit() raises Exit (a RuntimeError subclass). Re-raise so that
        # "No valid filled templates found" is not followed by a misleading
        # "Failed to show statistics: 1" from the generic handler.
        raise
    except Exception as e:
        print_error(f"Failed to show statistics: {e}")
        ctx.exit(1)
551
+
552
+
553
@click.command()
@click.argument("template_file", type=click.Path(exists=True, path_type=Path))
@click.argument(
    "lexicon_files",
    nargs=-1,
    type=click.Path(exists=True, path_type=Path),
    required=True,
)
@click.option(
    "--language-code",
    help="ISO 639 language code to filter items",
)
@click.pass_context
def estimate(
    ctx: click.Context,
    template_file: Path,
    lexicon_files: tuple[Path, ...],
    language_code: str | None,
) -> None:
    r"""Estimate total combinations for exhaustive filling.

    Calculates the total number of combinations that would be generated
    by exhaustive template filling without actually generating them.

    Every slot is counted as accepting every lexicon item that passes the
    language filter; slot and template constraints are not consulted here.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    template_file : Path
        Path to template file.
    lexicon_files : tuple[Path, ...]
        Paths to one or more lexicon files to merge.
    language_code : str | None
        ISO 639 language code filter.

    Examples
    --------
    # Estimate combinations with single lexicon
    $ bead templates estimate template.jsonl lexicon.jsonl

    # With multiple lexicons
    $ bead templates estimate template.jsonl nouns.jsonl verbs.jsonl

    # With language filter
    $ bead templates estimate template.jsonl lexicon.jsonl --language-code eng
    """
    try:
        # Load and merge lexicons
        if not lexicon_files:
            print_error("At least one lexicon file is required")
            ctx.exit(1)

        print_info(f"Loading {len(lexicon_files)} lexicon(s)")
        merged_lexicon = Lexicon(name="merged", items={})

        for lex_file in lexicon_files:
            lex = Lexicon.from_jsonl(str(lex_file), lex_file.stem)
            # Later files replace earlier entries stored under the same key.
            merged_lexicon.items.update(lex.items)

        print_info(f"Total merged lexicon: {len(merged_lexicon)} items")
        lexicon = merged_lexicon

        # Load templates
        print_info(f"Loading templates from {template_file}")
        template_collection = TemplateCollection.from_jsonl(
            str(template_file), "templates"
        )

        # The candidate list depends only on the lexicon and the language
        # filter, so build it once instead of once per slot of every template.
        candidate_lemmas: list[str] = [
            item.lemma
            for item in lexicon
            if language_code is None or item.language_code == language_code
        ]

        # Calculate estimates for each template
        table = Table(title="Combination Estimates")
        table.add_column("Template", style="cyan")
        table.add_column("Slots", justify="right", style="yellow")
        table.add_column("Combinations", justify="right", style="green")

        total_combinations = 0

        for template in template_collection:
            # One candidate list per slot (the same list is reused; it is only
            # handed to count_combinations).
            slot_lists: list[list[str]] = [candidate_lemmas for _ in template.slots]

            # Estimate combinations
            num_combos = count_combinations(*slot_lists)
            total_combinations += num_combos

            table.add_row(
                template.name,
                str(len(template.slots)),
                f"{num_combos:,}",
            )

        # Add total row
        table.add_section()
        table.add_row(
            "[bold]TOTAL[/bold]",
            "",
            f"[bold]{total_combinations:,}[/bold]",
        )

        console.print(table)

        # Warn if combinations are very large
        if total_combinations > 1_000_000:
            print_info(
                "\n⚠️  Warning: Exhaustive filling will generate over 1 million "
                "combinations. Consider using random or stratified strategies instead."
            )
        elif total_combinations > 100_000:
            print_info(
                "\n⚠️  Warning: Exhaustive filling will generate over 100K "
                "combinations. This may take significant time."
            )

    except click.exceptions.Exit:
        # ctx.exit() raises Exit (a RuntimeError subclass). Let it through so
        # the generic handler below does not report a bogus
        # "Failed to estimate combinations: 1".
        raise
    except Exception as e:
        print_error(f"Failed to estimate combinations: {e}")
        ctx.exit(1)
675
+
676
+
677
@click.command()
@click.argument("filled_file", type=click.Path(exists=True, path_type=Path))
@click.argument("output_file", type=click.Path(path_type=Path))
@click.option(
    "--expression",
    help="Filter expression (DSL) to apply to filled templates",
)
@click.option(
    "--min-length",
    type=int,
    help="Minimum text length",
)
@click.option(
    "--max-length",
    type=int,
    help="Maximum text length",
)
@click.option(
    "--template-name",
    help="Filter by template name (exact match)",
)
@click.option(
    "--strategy",
    help="Filter by strategy name",
)
@click.pass_context
def filter_filled(
    ctx: click.Context,
    filled_file: Path,
    output_file: Path,
    expression: str | None,
    min_length: int | None,
    max_length: int | None,
    template_name: str | None,
    strategy: str | None,
) -> None:
    """Filter filled templates by various criteria.

    Reads ``filled_file`` line by line, builds a ``FilledTemplate`` from each
    non-blank JSON line and copies the original line to ``output_file`` when
    it passes every requested filter. Lines that cannot be parsed or
    validated are reported and skipped. The output's parent directory is
    created if needed.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    filled_file : Path
        Path to filled templates file.
    output_file : Path
        Path to output filtered file. Must not be the same file as
        ``filled_file``.
    expression : str | None
        DSL expression for filtering. Not implemented yet: when given, a
        notice is printed once and the expression is ignored.
    min_length : int | None
        Minimum length of the rendered text (inclusive).
    max_length : int | None
        Maximum length of the rendered text (inclusive). An explicit ``0``
        is honoured.
    template_name : str | None
        Template name filter (exact match).
    strategy : str | None
        Strategy name filter (exact match).

    Examples
    --------
    $ bead templates filter-filled filled.jsonl filtered.jsonl --min-length 10
    $ bead templates filter-filled filled.jsonl filtered.jsonl --template-name active
    """
    try:
        # The output is opened in "w" mode, which truncates it; writing onto
        # the input would wipe the data before it is read.
        if output_file.exists() and output_file.samefile(filled_file):
            print_error("Output file must be different from the input file")
            ctx.exit(1)

        print_info(f"Filtering filled templates from: {filled_file}")

        if expression:
            # Say this once up front rather than once per surviving line.
            print_info("DSL expression filtering not yet implemented, skipping")

        filtered_count = 0
        total_count = 0

        output_file.parent.mkdir(parents=True, exist_ok=True)
        with open(output_file, "w", encoding="utf-8") as out_f:
            with open(filled_file, encoding="utf-8") as in_f:
                for raw_line in in_f:
                    line = raw_line.strip()
                    if not line:
                        continue

                    total_count += 1

                    try:
                        filled = FilledTemplate(**json.loads(line))

                        # Length bounds are compared against None (not
                        # truthiness) so that a bound of 0 still applies.
                        if (
                            min_length is not None
                            and len(filled.rendered_text) < min_length
                        ):
                            continue
                        if (
                            max_length is not None
                            and len(filled.rendered_text) > max_length
                        ):
                            continue
                        if template_name and filled.template_name != template_name:
                            continue
                        if strategy and filled.strategy_name != strategy:
                            continue
                    except Exception as e:
                        # A malformed record should not abort the whole run.
                        print_error(f"Error processing line: {e}")
                        continue

                    # Passed all filters; write errors are not per-record
                    # problems, so they propagate to the outer handler.
                    out_f.write(line + "\n")
                    filtered_count += 1

        print_success(
            f"Filtered {filtered_count} of {total_count} templates: {output_file}"
        )

    except click.exceptions.Exit:
        # ctx.exit() raises Exit, a RuntimeError subclass; let it through so
        # it is not reported as a filtering failure.
        raise
    except Exception as e:
        print_error(f"Failed to filter filled templates: {e}")
        ctx.exit(1)
790
+
791
+
792
@click.command()
@click.argument("input_files", nargs=-1, type=click.Path(exists=True, path_type=Path))
@click.argument("output_file", type=click.Path(path_type=Path))
@click.option(
    "--deduplicate",
    is_flag=True,
    help="Remove duplicates by UUID",
)
@click.pass_context
def merge_filled(
    ctx: click.Context,
    input_files: tuple[Path, ...],
    output_file: Path,
    deduplicate: bool,
) -> None:
    """Merge multiple filled template files.

    Concatenates the non-blank lines of every input file, in the order the
    files are given, into ``output_file``. Each line is first loaded as a
    ``FilledTemplate``; lines that fail are reported and skipped. The
    output's parent directory is created if needed.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    input_files : tuple[Path, ...]
        Input filled template files.
    output_file : Path
        Output merged file. Must not be one of the input files.
    deduplicate : bool
        Remove duplicates by UUID; the first occurrence is kept.

    Examples
    --------
    $ bead templates merge-filled file1.jsonl file2.jsonl merged.jsonl
    $ bead templates merge-filled parts/*.jsonl merged.jsonl --deduplicate
    """
    try:
        if not input_files:
            print_error("No input files provided")
            ctx.exit(1)

        # The output is opened in "w" mode. If it is also an input (easy to
        # hit with a shell glob on a re-run) it would be truncated and then
        # read back while being written.
        if output_file.exists() and any(
            output_file.samefile(candidate) for candidate in input_files
        ):
            print_error("Output file must not be one of the input files")
            ctx.exit(1)

        print_info(f"Merging {len(input_files)} filled template files")

        seen_ids: set[str] = set()
        merged_count = 0
        duplicate_count = 0

        output_file.parent.mkdir(parents=True, exist_ok=True)
        with open(output_file, "w", encoding="utf-8") as out_f:
            for input_file in input_files:
                print_info(f"  Processing: {input_file}")
                with open(input_file, encoding="utf-8") as in_f:
                    for raw_line in in_f:
                        line = raw_line.strip()
                        if not line:
                            continue

                        try:
                            filled = FilledTemplate(**json.loads(line))

                            if deduplicate:
                                # IDs are compared in string form.
                                filled_id = str(filled.id)
                                if filled_id in seen_ids:
                                    duplicate_count += 1
                                    continue
                                seen_ids.add(filled_id)
                        except Exception as e:
                            # A malformed record should not abort the merge.
                            print_error(
                                f"Error processing line from {input_file}: {e}"
                            )
                            continue

                        out_f.write(line + "\n")
                        merged_count += 1

        print_success(f"Merged {merged_count} filled templates: {output_file}")
        if deduplicate and duplicate_count > 0:
            print_info(f"Removed {duplicate_count} duplicates")

    except click.exceptions.Exit:
        # ctx.exit() raises Exit, a RuntimeError subclass; without this clause
        # the handler below would report it as a merge failure.
        raise
    except Exception as e:
        print_error(f"Failed to merge filled templates: {e}")
        ctx.exit(1)
870
+
871
+
872
@click.command()
@click.argument("filled_file", type=click.Path(exists=True, path_type=Path))
@click.argument("output_file", type=click.Path(path_type=Path))
@click.pass_context
def export_csv(
    ctx: click.Context,
    filled_file: Path,
    output_file: Path,
) -> None:
    """Export filled templates to CSV format.

    Loads every non-blank line of ``filled_file`` as a ``FilledTemplate``
    and writes one CSV row per template with the columns ``id``,
    ``template_id``, ``template_name``, ``rendered_text``, ``strategy_name``
    and ``slot_count``. Lines that fail to load are reported and skipped. If
    no line loads, the command exits with status 1 without creating the
    output file.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    filled_file : Path
        Input filled templates file (JSONL).
    output_file : Path
        Output CSV file.

    Examples
    --------
    $ bead templates export-csv filled.jsonl filled.csv
    """
    try:
        print_info(f"Exporting filled templates to CSV: {output_file}")

        filled_templates: list[FilledTemplate] = []

        with open(filled_file, encoding="utf-8") as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line:
                    continue

                try:
                    filled_templates.append(FilledTemplate(**json.loads(line)))
                except Exception as e:
                    # Report the bad record (as the filter/merge commands do)
                    # instead of dropping it silently, then carry on.
                    print_error(f"Error processing line: {e}")
                    continue

        if not filled_templates:
            print_error("No valid filled templates found")
            ctx.exit(1)

        # Write to CSV; newline="" lets the csv module control line endings.
        output_file.parent.mkdir(parents=True, exist_ok=True)
        with open(output_file, "w", newline="", encoding="utf-8") as f:
            writer = csv_module.writer(f)

            # Header
            writer.writerow(
                [
                    "id",
                    "template_id",
                    "template_name",
                    "rendered_text",
                    "strategy_name",
                    "slot_count",
                ]
            )

            # Data
            writer.writerows(
                [
                    str(filled.id),
                    str(filled.template_id),
                    filled.template_name,
                    filled.rendered_text,
                    filled.strategy_name,
                    len(filled.slot_fillers),
                ]
                for filled in filled_templates
            )

        print_success(
            f"Exported {len(filled_templates)} filled templates to CSV: {output_file}"
        )

    except click.exceptions.Exit:
        # ctx.exit() raises Exit, a RuntimeError subclass; without this clause
        # the handler below would add a second, misleading error message.
        raise
    except Exception as e:
        print_error(f"Failed to export to CSV: {e}")
        ctx.exit(1)
955
+
956
+
957
@click.command()
@click.argument("filled_file", type=click.Path(exists=True, path_type=Path))
@click.argument("output_file", type=click.Path(path_type=Path))
@click.option(
    "--pretty",
    is_flag=True,
    help="Pretty-print JSON with indentation",
)
@click.pass_context
def export_json(
    ctx: click.Context,
    filled_file: Path,
    output_file: Path,
    pretty: bool,
) -> None:
    """Export filled templates to JSON array format.

    Parses every non-blank line of ``filled_file``, checks that it can be
    loaded as a ``FilledTemplate`` and collects the parsed JSON objects (not
    the model instances) into a single JSON array written to
    ``output_file``. Lines that fail are reported and skipped. If no line is
    valid, the command exits with status 1 without creating the output file.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    filled_file : Path
        Input filled templates file (JSONL).
    output_file : Path
        Output JSON file.
    pretty : bool
        Pretty-print with a two-space indentation.

    Examples
    --------
    $ bead templates export-json filled.jsonl filled.json
    $ bead templates export-json filled.jsonl filled.json --pretty
    """
    try:
        print_info(f"Exporting filled templates to JSON: {output_file}")

        filled_templates: list[dict[str, JsonValue]] = []

        with open(filled_file, encoding="utf-8") as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line:
                    continue

                try:
                    filled_data = json.loads(line)
                    FilledTemplate(**filled_data)  # Validate only
                except Exception as e:
                    # Report the bad record (as the filter/merge commands do)
                    # instead of dropping it silently, then carry on.
                    print_error(f"Error processing line: {e}")
                    continue

                filled_templates.append(filled_data)

        if not filled_templates:
            print_error("No valid filled templates found")
            ctx.exit(1)

        # Write to JSON; ensure_ascii=False keeps non-ASCII text readable.
        output_file.parent.mkdir(parents=True, exist_ok=True)
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(
                filled_templates,
                f,
                indent=2 if pretty else None,
                ensure_ascii=False,
            )

        print_success(
            f"Exported {len(filled_templates)} filled templates to JSON: {output_file}"
        )

    except click.exceptions.Exit:
        # ctx.exit() raises Exit, a RuntimeError subclass; without this clause
        # the handler below would add a second, misleading error message.
        raise
    except Exception as e:
        print_error(f"Failed to export to JSON: {e}")
        ctx.exit(1)
1027
+
1028
+
1029
@click.command()
@click.argument("template_file", type=click.Path(exists=True, path_type=Path))
@click.argument(
    "lexicon_files",
    nargs=-1,
    type=click.Path(exists=True, path_type=Path),
    required=True,
)
@click.argument("output_file", type=click.Path(path_type=Path))
@click.option(
    "--n-samples",
    type=int,
    required=True,
    help="Number of samples to generate",
)
@click.option(
    "--seed",
    type=int,
    help="Random seed for reproducibility",
)
@click.option(
    "--language-code",
    help="ISO 639 language code to filter items",
)
@click.pass_context
def sample_combinations(
    ctx: click.Context,
    template_file: Path,
    lexicon_files: tuple[Path, ...],
    output_file: Path,
    n_samples: int,
    seed: int | None,
    language_code: str | None,
) -> None:
    r"""Sample template-lexicon combinations without exhaustive generation.

    Merges the given lexicons, then fills every template in
    ``template_file`` through a ``StrategyFiller`` configured with
    ``RandomStrategy(n_samples=..., seed=...)`` and writes each filled
    template as one JSON line to ``output_file``. Templates that raise
    ``ValueError`` while being filled are reported and skipped.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    template_file : Path
        Path to template file.
    lexicon_files : tuple[Path, ...]
        Paths to one or more lexicon files to merge. Entries from later
        files replace same-key entries from earlier ones.
    output_file : Path
        Path to output sampled combinations.
    n_samples : int
        Number of samples, passed to ``RandomStrategy``.
    seed : int | None
        Random seed, passed to ``RandomStrategy``.
    language_code : str | None
        Language code filter, passed to the filler for every template.

    Examples
    --------
    # Single lexicon
    $ bead templates sample-combinations template.jsonl lexicon.jsonl samples.jsonl \
        --n-samples 1000 --seed 42

    # Multiple lexicons
    $ bead templates sample-combinations tpl.jsonl nouns.jsonl verbs.jsonl out.jsonl \
        --n-samples 1000 --seed 42
    """
    try:
        # Load and merge lexicons
        if not lexicon_files:
            print_error("At least one lexicon file is required")
            ctx.exit(1)

        print_info(f"Loading {len(lexicon_files)} lexicon(s)")
        merged_lexicon = Lexicon(name="merged", items={})

        for lex_file in lexicon_files:
            lex = Lexicon.from_jsonl(str(lex_file), lex_file.stem)
            print_info(f"  Loaded {len(lex)} items from {lex_file.name}")
            merged_lexicon.items.update(lex.items)

        print_info(f"Total merged lexicon: {len(merged_lexicon)} items")

        # Load templates
        print_info(f"Loading templates from {template_file}")
        template_collection = TemplateCollection.from_jsonl(
            str(template_file), "templates"
        )

        # NOTE(review): the status message says "stratified", but the strategy
        # constructed here is RandomStrategy -- confirm which one is intended.
        print_info(f"Generating {n_samples} stratified samples")
        strategy = RandomStrategy(n_samples=n_samples, seed=seed)
        filler = StrategyFiller(lexicon=merged_lexicon, strategy=strategy)

        # Fill templates; fill() runs once per template with the same strategy.
        all_filled: list[FilledTemplate] = []
        for template in template_collection:
            try:
                all_filled.extend(filler.fill(template, language_code))
            except ValueError as e:
                print_error(f"Failed to fill template '{template.name}': {e}")
                continue

        # Save sampled combinations
        output_file.parent.mkdir(parents=True, exist_ok=True)
        with open(output_file, "w", encoding="utf-8") as f:
            f.writelines(filled.model_dump_json() + "\n" for filled in all_filled)

        print_success(
            f"Generated {len(all_filled)} sampled combinations: {output_file}"
        )

    except click.exceptions.Exit:
        # ctx.exit() raises Exit, a RuntimeError subclass; without this clause
        # the handler below would add a second, misleading error message.
        raise
    except Exception as e:
        print_error(f"Failed to sample combinations: {e}")
        ctx.exit(1)
1146
+
1147
+
1148
# Register commands
# Each entry pairs a command with the name passed to ``add_command``.
# ``None`` is that parameter's default, so those commands are registered
# under their own name; only ``estimate`` gets an explicit CLI name.
for _command, _registered_name in (
    (fill, None),
    (list_filled, None),
    (validate_filled, None),
    (show_stats, None),
    (estimate, "estimate-combinations"),
    (filter_filled, None),
    (merge_filled, None),
    (export_csv, None),
    (export_json, None),
    (sample_combinations, None),
):
    templates.add_command(_command, name=_registered_name)
# Do not leave the loop variables behind in the module namespace.
del _command, _registered_name