bead 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231)
  1. bead/__init__.py +11 -0
  2. bead/__main__.py +11 -0
  3. bead/active_learning/__init__.py +15 -0
  4. bead/active_learning/config.py +231 -0
  5. bead/active_learning/loop.py +566 -0
  6. bead/active_learning/models/__init__.py +24 -0
  7. bead/active_learning/models/base.py +852 -0
  8. bead/active_learning/models/binary.py +910 -0
  9. bead/active_learning/models/categorical.py +943 -0
  10. bead/active_learning/models/cloze.py +862 -0
  11. bead/active_learning/models/forced_choice.py +956 -0
  12. bead/active_learning/models/free_text.py +773 -0
  13. bead/active_learning/models/lora.py +365 -0
  14. bead/active_learning/models/magnitude.py +835 -0
  15. bead/active_learning/models/multi_select.py +795 -0
  16. bead/active_learning/models/ordinal_scale.py +811 -0
  17. bead/active_learning/models/peft_adapter.py +155 -0
  18. bead/active_learning/models/random_effects.py +639 -0
  19. bead/active_learning/selection.py +354 -0
  20. bead/active_learning/strategies.py +391 -0
  21. bead/active_learning/trainers/__init__.py +26 -0
  22. bead/active_learning/trainers/base.py +210 -0
  23. bead/active_learning/trainers/data_collator.py +172 -0
  24. bead/active_learning/trainers/dataset_utils.py +261 -0
  25. bead/active_learning/trainers/huggingface.py +304 -0
  26. bead/active_learning/trainers/lightning.py +324 -0
  27. bead/active_learning/trainers/metrics.py +424 -0
  28. bead/active_learning/trainers/mixed_effects.py +551 -0
  29. bead/active_learning/trainers/model_wrapper.py +509 -0
  30. bead/active_learning/trainers/registry.py +104 -0
  31. bead/adapters/__init__.py +11 -0
  32. bead/adapters/huggingface.py +61 -0
  33. bead/behavioral/__init__.py +116 -0
  34. bead/behavioral/analytics.py +646 -0
  35. bead/behavioral/extraction.py +343 -0
  36. bead/behavioral/merging.py +343 -0
  37. bead/cli/__init__.py +11 -0
  38. bead/cli/active_learning.py +513 -0
  39. bead/cli/active_learning_commands.py +779 -0
  40. bead/cli/completion.py +359 -0
  41. bead/cli/config.py +624 -0
  42. bead/cli/constraint_builders.py +286 -0
  43. bead/cli/deployment.py +859 -0
  44. bead/cli/deployment_trials.py +493 -0
  45. bead/cli/deployment_ui.py +332 -0
  46. bead/cli/display.py +378 -0
  47. bead/cli/items.py +960 -0
  48. bead/cli/items_factories.py +776 -0
  49. bead/cli/list_constraints.py +714 -0
  50. bead/cli/lists.py +490 -0
  51. bead/cli/main.py +430 -0
  52. bead/cli/models.py +877 -0
  53. bead/cli/resource_loaders.py +621 -0
  54. bead/cli/resources.py +1036 -0
  55. bead/cli/shell.py +356 -0
  56. bead/cli/simulate.py +840 -0
  57. bead/cli/templates.py +1158 -0
  58. bead/cli/training.py +1080 -0
  59. bead/cli/utils.py +614 -0
  60. bead/cli/workflow.py +1273 -0
  61. bead/config/__init__.py +68 -0
  62. bead/config/active_learning.py +1009 -0
  63. bead/config/config.py +192 -0
  64. bead/config/defaults.py +118 -0
  65. bead/config/deployment.py +217 -0
  66. bead/config/env.py +147 -0
  67. bead/config/item.py +45 -0
  68. bead/config/list.py +193 -0
  69. bead/config/loader.py +149 -0
  70. bead/config/logging.py +42 -0
  71. bead/config/model.py +49 -0
  72. bead/config/paths.py +46 -0
  73. bead/config/profiles.py +320 -0
  74. bead/config/resources.py +47 -0
  75. bead/config/serialization.py +210 -0
  76. bead/config/simulation.py +206 -0
  77. bead/config/template.py +238 -0
  78. bead/config/validation.py +267 -0
  79. bead/data/__init__.py +65 -0
  80. bead/data/base.py +87 -0
  81. bead/data/identifiers.py +97 -0
  82. bead/data/language_codes.py +61 -0
  83. bead/data/metadata.py +270 -0
  84. bead/data/range.py +123 -0
  85. bead/data/repository.py +358 -0
  86. bead/data/serialization.py +249 -0
  87. bead/data/timestamps.py +89 -0
  88. bead/data/validation.py +349 -0
  89. bead/data_collection/__init__.py +11 -0
  90. bead/data_collection/jatos.py +223 -0
  91. bead/data_collection/merger.py +154 -0
  92. bead/data_collection/prolific.py +198 -0
  93. bead/deployment/__init__.py +5 -0
  94. bead/deployment/distribution.py +402 -0
  95. bead/deployment/jatos/__init__.py +1 -0
  96. bead/deployment/jatos/api.py +200 -0
  97. bead/deployment/jatos/exporter.py +210 -0
  98. bead/deployment/jspsych/__init__.py +9 -0
  99. bead/deployment/jspsych/biome.json +44 -0
  100. bead/deployment/jspsych/config.py +411 -0
  101. bead/deployment/jspsych/generator.py +598 -0
  102. bead/deployment/jspsych/package.json +51 -0
  103. bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
  104. bead/deployment/jspsych/randomizer.py +299 -0
  105. bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
  106. bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
  107. bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
  108. bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
  109. bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
  110. bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
  111. bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
  112. bead/deployment/jspsych/src/plugins/rating.ts +248 -0
  113. bead/deployment/jspsych/src/slopit/index.ts +9 -0
  114. bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
  115. bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
  116. bead/deployment/jspsych/templates/experiment.css +1 -0
  117. bead/deployment/jspsych/templates/experiment.js.template +289 -0
  118. bead/deployment/jspsych/templates/index.html +51 -0
  119. bead/deployment/jspsych/templates/randomizer.js +241 -0
  120. bead/deployment/jspsych/templates/randomizer.js.template +313 -0
  121. bead/deployment/jspsych/trials.py +723 -0
  122. bead/deployment/jspsych/tsconfig.json +23 -0
  123. bead/deployment/jspsych/tsup.config.ts +30 -0
  124. bead/deployment/jspsych/ui/__init__.py +1 -0
  125. bead/deployment/jspsych/ui/components.py +383 -0
  126. bead/deployment/jspsych/ui/styles.py +411 -0
  127. bead/dsl/__init__.py +80 -0
  128. bead/dsl/ast.py +168 -0
  129. bead/dsl/context.py +178 -0
  130. bead/dsl/errors.py +71 -0
  131. bead/dsl/evaluator.py +570 -0
  132. bead/dsl/grammar.lark +81 -0
  133. bead/dsl/parser.py +231 -0
  134. bead/dsl/stdlib.py +929 -0
  135. bead/evaluation/__init__.py +13 -0
  136. bead/evaluation/convergence.py +485 -0
  137. bead/evaluation/interannotator.py +398 -0
  138. bead/items/__init__.py +40 -0
  139. bead/items/adapters/__init__.py +70 -0
  140. bead/items/adapters/anthropic.py +224 -0
  141. bead/items/adapters/api_utils.py +167 -0
  142. bead/items/adapters/base.py +216 -0
  143. bead/items/adapters/google.py +259 -0
  144. bead/items/adapters/huggingface.py +1074 -0
  145. bead/items/adapters/openai.py +323 -0
  146. bead/items/adapters/registry.py +202 -0
  147. bead/items/adapters/sentence_transformers.py +224 -0
  148. bead/items/adapters/togetherai.py +309 -0
  149. bead/items/binary.py +515 -0
  150. bead/items/cache.py +558 -0
  151. bead/items/categorical.py +593 -0
  152. bead/items/cloze.py +757 -0
  153. bead/items/constructor.py +784 -0
  154. bead/items/forced_choice.py +413 -0
  155. bead/items/free_text.py +681 -0
  156. bead/items/generation.py +432 -0
  157. bead/items/item.py +396 -0
  158. bead/items/item_template.py +787 -0
  159. bead/items/magnitude.py +573 -0
  160. bead/items/multi_select.py +621 -0
  161. bead/items/ordinal_scale.py +569 -0
  162. bead/items/scoring.py +448 -0
  163. bead/items/validation.py +723 -0
  164. bead/lists/__init__.py +30 -0
  165. bead/lists/balancer.py +263 -0
  166. bead/lists/constraints.py +1067 -0
  167. bead/lists/experiment_list.py +286 -0
  168. bead/lists/list_collection.py +378 -0
  169. bead/lists/partitioner.py +1141 -0
  170. bead/lists/stratification.py +254 -0
  171. bead/participants/__init__.py +73 -0
  172. bead/participants/collection.py +699 -0
  173. bead/participants/merging.py +312 -0
  174. bead/participants/metadata_spec.py +491 -0
  175. bead/participants/models.py +276 -0
  176. bead/resources/__init__.py +29 -0
  177. bead/resources/adapters/__init__.py +19 -0
  178. bead/resources/adapters/base.py +104 -0
  179. bead/resources/adapters/cache.py +128 -0
  180. bead/resources/adapters/glazing.py +508 -0
  181. bead/resources/adapters/registry.py +117 -0
  182. bead/resources/adapters/unimorph.py +796 -0
  183. bead/resources/classification.py +856 -0
  184. bead/resources/constraint_builders.py +329 -0
  185. bead/resources/constraints.py +165 -0
  186. bead/resources/lexical_item.py +223 -0
  187. bead/resources/lexicon.py +744 -0
  188. bead/resources/loaders.py +209 -0
  189. bead/resources/template.py +441 -0
  190. bead/resources/template_collection.py +707 -0
  191. bead/resources/template_generation.py +349 -0
  192. bead/simulation/__init__.py +29 -0
  193. bead/simulation/annotators/__init__.py +15 -0
  194. bead/simulation/annotators/base.py +175 -0
  195. bead/simulation/annotators/distance_based.py +135 -0
  196. bead/simulation/annotators/lm_based.py +114 -0
  197. bead/simulation/annotators/oracle.py +182 -0
  198. bead/simulation/annotators/random.py +181 -0
  199. bead/simulation/dsl_extension/__init__.py +3 -0
  200. bead/simulation/noise_models/__init__.py +13 -0
  201. bead/simulation/noise_models/base.py +42 -0
  202. bead/simulation/noise_models/random_noise.py +82 -0
  203. bead/simulation/noise_models/systematic.py +132 -0
  204. bead/simulation/noise_models/temperature.py +86 -0
  205. bead/simulation/runner.py +144 -0
  206. bead/simulation/strategies/__init__.py +23 -0
  207. bead/simulation/strategies/base.py +123 -0
  208. bead/simulation/strategies/binary.py +103 -0
  209. bead/simulation/strategies/categorical.py +123 -0
  210. bead/simulation/strategies/cloze.py +224 -0
  211. bead/simulation/strategies/forced_choice.py +127 -0
  212. bead/simulation/strategies/free_text.py +105 -0
  213. bead/simulation/strategies/magnitude.py +116 -0
  214. bead/simulation/strategies/multi_select.py +129 -0
  215. bead/simulation/strategies/ordinal_scale.py +131 -0
  216. bead/templates/__init__.py +27 -0
  217. bead/templates/adapters/__init__.py +17 -0
  218. bead/templates/adapters/base.py +128 -0
  219. bead/templates/adapters/cache.py +178 -0
  220. bead/templates/adapters/huggingface.py +312 -0
  221. bead/templates/combinatorics.py +103 -0
  222. bead/templates/filler.py +605 -0
  223. bead/templates/renderers.py +177 -0
  224. bead/templates/resolver.py +178 -0
  225. bead/templates/strategies.py +1806 -0
  226. bead/templates/streaming.py +195 -0
  227. bead-0.1.0.dist-info/METADATA +212 -0
  228. bead-0.1.0.dist-info/RECORD +231 -0
  229. bead-0.1.0.dist-info/WHEEL +4 -0
  230. bead-0.1.0.dist-info/entry_points.txt +2 -0
  231. bead-0.1.0.dist-info/licenses/LICENSE +21 -0
bead/cli/resources.py ADDED
@@ -0,0 +1,1036 @@
1
+ """Resource management commands for bead CLI.
2
+
3
+ This module provides commands for creating, listing, and validating
4
+ lexicons and templates (Stage 1 of the bead pipeline).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import csv
10
+ import json
11
+ import re
12
+ from itertools import product
13
+ from pathlib import Path
14
+ from typing import Any, cast
15
+
16
+ import click
17
+ from pydantic import ValidationError
18
+ from rich.console import Console
19
+ from rich.table import Table
20
+
21
+ from bead.cli.constraint_builders import create_constraint
22
+ from bead.cli.resource_loaders import (
23
+ import_framenet,
24
+ import_propbank,
25
+ import_unimorph,
26
+ import_verbnet,
27
+ )
28
+ from bead.cli.utils import print_error, print_info, print_success
29
+ from bead.data.base import JsonValue
30
+ from bead.resources.lexical_item import LexicalItem
31
+ from bead.resources.lexicon import Lexicon
32
+ from bead.resources.template import Slot, Template
33
+ from bead.resources.template_collection import TemplateCollection
34
+
# Shared Rich console used by the commands in this module to print tables
# and per-line validation errors.
console = Console()
36
+
37
+
# NOTE: Click renders this docstring as the group's ``--help`` text, so its
# wording is user-facing. The ``\b`` marker on its own line tells Click not to
# re-wrap the paragraph that follows, which keeps the example commands intact.
@click.group()
def resources() -> None:
    r"""Resource management commands (Stage 1).

    Commands for creating, validating, and managing lexicons and templates.

    \b
    Examples:
        $ bead resources create-lexicon lexicon.jsonl --name verbs \\
            --from-csv verbs.csv
        $ bead resources create-template template.jsonl --name transitive \\
            --template-string "{subject} {verb} {object}"
        $ bead resources list-lexicons --directory lexicons/
        $ bead resources validate-lexicon lexicon.jsonl
    """
53
+
54
+
def _csv_row_to_item_data(row: dict[Any, Any], language_code: str) -> dict[str, Any]:
    """Translate one ``csv.DictReader`` row into ``LexicalItem`` keyword arguments.

    Parameters
    ----------
    row : dict[Any, Any]
        Row produced by ``csv.DictReader``; must contain a ``lemma`` key.
    language_code : str
        Language code assigned to the item.

    Returns
    -------
    dict[str, Any]
        Keyword arguments for ``LexicalItem``. ``form``, ``source`` and
        ``features`` are only present when the row supplies a non-empty value.
    """
    item_data: dict[str, Any] = {
        "lemma": row["lemma"],
        "language_code": language_code,
    }
    for optional in ("form", "source"):
        if row.get(optional):
            item_data[optional] = row[optional]

    features: dict[str, Any] = {}
    if row.get("pos"):
        features["pos"] = row["pos"]

    # ``feature_*`` columns are applied first and ``attr_*`` columns second, so
    # an ``attr_x`` column overrides ``feature_x`` regardless of column order.
    for prefix in ("feature_", "attr_"):
        for key, value in row.items():
            # DictReader files surplus fields of a ragged row under the key
            # ``None``; skip anything that is not a real column name.
            if isinstance(key, str) and key.startswith(prefix) and value:
                features[key[len(prefix):]] = value

    if features:
        item_data["features"] = features
    return item_data


def _json_entry_to_item_kwargs(
    entry: object, default_language_code: str
) -> dict[str, Any] | None:
    """Translate one decoded JSON array element into ``LexicalItem`` keyword arguments.

    Parameters
    ----------
    entry : object
        A single element of the decoded JSON array.
    default_language_code : str
        Language code used when the entry has no string ``language_code``.

    Returns
    -------
    dict[str, Any] | None
        Keyword arguments for ``LexicalItem``, or ``None`` when the entry is
        not an object or has no string ``lemma``.
    """
    if not isinstance(entry, dict):
        return None
    lemma = entry.get("lemma")
    if not isinstance(lemma, str):
        return None

    # Only scalar feature values are copied; nested structures are dropped.
    features: dict[str, str | int | float | bool | None] = {}
    raw_features = entry.get("features")
    if isinstance(raw_features, dict):
        for feature_name, feature_value in raw_features.items():
            if feature_value is None or isinstance(
                feature_value, (str, int, float, bool)
            ):
                features[feature_name] = feature_value

    # A top-level ``pos`` is moved into the features mapping.
    pos = entry.get("pos")
    if isinstance(pos, str):
        features["pos"] = pos

    language_code = entry.get("language_code")
    kwargs: dict[str, Any] = {
        "lemma": lemma,
        "language_code": (
            language_code if isinstance(language_code, str) else default_language_code
        ),
        "features": features,
    }
    # ``form`` and ``source`` are omitted (not passed as None) when absent.
    for optional in ("form", "source"):
        value = entry.get(optional)
        if isinstance(value, str):
            kwargs[optional] = value
    return kwargs


@resources.command()
@click.argument("output_file", type=click.Path(path_type=Path))
@click.option("--name", required=True, help="Lexicon name")
@click.option(
    "--from-csv",
    "csv_file",
    type=click.Path(exists=True, path_type=Path),
    help="Create from CSV file (requires 'lemma' column, optional 'pos', 'form', etc.)",
)
@click.option(
    "--from-json",
    "json_file",
    type=click.Path(exists=True, path_type=Path),
    help="Create from JSON file (array of lexical item objects)",
)
@click.option("--language-code", help="ISO 639 language code (e.g., 'eng', 'en')")
@click.option("--description", help="Description of the lexicon")
@click.pass_context
def create_lexicon(
    ctx: click.Context,
    output_file: Path,
    name: str,
    csv_file: Path | None,
    json_file: Path | None,
    language_code: str | None,
    description: str | None,
) -> None:
    r"""Create a lexicon from various sources.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    output_file : Path
        Path to output lexicon file.
    name : str
        Name for the lexicon.
    csv_file : Path | None
        Path to CSV source file.
    json_file : Path | None
        Path to JSON source file.
    language_code : str | None
        ISO 639 language code.
    description : str | None
        Description of the lexicon.

    Examples
    --------
    # Create from CSV file
    $ bead resources create-lexicon lexicon.jsonl --name verbs --from-csv verbs.csv

    # Create from JSON file
    $ bead resources create-lexicon lexicon.jsonl --name verbs --from-json verbs.json

    # With language code
    $ bead resources create-lexicon lexicon.jsonl --name verbs \\
        --from-csv verbs.csv --language-code eng
    """
    try:
        # Validate that exactly one source is provided
        provided_sources = [s for s in (csv_file, json_file) if s is not None]
        if not provided_sources:
            print_error("Must provide one source: --from-csv or --from-json")
            ctx.exit(1)
        if len(provided_sources) > 1:
            print_error("Only one source allowed: --from-csv or --from-json")
            ctx.exit(1)

        lexicon = Lexicon(
            name=name,
            language_code=language_code,
            description=description,
        )

        # Items default to "eng" when no language code is given
        item_language_code = language_code or "eng"

        if csv_file is not None:
            print_info(f"Loading lexical items from CSV: {csv_file}")
            # newline="" lets the csv module handle line endings itself, so
            # quoted fields containing newlines are parsed correctly.
            with open(csv_file, encoding="utf-8", newline="") as f:
                for row in csv.DictReader(f):
                    if "lemma" not in row:
                        print_error("CSV must have 'lemma' column")
                        ctx.exit(1)
                    item_data = _csv_row_to_item_data(row, item_language_code)
                    lexicon.add(LexicalItem(**item_data))

        elif json_file is not None:
            print_info(f"Loading lexical items from JSON: {json_file}")
            with open(json_file, encoding="utf-8") as f:
                raw_data = json.load(f)

            if not isinstance(raw_data, list):
                print_error("JSON file must contain an array of lexical items")
                ctx.exit(1)

            skipped = 0
            for entry in cast(list[object], raw_data):
                item_kwargs = _json_entry_to_item_kwargs(entry, item_language_code)
                if item_kwargs is None:
                    skipped += 1
                    continue
                lexicon.add(LexicalItem(**item_kwargs))  # type: ignore[arg-type]

            # Report dropped entries instead of discarding them silently
            if skipped:
                print_info(
                    f"Skipped {skipped} JSON entries without a string 'lemma' field"
                )

        # Save lexicon
        output_file.parent.mkdir(parents=True, exist_ok=True)
        lexicon.to_jsonl(str(output_file))

        print_success(
            f"Created lexicon '{name}' with {len(lexicon)} items: {output_file}"
        )

    except click.exceptions.Exit:
        # ctx.exit() signals termination by raising Exit, which derives from
        # RuntimeError. Re-raise it so the generic handler below does not
        # report it as a second, empty failure message.
        raise
    except ValidationError as e:
        print_error(f"Validation error: {e}")
        ctx.exit(1)
    except Exception as e:
        print_error(f"Failed to create lexicon: {e}")
        ctx.exit(1)
277
+
278
+
@resources.command()
@click.argument("output_file", type=click.Path(path_type=Path))
@click.option("--name", required=True, help="Template name")
@click.option(
    "--template-string",
    required=True,
    help="Template string with {slot_name} placeholders",
)
@click.option("--language-code", help="ISO 639 language code")
@click.option("--description", help="Template description")
@click.option(
    "--slot",
    "slots",
    multiple=True,
    help=(
        "Slot definition in format: name:required "
        "(e.g., 'subject:true', 'object:false')"
    ),
)
@click.pass_context
def create_template(
    ctx: click.Context,
    output_file: Path,
    name: str,
    template_string: str,
    language_code: str | None,
    description: str | None,
    slots: tuple[str, ...],
) -> None:
    r"""Create a template with slots and constraints.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    output_file : Path
        Path to output template file.
    name : str
        Name for the template.
    template_string : str
        Template string with {slot_name} placeholders.
    language_code : str | None
        ISO 639 language code.
    description : str | None
        Description of the template.
    slots : tuple[str, ...]
        Slot definitions in format "name:required".

    Examples
    --------
    # Create simple template
    $ bead resources create-template template.jsonl \\
        --name transitive \\
        --template-string "{subject} {verb} {object}"

    # With slot specifications
    $ bead resources create-template template.jsonl \\
        --name transitive \\
        --template-string "{subject} {verb} {object}" \\
        --slot subject:true \\
        --slot verb:true \\
        --slot object:false
    """
    try:
        # Slots are defined by the placeholders present in the template string
        slot_names = re.findall(r"\{(\w+)\}", template_string)

        if not slot_names:
            print_error(
                "Template string must contain at least one {slot_name} placeholder"
            )
            ctx.exit(1)

        # Parse explicit "name:required" definitions; only "true", "yes" and
        # "1" (case-insensitive) mark a slot as required.
        explicit_slots: dict[str, bool] = {}
        for slot_def in slots:
            slot_name, separator, required_str = slot_def.partition(":")
            if not separator:
                print_error(
                    f"Invalid slot definition: {slot_def}. Use format 'name:required'"
                )
                ctx.exit(1)
            explicit_slots[slot_name] = required_str.lower() in ("true", "yes", "1")

        # A --slot naming no placeholder has no effect; say so instead of
        # dropping it silently (typically a typo in the slot name).
        unmatched = [n for n in explicit_slots if n not in slot_names]
        if unmatched:
            print_info(
                "Ignoring --slot definitions with no matching placeholder: "
                + ", ".join(unmatched)
            )

        # Placeholders without an explicit definition default to required
        slot_dict: dict[str, Slot] = {
            slot_name: Slot(
                name=slot_name, required=explicit_slots.get(slot_name, True)
            )
            for slot_name in slot_names
        }

        template = Template(
            name=name,
            template_string=template_string,
            slots=slot_dict,
            language_code=language_code,
            description=description,
        )

        # The template is stored inside a single-template collection
        collection = TemplateCollection(
            name=f"{name}_collection",
            language_code=language_code,
        )
        collection.add(template)

        output_file.parent.mkdir(parents=True, exist_ok=True)
        collection.to_jsonl(str(output_file))

        print_success(
            f"Created template '{name}' with {len(slot_dict)} slots: {output_file}"
        )

    except click.exceptions.Exit:
        # ctx.exit() raises Exit (a RuntimeError subclass); re-raise it so the
        # generic handler below does not print a spurious failure message.
        raise
    except ValidationError as e:
        print_error(f"Validation error: {e}")
        ctx.exit(1)
    except Exception as e:
        print_error(f"Failed to create template: {e}")
        ctx.exit(1)
403
+
404
+
@resources.command()
@click.option(
    "--directory",
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    default=Path.cwd(),
    help="Directory to search for lexicon files",
)
@click.option(
    "--pattern",
    default="*.jsonl",
    help="File pattern to match (default: *.jsonl)",
)
@click.pass_context
def list_lexicons(
    ctx: click.Context,
    directory: Path,
    pattern: str,
) -> None:
    """List available lexicons in a directory.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    directory : Path
        Directory to search for lexicon files.
    pattern : str
        File pattern to match.

    Examples
    --------
    $ bead resources list-lexicons
    $ bead resources list-lexicons --directory lexicons/
    $ bead resources list-lexicons --pattern "verb*.jsonl"
    """
    try:
        matches = sorted(directory.glob(pattern))
        if not matches:
            print_info(f"No lexicon files found in {directory} matching {pattern}")
            return

        table = Table(title=f"Lexicons in {directory}")
        for heading, column_options in (
            ("File", {"style": "cyan"}),
            ("Name", {"style": "green"}),
            ("Items", {"justify": "right", "style": "yellow"}),
            ("Language", {"style": "magenta"}),
        ):
            table.add_column(heading, **column_options)

        for candidate in matches:
            # Best effort: anything unreadable or unparsable is left out of
            # the listing rather than failing the whole command.
            try:
                with open(candidate, encoding="utf-8") as handle:
                    header = handle.readline().strip()
                    if not header:
                        # Files whose first line is blank are not listed
                        continue
                    # The first line is non-blank, so it counts as one item;
                    # the remaining non-blank lines are counted after it.
                    populated = 1 + sum(1 for rest in handle if rest.strip())

                # Language is taken from the first item only
                first_item = json.loads(header)
                table.add_row(
                    str(candidate.name),
                    candidate.stem,
                    str(populated),
                    first_item.get("language_code", "N/A"),
                )
            except Exception:
                continue

        console.print(table)

    except Exception as e:
        print_error(f"Failed to list lexicons: {e}")
        ctx.exit(1)
485
+
486
+
@resources.command()
@click.option(
    "--directory",
    type=click.Path(exists=True, file_okay=False, path_type=Path),
    default=Path.cwd(),
    help="Directory to search for template files",
)
@click.option(
    "--pattern",
    default="*.jsonl",
    help="File pattern to match (default: *.jsonl)",
)
@click.pass_context
def list_templates(
    ctx: click.Context,
    directory: Path,
    pattern: str,
) -> None:
    """List available templates in a directory.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    directory : Path
        Directory to search for template files.
    pattern : str
        File pattern to match.

    Examples
    --------
    $ bead resources list-templates
    $ bead resources list-templates --directory templates/
    $ bead resources list-templates --pattern "trans*.jsonl"
    """
    try:
        matches = sorted(directory.glob(pattern))
        if not matches:
            print_info(f"No template files found in {directory} matching {pattern}")
            return

        table = Table(title=f"Templates in {directory}")
        for heading, column_options in (
            ("File", {"style": "cyan"}),
            ("Name", {"style": "green"}),
            ("Slots", {"justify": "right", "style": "yellow"}),
            ("Template String", {"style": "white"}),
        ):
            table.add_column(heading, **column_options)

        for candidate in matches:
            # Best effort: anything unreadable or unparsable is left out of
            # the listing rather than failing the whole command.
            try:
                with open(candidate, encoding="utf-8") as handle:
                    header = handle.readline().strip()
                if not header:
                    continue

                # Only the first template of each file is summarised
                record = json.loads(header)
                shown_name = record.get("name", candidate.stem)
                slot_total = len(record.get("slots", {}))
                shown_string = record.get("template_string", "N/A")

                # Keep the column narrow: cap at 50 characters with an ellipsis
                shown_string = (
                    shown_string[:47] + "..."
                    if len(shown_string) > 50
                    else shown_string
                )

                table.add_row(
                    str(candidate.name),
                    shown_name,
                    str(slot_total),
                    shown_string,
                )
            except Exception:
                continue

        console.print(table)

    except Exception as e:
        print_error(f"Failed to list templates: {e}")
        ctx.exit(1)
568
+
569
+
@resources.command()
@click.argument("lexicon_file", type=click.Path(exists=True, path_type=Path))
@click.pass_context
def validate_lexicon(ctx: click.Context, lexicon_file: Path) -> None:
    """Validate a lexicon file.

    Checks that the lexicon file is properly formatted and all items are valid.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    lexicon_file : Path
        Path to lexicon file to validate.

    Examples
    --------
    $ bead resources validate-lexicon lexicon.jsonl
    """
    try:
        print_info(f"Validating lexicon: {lexicon_file}")

        item_count = 0
        errors: list[str] = []

        with open(lexicon_file, encoding="utf-8") as f:
            for line_num, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    # Blank lines are ignored but still advance line numbers
                    continue

                try:
                    item_data = json.loads(line)
                except json.JSONDecodeError as e:
                    errors.append(f"Line {line_num}: Invalid JSON - {e}")
                    continue

                # Valid JSON that is not an object (e.g. a list or a number)
                # cannot be unpacked into keyword arguments; record it as a
                # per-line error instead of aborting the whole validation.
                if not isinstance(item_data, dict):
                    errors.append(
                        f"Line {line_num}: Validation error - expected a JSON "
                        f"object, got {type(item_data).__name__}"
                    )
                    continue

                try:
                    LexicalItem(**item_data)
                except ValidationError as e:
                    errors.append(f"Line {line_num}: Validation error - {e}")
                else:
                    item_count += 1

        if errors:
            print_error(f"Validation failed with {len(errors)} errors:")
            for error in errors[:10]:  # Show first 10 errors
                console.print(f"  [red]✗[/red] {error}")
            if len(errors) > 10:
                console.print(f"  ... and {len(errors) - 10} more errors")
            ctx.exit(1)
        else:
            print_success(f"Lexicon is valid: {item_count} items")

    except click.exceptions.Exit:
        # ctx.exit() raises Exit (a RuntimeError subclass); re-raise it so the
        # generic handler below does not add a bogus "Failed to validate" line.
        raise
    except Exception as e:
        print_error(f"Failed to validate lexicon: {e}")
        ctx.exit(1)
623
+
624
+
# Add resource loader commands to resources group.
# The importer commands come from bead.cli.resource_loaders and are attached
# here under explicit hyphenated command names.
resources.add_command(import_verbnet, name="import-verbnet")
resources.add_command(import_unimorph, name="import-unimorph")
resources.add_command(import_propbank, name="import-propbank")
resources.add_command(import_framenet, name="import-framenet")
630
+
631
+
@resources.command()
@click.argument("template_file", type=click.Path(exists=True, path_type=Path))
@click.pass_context
def validate_template(ctx: click.Context, template_file: Path) -> None:
    """Validate a template file.

    Checks that the template file is properly formatted and all templates are valid.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    template_file : Path
        Path to template file to validate.

    Examples
    --------
    $ bead resources validate-template templates.jsonl
    """
    try:
        print_info(f"Validating template: {template_file}")

        template_count = 0
        errors: list[str] = []

        with open(template_file, encoding="utf-8") as f:
            for line_num, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    # Blank lines are ignored but still advance line numbers
                    continue

                try:
                    template_data = json.loads(line)
                except json.JSONDecodeError as e:
                    errors.append(f"Line {line_num}: Invalid JSON - {e}")
                    continue

                # Valid JSON that is not an object (e.g. a list or a number)
                # cannot be unpacked into keyword arguments; record it as a
                # per-line error instead of aborting the whole validation.
                if not isinstance(template_data, dict):
                    errors.append(
                        f"Line {line_num}: Validation error - expected a JSON "
                        f"object, got {type(template_data).__name__}"
                    )
                    continue

                try:
                    Template(**template_data)
                except ValidationError as e:
                    errors.append(f"Line {line_num}: Validation error - {e}")
                else:
                    template_count += 1

        if errors:
            print_error(f"Validation failed with {len(errors)} errors:")
            for error in errors[:10]:  # Show first 10 errors
                console.print(f"  [red]✗[/red] {error}")
            if len(errors) > 10:
                console.print(f"  ... and {len(errors) - 10} more errors")
            ctx.exit(1)
        else:
            print_success(f"Template file is valid: {template_count} templates")

    except click.exceptions.Exit:
        # ctx.exit() raises Exit (a RuntimeError subclass); re-raise it so the
        # generic handler below does not add a bogus "Failed to validate" line.
        raise
    except Exception as e:
        print_error(f"Failed to validate template: {e}")
        ctx.exit(1)
685
+
686
+
687
@resources.command()
@click.argument("output_file", type=click.Path(path_type=Path))
@click.option(
    "--pattern",
    required=True,
    help="Template pattern with {slot_name} placeholders (e.g., '{subj} {verb}')",
)
@click.option(
    "--name",
    required=True,
    help="Template name",
)
@click.option(
    "--slot",
    "slots",
    multiple=True,
    help="Slot specification: name:required (e.g., subject:true, object:false)",
)
@click.option(
    "--description",
    help="Description of the template",
)
@click.option(
    "--language-code",
    help="ISO 639 language code (e.g., 'eng', 'en')",
)
@click.option(
    "--tags",
    help="Comma-separated tags for categorization",
)
@click.pass_context
def generate_templates(
    ctx: click.Context,
    output_file: Path,
    pattern: str,
    name: str,
    slots: tuple[str, ...],
    description: str | None,
    language_code: str | None,
    tags: str | None,
) -> None:
    r"""Generate templates from pattern specifications.

    Creates a template object from a pattern string with slot placeholders.
    Slot names are extracted from ``{name}`` placeholders in the pattern.
    Without ``--slot`` options every extracted slot is marked required; with
    them, only the slots named explicitly are created, each with the given
    required flag.

    The template is written as one JSON line. If the output file already
    exists the line is appended, otherwise the file (and any missing parent
    directories) is created. The command exits with status 1 on any error.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    output_file : Path
        Path to output template file (JSONL).
    pattern : str
        Template pattern with {slot_name} placeholders.
    name : str
        Template name.
    slots : tuple[str, ...]
        Slot specifications (name:required). The flag is true for
        ``true``/``yes``/``1``/``t``/``y`` (case-insensitive), false otherwise.
    description : str | None
        Template description.
    language_code : str | None
        ISO 639 language code.
    tags : str | None
        Comma-separated tags.

    Examples
    --------
    # Generate simple template (auto-detect slots)
    $ bead resources generate-templates template.jsonl \\
        --pattern "{subject} {verb} {object}" \\
        --name simple_transitive

    # With explicit slot specifications
    $ bead resources generate-templates template.jsonl \\
        --pattern "{subject} {verb} {object}" \\
        --name transitive \\
        --slot subject:true \\
        --slot verb:true \\
        --slot object:false \\
        --description "Transitive sentence template"

    # With language and tags
    $ bead resources generate-templates template.jsonl \\
        --pattern "{subject} {verb} {object}" \\
        --name transitive \\
        --language-code eng \\
        --tags "transitive,simple"
    """
    try:
        # Extract slot names from pattern
        slot_names_in_pattern = set(re.findall(r"\{(\w+)\}", pattern))

        if not slot_names_in_pattern:
            print_error(
                "No slot placeholders found in pattern.\n\n"
                "Pattern must contain {slot_name} placeholders.\n\n"
                "Example: '{subject} {verb} {object}'"
            )
            ctx.exit(1)

        # Build slot dictionary
        slot_dict: dict[str, Slot] = {}

        if slots:
            # Use explicit slot specifications.
            # NOTE(review): placeholders that no --slot option names are not
            # added to slot_dict here - confirm that is intended.
            for slot_spec in slots:
                if ":" not in slot_spec:
                    print_error(
                        f"Invalid slot specification: {slot_spec}\n\n"
                        f"Format: name:required (e.g., subject:true, object:false)"
                    )
                    ctx.exit(1)

                slot_name, required_str = slot_spec.split(":", 1)
                required = required_str.lower() in ("true", "yes", "1", "t", "y")

                if slot_name not in slot_names_in_pattern:
                    print_error(
                        f"Slot '{slot_name}' not found in pattern.\n\n"
                        f"Available slots: {', '.join(sorted(slot_names_in_pattern))}"
                    )
                    ctx.exit(1)

                slot_dict[slot_name] = Slot(name=slot_name, required=required)
        else:
            # Auto-generate slots (all required)
            for slot_name in slot_names_in_pattern:
                slot_dict[slot_name] = Slot(name=slot_name, required=True)

        # Build template
        template_data: dict[str, Any] = {
            "name": name,
            "template_string": pattern,
            "slots": slot_dict,
        }

        if description:
            template_data["description"] = description
        if language_code:
            template_data["language_code"] = language_code
        if tags:
            template_data["tags"] = [t.strip() for t in tags.split(",") if t.strip()]

        template = Template(**template_data)

        # Save to JSONL. Append mode creates the file when it is missing, so
        # no separate existence check is needed.
        output_file.parent.mkdir(parents=True, exist_ok=True)
        with open(output_file, "a", encoding="utf-8") as f:
            f.write(template.model_dump_json() + "\n")

        print_success(
            f"Created template '{name}' with {len(slot_dict)} slots: {output_file}"
        )

        # Show slot details
        console.print("\n[cyan]Slots:[/cyan]")
        for slot_name, slot in sorted(slot_dict.items()):
            status_markup = (
                "[green]required[/green]"
                if slot.required
                else "[yellow]optional[/yellow]"
            )
            console.print(f"  • {slot_name}: {status_markup}")

    except click.exceptions.Exit:
        # ctx.exit() raises Exit (a RuntimeError subclass). Re-raise it so the
        # generic handler below does not print "Failed to generate template: 1"
        # on top of the specific error already shown.
        raise
    except ValidationError as e:
        print_error(f"Validation error: {e}")
        ctx.exit(1)
    except Exception as e:
        print_error(f"Failed to generate template: {e}")
        ctx.exit(1)
858
+
859
+
860
@resources.command()
@click.argument("base_template_file", type=click.Path(exists=True, path_type=Path))
@click.argument("output_file", type=click.Path(path_type=Path))
@click.option(
    "--slot-variants",
    help="JSON file with slot variant specs: {slot_name: [variant1, variant2]}",
    type=click.Path(exists=True, path_type=Path),
)
@click.option(
    "--name-pattern",
    default="{base_name}_variant_{index}",
    help="Pattern for variant names (default: {base_name}_variant_{index})",
)
@click.option(
    "--max-variants",
    type=int,
    help="Maximum number of variants to generate",
)
@click.pass_context
def generate_template_variants(
    ctx: click.Context,
    base_template_file: Path,
    output_file: Path,
    slot_variants: Path | None,
    name_pattern: str,
    max_variants: int | None,
) -> None:
    r"""Generate systematic variations of a base template.

    The base template is the first line of ``base_template_file``. With
    ``--slot-variants``, one variant is built for every combination of the
    listed replacements (Cartesian product, optionally truncated to
    ``--max-variants``). All placeholders of a combination are substituted
    in a single pass, so replacement text is never substituted again; this
    is what makes swapping two slots work. The first combination is skipped
    when it reproduces the base template string unchanged.

    Without ``--slot-variants``, ``max_variants`` (default 3) copies are
    produced that differ only in name and metadata.

    The output file is overwritten. The command exits with status 1 on any
    error.

    Parameters
    ----------
    ctx : click.Context
        Click context object.
    base_template_file : Path
        Path to base template file (JSONL).
    output_file : Path
        Path to output variants file (JSONL).
    slot_variants : Path | None
        JSON file with slot variant specifications.
    name_pattern : str
        Pattern for variant names; formatted with ``base_name`` and ``index``.
    max_variants : int | None
        Maximum number of variants to generate.

    Examples
    --------
    # Generate variants with slot permutations
    $ bead resources generate-template-variants base.jsonl variants.jsonl \\
        --slot-variants slot_variants.json \\
        --max-variants 10

    Where slot_variants.json contains:
    {
        "subject": ["{subject}", "{object}"],
        "object": ["{object}", "{subject}"]
    }

    This creates templates with swapped subject/object positions.
    """
    try:
        print_info(f"Loading base template from {base_template_file}")

        # Load base template (only the first line of the file is used)
        with open(base_template_file, encoding="utf-8") as f:
            first_line = f.readline().strip()
            if not first_line:
                print_error("Base template file is empty")
                ctx.exit(1)

            base_template_data = json.loads(first_line)
            base_template = Template(**base_template_data)

        variants: list[Template] = []
        base_name = base_template.name
        base_template_string = base_template.template_string

        if slot_variants:
            # Load slot variant specifications
            print_info(f"Loading slot variants from {slot_variants}")
            with open(slot_variants, encoding="utf-8") as f:
                variant_spec = json.load(f)

            # Generate all combinations of slot substitutions
            slot_names = list(variant_spec.keys())
            slot_options = [variant_spec[slot] for slot in slot_names]

            # One alternation matching any "{slot}" placeholder named in the
            # spec. An empty alternation would match everywhere, so no pattern
            # is built when the spec names no slots.
            placeholder_re = (
                re.compile("|".join(re.escape(f"{{{slot}}}") for slot in slot_names))
                if slot_names
                else None
            )

            # Generate all combinations
            combinations = list(product(*slot_options))

            # Limit to max_variants if specified
            if max_variants and len(combinations) > max_variants:
                print_info(
                    f"Limiting to {max_variants} variants "
                    f"(out of {len(combinations)} possible)"
                )
                combinations = combinations[:max_variants]

            for idx, combo in enumerate(combinations):
                # Create substitution map
                substitution_map = dict(zip(slot_names, combo, strict=False))

                # Apply all substitutions in one pass. Chained str.replace
                # calls would rewrite text inserted by an earlier replacement
                # (swapping {subject}/{object} would collapse both into one).
                variant_template_string = base_template_string
                if placeholder_re is not None:
                    replacements = {
                        f"{{{slot}}}": value
                        for slot, value in substitution_map.items()
                    }
                    # A callable repl inserts the text literally, without
                    # backslash-escape processing.
                    variant_template_string = placeholder_re.sub(
                        lambda match: replacements[match.group(0)],
                        base_template_string,
                    )

                # Skip if template_string didn't change (original)
                if idx == 0 and variant_template_string == base_template_string:
                    continue

                # Create variant template
                variant_name = name_pattern.format(base_name=base_name, index=idx)
                variant_data = base_template.model_dump()
                variant_data["name"] = variant_name
                variant_data["template_string"] = variant_template_string
                variant_data["metadata"] = {
                    # "or {}" tolerates a dumped metadata value of None.
                    **(variant_data.get("metadata") or {}),
                    "variant_index": idx,
                    "base_template": base_name,
                    "substitutions": substitution_map,
                }

                variant = Template(**variant_data)
                variants.append(variant)

            print_success(f"Generated {len(variants)} slot-based template variants")

        else:
            # Generate simple metadata-only variants
            print_info("No slot variants specified, generating metadata variants")
            num_variants = max_variants or 3

            for i in range(num_variants):
                variant_name = name_pattern.format(base_name=base_name, index=i)

                variant_data = base_template.model_dump()
                variant_data["name"] = variant_name
                variant_data["metadata"] = {
                    **(variant_data.get("metadata") or {}),
                    "variant_index": i,
                    "base_template": base_name,
                }

                variant = Template(**variant_data)
                variants.append(variant)

            print_success(f"Generated {len(variants)} metadata-only template variants")

        # Save variants (overwrites any existing output file)
        output_file.parent.mkdir(parents=True, exist_ok=True)
        with open(output_file, "w", encoding="utf-8") as f:
            for variant in variants:
                f.write(variant.model_dump_json() + "\n")

        print_success(f"Saved variants to {output_file}")

    except click.exceptions.Exit:
        # ctx.exit() raises Exit (a RuntimeError subclass). Re-raise it so the
        # generic handler below does not report it as
        # "Failed to generate template variants: 1".
        raise
    except ValidationError as e:
        print_error(f"Validation error: {e}")
        ctx.exit(1)
    except Exception as e:
        print_error(f"Failed to generate template variants: {e}")
        ctx.exit(1)
1027
+
1028
+
1029
# Register the external resource loader commands, followed by the constraint
# builder command, on the ``resources`` group under their default names.
for _extra_command in (
    import_verbnet,
    import_unimorph,
    import_propbank,
    import_framenet,
    create_constraint,
):
    resources.add_command(_extra_command)
# Drop the loop temporary so it does not linger as a module attribute.
del _extra_command