bead 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. bead/__init__.py +11 -0
  2. bead/__main__.py +11 -0
  3. bead/active_learning/__init__.py +15 -0
  4. bead/active_learning/config.py +231 -0
  5. bead/active_learning/loop.py +566 -0
  6. bead/active_learning/models/__init__.py +24 -0
  7. bead/active_learning/models/base.py +852 -0
  8. bead/active_learning/models/binary.py +910 -0
  9. bead/active_learning/models/categorical.py +943 -0
  10. bead/active_learning/models/cloze.py +862 -0
  11. bead/active_learning/models/forced_choice.py +956 -0
  12. bead/active_learning/models/free_text.py +773 -0
  13. bead/active_learning/models/lora.py +365 -0
  14. bead/active_learning/models/magnitude.py +835 -0
  15. bead/active_learning/models/multi_select.py +795 -0
  16. bead/active_learning/models/ordinal_scale.py +811 -0
  17. bead/active_learning/models/peft_adapter.py +155 -0
  18. bead/active_learning/models/random_effects.py +639 -0
  19. bead/active_learning/selection.py +354 -0
  20. bead/active_learning/strategies.py +391 -0
  21. bead/active_learning/trainers/__init__.py +26 -0
  22. bead/active_learning/trainers/base.py +210 -0
  23. bead/active_learning/trainers/data_collator.py +172 -0
  24. bead/active_learning/trainers/dataset_utils.py +261 -0
  25. bead/active_learning/trainers/huggingface.py +304 -0
  26. bead/active_learning/trainers/lightning.py +324 -0
  27. bead/active_learning/trainers/metrics.py +424 -0
  28. bead/active_learning/trainers/mixed_effects.py +551 -0
  29. bead/active_learning/trainers/model_wrapper.py +509 -0
  30. bead/active_learning/trainers/registry.py +104 -0
  31. bead/adapters/__init__.py +11 -0
  32. bead/adapters/huggingface.py +61 -0
  33. bead/behavioral/__init__.py +116 -0
  34. bead/behavioral/analytics.py +646 -0
  35. bead/behavioral/extraction.py +343 -0
  36. bead/behavioral/merging.py +343 -0
  37. bead/cli/__init__.py +11 -0
  38. bead/cli/active_learning.py +513 -0
  39. bead/cli/active_learning_commands.py +779 -0
  40. bead/cli/completion.py +359 -0
  41. bead/cli/config.py +624 -0
  42. bead/cli/constraint_builders.py +286 -0
  43. bead/cli/deployment.py +859 -0
  44. bead/cli/deployment_trials.py +493 -0
  45. bead/cli/deployment_ui.py +332 -0
  46. bead/cli/display.py +378 -0
  47. bead/cli/items.py +960 -0
  48. bead/cli/items_factories.py +776 -0
  49. bead/cli/list_constraints.py +714 -0
  50. bead/cli/lists.py +490 -0
  51. bead/cli/main.py +430 -0
  52. bead/cli/models.py +877 -0
  53. bead/cli/resource_loaders.py +621 -0
  54. bead/cli/resources.py +1036 -0
  55. bead/cli/shell.py +356 -0
  56. bead/cli/simulate.py +840 -0
  57. bead/cli/templates.py +1158 -0
  58. bead/cli/training.py +1080 -0
  59. bead/cli/utils.py +614 -0
  60. bead/cli/workflow.py +1273 -0
  61. bead/config/__init__.py +68 -0
  62. bead/config/active_learning.py +1009 -0
  63. bead/config/config.py +192 -0
  64. bead/config/defaults.py +118 -0
  65. bead/config/deployment.py +217 -0
  66. bead/config/env.py +147 -0
  67. bead/config/item.py +45 -0
  68. bead/config/list.py +193 -0
  69. bead/config/loader.py +149 -0
  70. bead/config/logging.py +42 -0
  71. bead/config/model.py +49 -0
  72. bead/config/paths.py +46 -0
  73. bead/config/profiles.py +320 -0
  74. bead/config/resources.py +47 -0
  75. bead/config/serialization.py +210 -0
  76. bead/config/simulation.py +206 -0
  77. bead/config/template.py +238 -0
  78. bead/config/validation.py +267 -0
  79. bead/data/__init__.py +65 -0
  80. bead/data/base.py +87 -0
  81. bead/data/identifiers.py +97 -0
  82. bead/data/language_codes.py +61 -0
  83. bead/data/metadata.py +270 -0
  84. bead/data/range.py +123 -0
  85. bead/data/repository.py +358 -0
  86. bead/data/serialization.py +249 -0
  87. bead/data/timestamps.py +89 -0
  88. bead/data/validation.py +349 -0
  89. bead/data_collection/__init__.py +11 -0
  90. bead/data_collection/jatos.py +223 -0
  91. bead/data_collection/merger.py +154 -0
  92. bead/data_collection/prolific.py +198 -0
  93. bead/deployment/__init__.py +5 -0
  94. bead/deployment/distribution.py +402 -0
  95. bead/deployment/jatos/__init__.py +1 -0
  96. bead/deployment/jatos/api.py +200 -0
  97. bead/deployment/jatos/exporter.py +210 -0
  98. bead/deployment/jspsych/__init__.py +9 -0
  99. bead/deployment/jspsych/biome.json +44 -0
  100. bead/deployment/jspsych/config.py +411 -0
  101. bead/deployment/jspsych/generator.py +598 -0
  102. bead/deployment/jspsych/package.json +51 -0
  103. bead/deployment/jspsych/pnpm-lock.yaml +2141 -0
  104. bead/deployment/jspsych/randomizer.py +299 -0
  105. bead/deployment/jspsych/src/lib/list-distributor.test.ts +327 -0
  106. bead/deployment/jspsych/src/lib/list-distributor.ts +1282 -0
  107. bead/deployment/jspsych/src/lib/randomizer.test.ts +232 -0
  108. bead/deployment/jspsych/src/lib/randomizer.ts +367 -0
  109. bead/deployment/jspsych/src/plugins/cloze-dropdown.ts +252 -0
  110. bead/deployment/jspsych/src/plugins/forced-choice.ts +265 -0
  111. bead/deployment/jspsych/src/plugins/plugins.test.ts +141 -0
  112. bead/deployment/jspsych/src/plugins/rating.ts +248 -0
  113. bead/deployment/jspsych/src/slopit/index.ts +9 -0
  114. bead/deployment/jspsych/src/types/jatos.d.ts +256 -0
  115. bead/deployment/jspsych/src/types/jspsych.d.ts +228 -0
  116. bead/deployment/jspsych/templates/experiment.css +1 -0
  117. bead/deployment/jspsych/templates/experiment.js.template +289 -0
  118. bead/deployment/jspsych/templates/index.html +51 -0
  119. bead/deployment/jspsych/templates/randomizer.js +241 -0
  120. bead/deployment/jspsych/templates/randomizer.js.template +313 -0
  121. bead/deployment/jspsych/trials.py +723 -0
  122. bead/deployment/jspsych/tsconfig.json +23 -0
  123. bead/deployment/jspsych/tsup.config.ts +30 -0
  124. bead/deployment/jspsych/ui/__init__.py +1 -0
  125. bead/deployment/jspsych/ui/components.py +383 -0
  126. bead/deployment/jspsych/ui/styles.py +411 -0
  127. bead/dsl/__init__.py +80 -0
  128. bead/dsl/ast.py +168 -0
  129. bead/dsl/context.py +178 -0
  130. bead/dsl/errors.py +71 -0
  131. bead/dsl/evaluator.py +570 -0
  132. bead/dsl/grammar.lark +81 -0
  133. bead/dsl/parser.py +231 -0
  134. bead/dsl/stdlib.py +929 -0
  135. bead/evaluation/__init__.py +13 -0
  136. bead/evaluation/convergence.py +485 -0
  137. bead/evaluation/interannotator.py +398 -0
  138. bead/items/__init__.py +40 -0
  139. bead/items/adapters/__init__.py +70 -0
  140. bead/items/adapters/anthropic.py +224 -0
  141. bead/items/adapters/api_utils.py +167 -0
  142. bead/items/adapters/base.py +216 -0
  143. bead/items/adapters/google.py +259 -0
  144. bead/items/adapters/huggingface.py +1074 -0
  145. bead/items/adapters/openai.py +323 -0
  146. bead/items/adapters/registry.py +202 -0
  147. bead/items/adapters/sentence_transformers.py +224 -0
  148. bead/items/adapters/togetherai.py +309 -0
  149. bead/items/binary.py +515 -0
  150. bead/items/cache.py +558 -0
  151. bead/items/categorical.py +593 -0
  152. bead/items/cloze.py +757 -0
  153. bead/items/constructor.py +784 -0
  154. bead/items/forced_choice.py +413 -0
  155. bead/items/free_text.py +681 -0
  156. bead/items/generation.py +432 -0
  157. bead/items/item.py +396 -0
  158. bead/items/item_template.py +787 -0
  159. bead/items/magnitude.py +573 -0
  160. bead/items/multi_select.py +621 -0
  161. bead/items/ordinal_scale.py +569 -0
  162. bead/items/scoring.py +448 -0
  163. bead/items/validation.py +723 -0
  164. bead/lists/__init__.py +30 -0
  165. bead/lists/balancer.py +263 -0
  166. bead/lists/constraints.py +1067 -0
  167. bead/lists/experiment_list.py +286 -0
  168. bead/lists/list_collection.py +378 -0
  169. bead/lists/partitioner.py +1141 -0
  170. bead/lists/stratification.py +254 -0
  171. bead/participants/__init__.py +73 -0
  172. bead/participants/collection.py +699 -0
  173. bead/participants/merging.py +312 -0
  174. bead/participants/metadata_spec.py +491 -0
  175. bead/participants/models.py +276 -0
  176. bead/resources/__init__.py +29 -0
  177. bead/resources/adapters/__init__.py +19 -0
  178. bead/resources/adapters/base.py +104 -0
  179. bead/resources/adapters/cache.py +128 -0
  180. bead/resources/adapters/glazing.py +508 -0
  181. bead/resources/adapters/registry.py +117 -0
  182. bead/resources/adapters/unimorph.py +796 -0
  183. bead/resources/classification.py +856 -0
  184. bead/resources/constraint_builders.py +329 -0
  185. bead/resources/constraints.py +165 -0
  186. bead/resources/lexical_item.py +223 -0
  187. bead/resources/lexicon.py +744 -0
  188. bead/resources/loaders.py +209 -0
  189. bead/resources/template.py +441 -0
  190. bead/resources/template_collection.py +707 -0
  191. bead/resources/template_generation.py +349 -0
  192. bead/simulation/__init__.py +29 -0
  193. bead/simulation/annotators/__init__.py +15 -0
  194. bead/simulation/annotators/base.py +175 -0
  195. bead/simulation/annotators/distance_based.py +135 -0
  196. bead/simulation/annotators/lm_based.py +114 -0
  197. bead/simulation/annotators/oracle.py +182 -0
  198. bead/simulation/annotators/random.py +181 -0
  199. bead/simulation/dsl_extension/__init__.py +3 -0
  200. bead/simulation/noise_models/__init__.py +13 -0
  201. bead/simulation/noise_models/base.py +42 -0
  202. bead/simulation/noise_models/random_noise.py +82 -0
  203. bead/simulation/noise_models/systematic.py +132 -0
  204. bead/simulation/noise_models/temperature.py +86 -0
  205. bead/simulation/runner.py +144 -0
  206. bead/simulation/strategies/__init__.py +23 -0
  207. bead/simulation/strategies/base.py +123 -0
  208. bead/simulation/strategies/binary.py +103 -0
  209. bead/simulation/strategies/categorical.py +123 -0
  210. bead/simulation/strategies/cloze.py +224 -0
  211. bead/simulation/strategies/forced_choice.py +127 -0
  212. bead/simulation/strategies/free_text.py +105 -0
  213. bead/simulation/strategies/magnitude.py +116 -0
  214. bead/simulation/strategies/multi_select.py +129 -0
  215. bead/simulation/strategies/ordinal_scale.py +131 -0
  216. bead/templates/__init__.py +27 -0
  217. bead/templates/adapters/__init__.py +17 -0
  218. bead/templates/adapters/base.py +128 -0
  219. bead/templates/adapters/cache.py +178 -0
  220. bead/templates/adapters/huggingface.py +312 -0
  221. bead/templates/combinatorics.py +103 -0
  222. bead/templates/filler.py +605 -0
  223. bead/templates/renderers.py +177 -0
  224. bead/templates/resolver.py +178 -0
  225. bead/templates/strategies.py +1806 -0
  226. bead/templates/streaming.py +195 -0
  227. bead-0.1.0.dist-info/METADATA +212 -0
  228. bead-0.1.0.dist-info/RECORD +231 -0
  229. bead-0.1.0.dist-info/WHEEL +4 -0
  230. bead-0.1.0.dist-info/entry_points.txt +2 -0
  231. bead-0.1.0.dist-info/licenses/LICENSE +21 -0
bead/cli/utils.py ADDED
@@ -0,0 +1,614 @@
1
+ """CLI utility functions for bead package.
2
+
3
+ This module provides utility functions for the CLI including configuration loading,
4
+ output formatting, error handling, and user prompts.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import sys
11
+ from datetime import date, datetime
12
+ from io import StringIO
13
+ from pathlib import Path
14
+ from typing import TYPE_CHECKING, Literal, cast
15
+ from uuid import UUID
16
+
17
+ import click
18
+ import yaml
19
+ from rich.console import Console
20
+ from rich.table import Table
21
+
22
+ if TYPE_CHECKING:
23
+ from bead.config import BeadConfig
24
+
25
+ from bead.config import load_config
26
+ from bead.data.base import JsonValue
27
+
28
+ console = Console()
29
+
30
+
31
def load_config_for_cli(
    config_file: str | None,
    profile: str,
    verbose: bool,
) -> BeadConfig:
    """Load the bead configuration for a CLI invocation.

    Parameters
    ----------
    config_file : str | None
        Path to a configuration file. A falsy value (``None`` or ``""``)
        means no file is passed to ``load_config``.
    profile : str
        Configuration profile name forwarded to ``load_config``.
    verbose : bool
        When true, print which profile (and which file, if any) was used.

    Returns
    -------
    BeadConfig
        The object returned by ``load_config``.

    Notes
    -----
    Failures are not propagated to the caller in normal operation: they are
    reported through ``print_error(..., exit_code=1)``, which terminates the
    process via ``sys.exit``. The ``raise`` statements after those calls are
    only there so that type checkers see every path either return or raise.
    """
    override_path: Path | None = None
    if config_file:
        override_path = Path(config_file)

    try:
        loaded = load_config(config_path=override_path, profile=profile)

        # The progress messages are emitted inside the ``try`` so that a
        # failure while printing is reported through the same error path.
        if verbose:
            notices = [
                f"[green]✓[/green] Loaded configuration from profile: {profile}"
            ]
            if config_file:
                notices.append(
                    f"[green]✓[/green] Applied overrides from: {config_file}"
                )
            for notice in notices:
                console.print(notice)

        return loaded
    except FileNotFoundError:
        print_error(f"Configuration file not found: {config_file}", exit_code=1)
        raise  # For type checking
    except Exception as exc:
        print_error(f"Failed to load configuration: {exc}", exit_code=1)
        raise  # For type checking
78
+
79
+
80
def _to_plain_data(obj: object, *, for_json: bool) -> object:
    """Recursively replace values that the serializers cannot emit cleanly.

    Parameters
    ----------
    obj : object
        Arbitrary (possibly nested) value.
    for_json : bool
        When true, additionally turn ``date``/``datetime`` values into ISO
        strings and coerce mapping keys to ``str``. When false (YAML), dates
        and keys are left as they are.

    Returns
    -------
    object
        A structure of dicts and lists in which ``Path`` and ``UUID`` values
        are strings and tuples have become lists.
    """
    if isinstance(obj, (Path, UUID)):
        return str(obj)
    # ``datetime`` is a subclass of ``date``, so one check covers both.
    if for_json and isinstance(obj, date):
        return obj.isoformat()
    if isinstance(obj, dict):
        return {
            (str(key) if for_json else key): _to_plain_data(value, for_json=for_json)
            for key, value in obj.items()
        }
    # Tuples are walked as well; otherwise a Path/UUID/date nested in a tuple
    # would reach the serializer unconverted.
    if isinstance(obj, (list, tuple)):
        return [_to_plain_data(item, for_json=for_json) for item in obj]
    return obj


def format_output(
    data: dict[str, JsonValue] | list[JsonValue],
    format_type: Literal["yaml", "json", "table"],
) -> str:
    """Format data for CLI output.

    Parameters
    ----------
    data : dict[str, JsonValue] | list[JsonValue]
        Data to format.
    format_type : {"yaml", "json", "table"}
        Output format type.

    Returns
    -------
    str
        Formatted output string.

    Raises
    ------
    ValueError
        If ``format_type`` is not one of the supported values, or if
        ``"table"`` is requested for data that is not a dict.

    Notes
    -----
    ``Path`` and ``UUID`` values are rendered as strings and tuples as lists
    for both YAML and JSON, so the YAML output does not contain
    Python-specific object tags. For JSON, ``date``/``datetime`` values are
    additionally rendered with ``isoformat()`` and keys are passed through
    ``str``.
    """
    if format_type == "yaml":
        return yaml.dump(
            _to_plain_data(data, for_json=False),
            default_flow_style=False,
            sort_keys=False,
        )
    if format_type == "json":
        return json.dumps(_to_plain_data(data, for_json=True), indent=2)
    if format_type == "table":
        if not isinstance(data, dict):
            raise ValueError("Table format requires dict data")
        return _dict_to_table(data)
    raise ValueError(f"Invalid format type: {format_type}")
141
+
142
+
143
def _dict_to_table(data: dict[str, JsonValue], title: str | None = None) -> str:
    """Render a dictionary as a two-column rich table and return it as text.

    Parameters
    ----------
    data : dict[str, JsonValue]
        Dictionary whose entries become the table rows.
    title : str | None
        Optional table title.

    Returns
    -------
    str
        The table as printed by a rich ``Console`` writing to an in-memory
        buffer (``force_terminal=True``, ``width=120``).
    """

    def render_cell(cell: JsonValue) -> str:
        # Nested dicts get the indented "key: value" layout, lists are shown
        # one element per line, and everything else is simply stringified.
        if isinstance(cell, dict):
            return _format_nested_dict(cell)  # type: ignore[arg-type]
        if isinstance(cell, list):
            return "\n".join(map(str, cell))
        return str(cell)

    table = Table(title=title, show_header=True, header_style="bold cyan")
    table.add_column("Key", style="yellow", no_wrap=True)
    table.add_column("Value", style="white")
    for name, cell in data.items():
        table.add_row(name, render_cell(cell))

    # Print into a private console so the rendering can be returned as a
    # string instead of being written to the terminal.
    buffer = StringIO()
    Console(file=buffer, force_terminal=True, width=120).print(table)
    return buffer.getvalue()
178
+
179
+
180
def _format_nested_dict(data: dict[str, JsonValue], indent: int = 0) -> str:
    """Format a (possibly nested) dictionary as indented ``key: value`` lines.

    Parameters
    ----------
    data : dict[str, JsonValue]
        Dictionary to format.
    indent : int
        Current nesting level; each level adds one leading space.

    Returns
    -------
    str
        Newline-joined lines. A nested dict produces a ``key:`` line
        followed by its own entries one level deeper.
    """
    pad = " " * indent
    rendered: list[str] = []
    for name, entry in data.items():
        if not isinstance(entry, dict):
            rendered.append(f"{pad}{name}: {entry}")
            continue
        rendered.append(f"{pad}{name}:")
        rendered.append(_format_nested_dict(entry, indent + 1))
    return "\n".join(rendered)
206
+
207
+
208
def print_error(message: str, exit_code: int = 1) -> None:
    """Print an error message and, unless told otherwise, exit the process.

    Parameters
    ----------
    message : str
        Error message to display.
    exit_code : int
        Status passed to ``sys.exit`` (default: 1). Pass 0 to print the
        message and return normally instead of exiting.
    """
    formatted = f"[red]✗ Error:[/red] {message}"
    console.print(formatted)
    if exit_code == 0:
        return
    sys.exit(exit_code)
221
+
222
+
223
def print_success(message: str) -> None:
    """Print a success message in green, prefixed with a check mark.

    Parameters
    ----------
    message : str
        Success message to display.
    """
    formatted = f"[green]✓ {message}[/green]"
    console.print(formatted)
232
+
233
+
234
def print_warning(message: str) -> None:
    """Print a warning message with a yellow ``Warning:`` label.

    Parameters
    ----------
    message : str
        Warning message to display.
    """
    formatted = f"[yellow]⚠ Warning:[/yellow] {message}"
    console.print(formatted)
243
+
244
+
245
def print_info(message: str) -> None:
    """Print an informational message with a blue ``Info:`` label.

    Parameters
    ----------
    message : str
        Info message to display.
    """
    formatted = f"[blue]ℹ Info:[/blue] {message}"
    console.print(formatted)
254
+
255
+
256
def confirm(prompt: str, default: bool = False) -> bool:
    """Ask the user a yes/no question via ``click.confirm``.

    Parameters
    ----------
    prompt : str
        Confirmation prompt.
    default : bool
        Value forwarded to ``click.confirm`` as its ``default``.

    Returns
    -------
    bool
        The answer returned by ``click.confirm``.
    """
    answer = click.confirm(prompt, default=default)
    return answer
272
+
273
+
274
def get_nested_value(data: dict[str, JsonValue], key_path: str) -> JsonValue:
    """Look up a value in nested dictionaries using a dot-separated path.

    Parameters
    ----------
    data : dict[str, JsonValue]
        Dictionary to search.
    key_path : str
        Dot-separated key path (e.g., "paths.data_dir").

    Returns
    -------
    JsonValue
        Value found at the end of the path.

    Raises
    ------
    KeyError
        If a path segment is missing, or if the path tries to descend into
        a value that is not a dict.

    Examples
    --------
    >>> data = {"a": {"b": {"c": 42}}}
    >>> get_nested_value(data, "a.b.c")
    42
    """
    node = data
    for segment in key_path.split("."):
        if isinstance(node, dict):
            if segment in node:
                node = node[segment]
                continue
            raise KeyError(f"Key '{segment}' not found in path '{key_path}'")
        raise KeyError(
            f"Cannot access key '{segment}' in non-dict value at path '{key_path}'"
        )
    return node
311
+
312
+
313
def redact_sensitive_values(data: dict[str, JsonValue]) -> dict[str, JsonValue]:
    """Return a copy of a configuration dict with sensitive values masked.

    A key is treated as sensitive when its lower-cased name contains any of
    the marker substrings below. Nested dicts are processed recursively, and
    so are dicts found inside lists (at any list nesting depth), so that
    secrets in structures such as ``{"providers": [{"api_key": ...}]}`` are
    masked as well.

    Parameters
    ----------
    data : dict[str, JsonValue]
        Configuration data. It is not modified.

    Returns
    -------
    dict[str, JsonValue]
        New dictionary in which each sensitive non-dict value is replaced by
        ``"***REDACTED***"`` when truthy and by ``None`` when falsy.
    """
    sensitive_keys: set[str] = {
        "api_key",
        "secret",
        "password",
        "token",
        "openai_api_key",
        "anthropic_api_key",
        "google_api_key",
    }

    def redact_list(items: list[JsonValue]) -> list[JsonValue]:
        # Only containers can hide keyed secrets; scalars pass through as is.
        cleaned: list[JsonValue] = []
        for item in items:
            if isinstance(item, dict):
                cleaned.append(redact_sensitive_values(item))
            elif isinstance(item, list):
                cleaned.append(redact_list(item))
            else:
                cleaned.append(item)
        return cleaned

    result: dict[str, JsonValue] = {}
    for key, value in data.items():
        if isinstance(value, dict):
            result[key] = redact_sensitive_values(value)
        elif any(sensitive in key.lower() for sensitive in sensitive_keys):
            result[key] = "***REDACTED***" if value else None
        elif isinstance(value, list):
            # Checked after the sensitive-key test on purpose: a list stored
            # under a sensitive key is masked wholesale, never walked.
            result[key] = redact_list(value)
        else:
            result[key] = value

    return result
348
+
349
+
350
def parse_json_option(
    json_str: str,
    option_name: str,
) -> dict[str, JsonValue]:
    """Parse a CLI option value that must contain a JSON object.

    Parameters
    ----------
    json_str : str
        JSON string to parse.
    option_name : str
        Name of the CLI option (used in error messages).

    Returns
    -------
    dict[str, JsonValue]
        Parsed JSON dictionary.

    Raises
    ------
    ValueError
        If the string is not valid JSON, or if it is valid JSON but not an
        object. The message names the option and shows an example.

    Examples
    --------
    >>> parse_json_option('{"key": "value"}', "--config")
    {'key': 'value'}
    """
    try:
        parsed: JsonValue = json.loads(json_str)
    except json.JSONDecodeError as e:
        raise ValueError(
            f"Invalid JSON in {option_name}: {e}\n"
            f"Provided: {json_str}\n"
            f'Example: \'{{"key": "value"}}\''
        ) from e

    if isinstance(parsed, dict):
        # The isinstance check established the shape; the cast only informs
        # the type checker and does nothing at runtime.
        return cast("dict[str, JsonValue]", parsed)

    raise ValueError(
        f"{option_name} must be a JSON object (dictionary), "
        f"not {type(parsed).__name__}. "
        f'Wrap your JSON in curly braces: \'{{"key": "value"}}\''
    )
395
+
396
+
397
def parse_key_value_pairs(
    pairs_str: str,
    separator: str = ",",
    kv_separator: str = "=",
) -> dict[str, str]:
    """Parse a string of ``key=value`` pairs into a dictionary.

    Parameters
    ----------
    pairs_str : str
        String containing key=value pairs.
    separator : str, optional
        Separator between pairs (default: ",").
    kv_separator : str, optional
        Separator between key and value (default: "="). Only the first
        occurrence in a pair splits it, so values may contain it.

    Returns
    -------
    dict[str, str]
        Parsed pairs with surrounding whitespace stripped from keys and
        values. Empty or blank input yields an empty dict, and blank
        segments between separators are skipped.

    Raises
    ------
    ValueError
        If a pair lacks the key/value separator or has an empty key.

    Examples
    --------
    >>> parse_key_value_pairs("key1=val1,key2=val2")
    {'key1': 'val1', 'key2': 'val2'}
    """
    parsed: dict[str, str] = {}
    if not (pairs_str and pairs_str.strip()):
        return parsed

    for chunk in pairs_str.split(separator):
        entry = chunk.strip()
        if not entry:
            continue

        raw_key, found, raw_value = entry.partition(kv_separator)
        if not found:
            raise ValueError(
                f"Invalid key=value pair: '{entry}'. "
                f"Expected format: key{kv_separator}value"
            )

        name = raw_key.strip()
        if not name:
            raise ValueError(f"Empty key in pair: '{entry}'")

        parsed[name] = raw_value.strip()

    return parsed
453
+
454
+
455
def parse_list_option(
    list_str: str,
    separator: str = ",",
    allow_empty: bool = False,
) -> list[str]:
    """Split a separator-delimited string into a list of trimmed values.

    Parameters
    ----------
    list_str : str
        String containing separator-delimited values.
    separator : str, optional
        Separator between values (default: ",").
    allow_empty : bool, optional
        Whether an empty result is acceptable (default: False).

    Returns
    -------
    list[str]
        Non-blank values with surrounding whitespace removed.

    Raises
    ------
    ValueError
        If ``allow_empty`` is false and the input is empty/blank, or
        contains nothing but separators and whitespace.

    Examples
    --------
    >>> parse_list_option("a,b,c")
    ['a', 'b', 'c']
    """
    is_blank = not list_str or not list_str.strip()
    if is_blank and allow_empty:
        return []
    if is_blank:
        raise ValueError("List cannot be empty")

    items: list[str] = []
    for piece in list_str.split(separator):
        trimmed = piece.strip()
        if trimmed:
            items.append(trimmed)

    if items or allow_empty:
        return items
    raise ValueError("List cannot be empty after parsing")
497
+
498
+
499
def validate_file_exists(
    file_path: Path,
    file_description: str = "File",
) -> None:
    """Check that a path exists and refers to a regular file.

    Parameters
    ----------
    file_path : Path
        Path to check.
    file_description : str, optional
        Description of the file used in error messages (default: "File").

    Raises
    ------
    FileNotFoundError
        If the path does not exist.
    ValueError
        If the path exists but is not a file.

    Examples
    --------
    >>> validate_file_exists(Path("config.yaml"), "Config file")
    """
    if file_path.exists():
        if file_path.is_file():
            return
        raise ValueError(
            f"{file_description} is not a file: {file_path}\n"
            "Expected a file, got a directory."
        )

    raise FileNotFoundError(
        f"{file_description} not found: {file_path}\n"
        "Please check the path and try again."
    )
532
+
533
+
534
def validate_directory_exists(
    dir_path: Path,
    dir_description: str = "Directory",
    create_if_missing: bool = False,
) -> None:
    """Check that a path is an existing directory, optionally creating it.

    Parameters
    ----------
    dir_path : Path
        Path to check.
    dir_description : str, optional
        Description of the directory used in messages (default: "Directory").
    create_if_missing : bool, optional
        When true, a missing directory is created (including parents) and
        an info message is printed (default: False).

    Raises
    ------
    FileNotFoundError
        If the path does not exist and ``create_if_missing`` is false.
    ValueError
        If the path exists but is not a directory.

    Examples
    --------
    >>> validate_directory_exists(Path("data/"), "Data directory")
    """
    if dir_path.exists():
        if not dir_path.is_dir():
            raise ValueError(
                f"{dir_description} is not a directory: {dir_path}\n"
                "Expected a directory, got a file."
            )
        return

    if not create_if_missing:
        raise FileNotFoundError(
            f"{dir_description} not found: {dir_path}\n"
            "Please create the directory or use --create flag."
        )

    dir_path.mkdir(parents=True, exist_ok=True)
    print_info(f"Created {dir_description}: {dir_path}")
575
+
576
+
577
def merge_config_dicts(
    base: dict[str, JsonValue],
    override: dict[str, JsonValue],
) -> dict[str, JsonValue]:
    """Merge two configuration dictionaries recursively.

    Where both inputs hold a dict under the same key, the two dicts are
    merged key by key; in every other case the value from ``override``
    replaces the one from ``base``.

    Parameters
    ----------
    base : dict[str, JsonValue]
        Base configuration dictionary. It is not modified.
    override : dict[str, JsonValue]
        Override configuration dictionary. It is not modified.

    Returns
    -------
    dict[str, JsonValue]
        Merged configuration. The result shares no mutable containers with
        either input, so changing it later cannot alter ``base`` or
        ``override``.

    Examples
    --------
    >>> base = {"a": 1, "b": {"c": 2}}
    >>> override = {"b": {"c": 3, "d": 4}}
    >>> merge_config_dicts(base, override)
    {'a': 1, 'b': {'c': 3, 'd': 4}}
    """
    # Imported locally so the block carries its own dependency.
    from copy import deepcopy

    def merge_into(target: dict[str, JsonValue], source: dict[str, JsonValue]) -> None:
        # ``target`` is always a private copy, so in-place updates are safe.
        for key, value in source.items():
            current = target.get(key)
            if isinstance(current, dict) and isinstance(value, dict):
                merge_into(current, value)
            else:
                # Copy so the result never aliases containers in ``override``.
                target[key] = deepcopy(value)

    merged: dict[str, JsonValue] = deepcopy(base)
    merge_into(merged, override)
    return merged