confarg 0.0.1.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
confarg/_parse_cli.py ADDED
@@ -0,0 +1,507 @@
1
+ # This Source Code Form is subject to the terms of the Mozilla Public
2
+ # License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ # file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+
5
+ """CLI argument parsing for confarg."""
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ from collections.abc import Sequence
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+ from confarg._errors import ConfargError, UnknownArgumentError
15
+ from confarg._merge import (
16
+ DICT_DELETE,
17
+ LIST_APPEND_KEY,
18
+ LIST_DELETE_KEY,
19
+ _accumulate_list_delete,
20
+ _set_nested,
21
+ )
22
+ from confarg._types import (
23
+ _dict_kv,
24
+ _elem_type,
25
+ _is_callable,
26
+ _is_dc,
27
+ _is_dict,
28
+ _is_frozenset,
29
+ _is_list,
30
+ _is_set,
31
+ _is_struct,
32
+ _is_struct_like,
33
+ _is_tuple,
34
+ _is_union,
35
+ _is_varlen_collection,
36
+ _resolve_type,
37
+ _StrToken,
38
+ _struct_fields,
39
+ _try_coerce,
40
+ _tuple_types,
41
+ _union_args_no_none,
42
+ )
43
+
44
+
45
+ def _subclass_field_type(tp: type, field: str) -> Any | None:
46
+ """Search all dataclass subclasses of tp for a field, returning its type.
47
+
48
+ Returns the common type if all subclasses agree, str if they disagree, None if absent.
49
+ """
50
+ found: list[Any] = []
51
+ queue = list(tp.__subclasses__())
52
+ while queue:
53
+ sub = queue.pop()
54
+ queue.extend(sub.__subclasses__())
55
+ if not _is_struct(sub):
56
+ continue
57
+ flds = _struct_fields(sub)
58
+ if field in flds:
59
+ found.append(flds[field])
60
+ if not found:
61
+ return None
62
+ first = found[0]
63
+ return first if all(f == first for f in found[1:]) else str
64
+
65
+
66
def _resolve_field_type(target: Any, parts: list[str], union_tag: str) -> Any | None:
    """Follow a dotted path through the type tree and return the type it ends on.

    Each path segment is interpreted against the type reached so far: struct
    fields are looked up by name, list/set/frozenset and dict types step into
    their element/value type, fixed tuples are indexed by the numeric segment,
    unions are resolved per variant, and callables accept a small set of
    special segments.

    Args:
        target: The root type to start resolution from.
        parts: A list of path segments to follow.
        union_tag: The field name used as a discriminator tag in unions.

    Returns:
        The resolved type at the end of the path, or None if the path is invalid.
    """
    current = _resolve_type(target)
    for position, segment in enumerate(parts):
        # The discriminator tag is always a string leaf, whatever the current type is.
        if segment == union_tag:
            return str
        current = _resolve_type(current)

        if _is_union(current):
            # Re-resolve the rest of the path (this segment included) against each variant.
            tail = parts[position:]
            candidates = [
                hit
                for member in _union_args_no_none(current)
                if (hit := _resolve_field_type(_resolve_type(member), tail, union_tag)) is not None
            ]
            if not candidates:
                return None
            head, *others = candidates
            # Agreement across variants is safe to use; disagreement falls back to str.
            return head if all(other == head for other in others) else str

        if _is_struct(current):
            declared = _struct_fields(current)
            if segment in declared:
                current = declared[segment]
                continue
            # Not declared on this struct: the field may exist on a subclass.
            current = _subclass_field_type(current, segment)
            if current is None:
                return None
            continue

        if _is_list(current) or _is_set(current) or _is_frozenset(current):
            current = _elem_type(current)
            continue

        if _is_tuple(current):
            positional = _tuple_types(current)
            if positional is None:
                current = _elem_type(current)
                continue
            try:
                current = positional[int(segment)]
            except (ValueError, IndexError):
                return None
            continue

        if _is_dict(current):
            _key_type, current = _dict_kv(current)
            continue

        if _is_callable(current):
            if segment in ("fn", "class", "call"):
                current = str
                continue
            if segment == "bind":
                # --field.bind alone is not addressable; --field.bind.key is a str leaf.
                is_last_segment = position == len(parts) - 1
                return None if is_last_segment else str
            # Accept flat constructor kwargs for both factory mode and callable-object mode.
            return str

        # Any other type cannot be traversed further.
        return None
    return current
135
+
136
+
137
+ def _is_dict_at_path(target: Any, parts: list[str], union_tag: str) -> bool:
138
+ """Check if any prefix of the path lands on a dict type.
139
+
140
+ Args:
141
+ target: The root type to start resolution from.
142
+ parts: A list of path segments to check.
143
+ union_tag: The field name used as a discriminator tag in unions.
144
+
145
+ Returns:
146
+ True if any non-empty prefix of parts resolves to a dict type.
147
+ """
148
+ for j in range(len(parts) - 1, 0, -1):
149
+ pt = _resolve_field_type(target, parts[:j], union_tag)
150
+ if pt is not None and _is_dict(_resolve_type(pt)):
151
+ return True
152
+ return False
153
+
154
+
155
+ def _parse_json_arg(token: str, flag: str) -> Any:
156
+ """Parse token as JSON, raising ConfargError on invalid JSON.
157
+
158
+ Args:
159
+ token: The raw CLI token to parse.
160
+ flag: The flag name (e.g. ``--foo``) used in the error message.
161
+
162
+ Returns:
163
+ The decoded JSON value.
164
+
165
+ Raises:
166
+ ConfargError: If the token is not valid JSON.
167
+ """
168
+ try:
169
+ return json.loads(token)
170
+ except json.JSONDecodeError as e:
171
+ raise ConfargError(f"Invalid JSON for {flag}: {e}") from e
172
+
173
+
174
+ def _looks_like_flag(token: str) -> bool:
175
+ """Check whether a token looks like a CLI flag (--word).
176
+
177
+ Bare ``--`` and negative numbers like ``--3.14`` are not considered flags.
178
+
179
+ Args:
180
+ token: The CLI token to check.
181
+
182
+ Returns:
183
+ True if the token looks like a CLI flag.
184
+ """
185
+ return token.startswith("--") and len(token) > 2 and not token[2:].lstrip("-").replace(".", "").isdigit()
186
+
187
+
188
+ def _next_is_flag_or_end(args: Sequence[str], i: int) -> bool:
189
+ """Check whether the next position is past the end or is a flag.
190
+
191
+ Args:
192
+ args: The CLI argument sequence.
193
+ i: The index to check.
194
+
195
+ Returns:
196
+ True if index i is past the end of args or args[i] looks like a flag.
197
+ """
198
+ return i >= len(args) or _looks_like_flag(args[i])
199
+
200
+
201
def _check_config_flag_conflict(target: Any, config_flag: str, cli_prefix: str) -> None:
    """Reject a config_flag that collides with a top-level field name of target.

    The parser intercepts ``--{config_flag}`` as a file-path argument, so a
    field with the same name could never be set from the command line. The
    check covers a struct target directly, or every struct variant of a union
    target; other target types are not checked.

    Args:
        target: The target type whose top-level fields are inspected.
        config_flag: The flag name reserved for config files.
        cli_prefix: The CLI prefix, used only to render the flag in the error message.

    Raises:
        ConfargError: If a checked struct declares a field named ``config_flag``.
    """
    shown_flag = f"--{cli_prefix}.{config_flag}" if cli_prefix else f"--{config_flag}"
    root = _resolve_type(target)

    def _struct_candidates() -> Any:
        # Lazily yield struct types so each one is checked before the next is resolved.
        if _is_struct(root):
            yield root
        elif _is_union(root):
            for member in _union_args_no_none(root):
                resolved_member = _resolve_type(member)
                if _is_struct(resolved_member):
                    yield resolved_member

    for struct_tp in _struct_candidates():
        if config_flag not in _struct_fields(struct_tp):
            continue
        owner = getattr(struct_tp, "__name__", repr(struct_tp))
        raise ConfargError(
            f"{shown_flag!r} is reserved as the config-file flag but {owner} has a field"
            f" named {config_flag!r}. The field cannot be set via CLI because the flag is"
            f" intercepted before field lookup."
            f" Pass a different config_flag to merge()/load(), e.g. config_flag='conf'."
        )
228
+
229
+
230
def _parse_cli(
    args: Sequence[str],
    target: Any,
    cli_prefix: str,
    config_flag: str,
    union_tag: str,
) -> tuple[dict[str, Any], list[tuple[str, Path]]]:
    """Parse CLI arguments into a nested dict and a list of config file paths.

    Every argument must be a ``--flag``; the flag's dotted name is resolved
    against ``target`` to decide how many following tokens to consume and how
    to coerce them. Recognised flag forms:

    * ``--key=value`` is split into ``--key value`` before parsing.
    * ``--{config_flag} PATH...`` / ``--{config_flag}.sub.path PATH...`` records
      one ``(subpath, Path)`` pair per following non-flag token.
    * ``--a.b+`` appends to a list/set/frozenset field.
    * ``--a.b-`` deletes a key/field; ``--a.1-`` deletes a list index.
    * ``--a.b.str VALUE`` stores VALUE as a plain ``str`` without coercion.

    Args:
        args: The CLI argument sequence to parse.
        target: The target type, used for type-aware parsing decisions.
        cli_prefix: Required prefix for CLI flags (empty string for no prefix).
        config_flag: The flag name used to specify config files.
        union_tag: The field name used as a discriminator tag in unions.

    Returns:
        A tuple of (data_dict, config_files) where data_dict is the parsed
        argument data and config_files is a list of (subpath, Path) pairs.

    Raises:
        UnknownArgumentError: If an unrecognized argument is encountered.
        ConfargError: If a config flag is missing its path argument or conflicts with a field name,
            if a flag is missing its value, or if ``+`` is used on a non-collection field.
    """
    # Detect config_flag shadowing a field name before parsing begins.
    _check_config_flag_conflict(target, config_flag, cli_prefix)

    # Normalize --key=value into --key value so the rest of the parser is uniform.
    normalized: list[str] = []
    for tok in args:
        if tok.startswith("--") and "=" in tok:
            flag, _, val = tok.partition("=")
            normalized.append(flag)
            normalized.append(val)
        else:
            normalized.append(tok)
    args = normalized

    data: dict[str, Any] = {}
    config_files: list[tuple[str, Path]] = []
    target_r = _resolve_type(target)
    is_struct = _is_struct_like(target_r)
    i = 0

    while i < len(args):
        token = args[i]
        if not _looks_like_flag(token):
            raise UnknownArgumentError(
                f"Unexpected positional argument: {token!r}."
                " All arguments must be named flags (e.g. --fieldname value)."
            )

        raw_key = token[2:]

        # Strip cli_prefix
        key = raw_key
        if cli_prefix:
            dot_pfx = cli_prefix + "."
            if key.startswith(dot_pfx):
                key = key[len(dot_pfx) :]
            elif key == cli_prefix:
                # Bare --prefix addresses the root value itself.
                key = ""
            else:
                raise UnknownArgumentError(
                    f"Unknown argument: {token!r}. Expected arguments to start with --{cli_prefix}."
                )

        # Config flag
        if key == config_flag or key.startswith(config_flag + "."):
            subpath = key[len(config_flag) + 1 :] if key.startswith(config_flag + ".") else ""
            i += 1
            if i >= len(args) or _looks_like_flag(args[i]):
                raise ConfargError(
                    f"Missing file path after --{config_flag}. Usage: --{config_flag} /path/to/config.yaml"
                )
            # One config flag may be followed by several file paths.
            while i < len(args) and not _looks_like_flag(args[i]):
                config_files.append((subpath, Path(args[i])))
                i += 1
            continue

        path = key.split(".") if key else []

        # Detect + suffix on last path segment (list append): --foo.bar+
        # len > 1 guard prevents bare "--+" or "--foo.+" from triggering append mode.
        append_mode = bool(path) and path[-1].endswith("+") and len(path[-1]) > 1
        if append_mode:
            path[-1] = path[-1][:-1]

        # Detect - suffix on last path segment (deletion): --foo.bar- or --foo.1-
        # len > 1 guard prevents bare "--foo.-" from triggering delete mode.
        delete_mode = not append_mode and bool(path) and path[-1].endswith("-") and len(path[-1]) > 1
        if delete_mode:
            raw_last = path[-1][:-1]
            path[-1] = raw_last
            # A numeric last segment means list-index deletion; anything else is a key deletion.
            try:
                delete_idx = int(raw_last)
                is_list_delete = True
            except ValueError:
                is_list_delete = False
                delete_idx = -1  # unused

        # .str type-cast: --foo.str VALUE forces VALUE as a plain string (bypasses steal rule)
        force_str = not delete_mode and bool(path) and path[-1] == "str"
        if force_str:
            path = path[:-1]

        # Delete mode: --foo.bar- (dict-key deletion) or --foo.1- (list-index deletion)
        if delete_mode:
            if is_list_delete:
                parent_path = path[:-1]
                # Validate the parent path is reachable (use the numeric-index path for type look-up).
                ft_check = _resolve_field_type(target, path, union_tag)
                if ft_check is None and not _is_dict_at_path(target, path, union_tag):
                    raise UnknownArgumentError(
                        f"Unknown argument: {token!r} (field '{'.'.join(parent_path)}' not found or not indexable)"
                    )
                _accumulate_list_delete(data, parent_path, delete_idx, token)
            else:
                # Dict-key (or struct-field) deletion.
                ft_check = _resolve_field_type(target, path, union_tag)
                if ft_check is None and not _is_dict_at_path(target, path, union_tag):
                    raise UnknownArgumentError(f"Unknown argument: {token!r} (field '{'.'.join(path)}' not found)")
                _set_nested(data, path, DICT_DELETE)
            # Delete flags take no value token.
            i += 1
            continue

        # Non-dataclass scalar target with empty path
        if not is_struct and not path:
            i += 1
            if i >= len(args) or _looks_like_flag(args[i]):
                raise ConfargError(f"Missing value for {token!r}. Usage: {token} <value>")
            data["__root__"] = _try_coerce(target_r, _StrToken(args[i]))
            i += 1
            continue

        # Resolve type at this path
        ft = _resolve_field_type(target, path, union_tag)

        if ft is None:
            if append_mode:
                raise UnknownArgumentError(f"Unknown argument: {token} (field '{'.'.join(path)}' not found)")
            if _is_dict_at_path(target, path, union_tag):
                # Path passes through a dict: store the value as an uncoerced string token.
                # When no value follows, the flag is skipped without storing anything.
                i += 1
                if i < len(args) and not _looks_like_flag(args[i]):
                    _set_nested(data, path, _StrToken(args[i]))
                    i += 1
                continue
            if len(path) > 1 and path[-1] in ("", "+"):
                dot_pos = token.rfind(".")
                raise UnknownArgumentError(f"Missing field name after '{token[: dot_pos + 1]}'")
            raise UnknownArgumentError(f"Unknown argument: {token} (field '{'.'.join(path)}' not found)")

        ft = _resolve_type(ft)
        i += 1  # move past the flag token

        # .str cast: store next token as a plain str (bypasses steal rule in construct)
        if force_str:
            if i >= len(args) or _looks_like_flag(args[i]):
                raise ConfargError(f"Missing value for {token!r}. Usage: {token} <value>")
            _set_nested(data, path, str(args[i]))
            i += 1
            continue

        # List/set/frozenset append mode (--foo+): collect values, store as {"+": [...]}
        if append_mode:
            if not _is_varlen_collection(ft):
                raise ConfargError(
                    f"Cannot use + (append) syntax on {token!r}:"
                    f" field '{'.'.join(path)}' has type {ft!r}, which is not a list, set, or frozenset."
                )
            et = _elem_type(ft)
            append_items: list[Any] = []
            consumed_json = False
            # A single JSON array token supplies all appended items at once.
            if i < len(args) and not _looks_like_flag(args[i]) and args[i].startswith("["):
                try:
                    parsed = json.loads(args[i])
                except json.JSONDecodeError:
                    parsed = None
                if isinstance(parsed, list):
                    append_items = parsed
                    i += 1
                    consumed_json = True
            if not consumed_json:
                # Space-separated values; JSON objects are treated as single elements
                while i < len(args) and not _looks_like_flag(args[i]):
                    tok = args[i]
                    if tok.startswith("{"):
                        try:
                            append_items.append(json.loads(tok))
                        except json.JSONDecodeError:
                            # Not valid JSON after all: fall back to ordinary coercion.
                            append_items.append(_try_coerce(et, _StrToken(tok)))
                    else:
                        append_items.append(_try_coerce(et, _StrToken(tok)))
                    i += 1
            # Preserve any existing delete spec at this path (e.g. --items.1- before --items+)
            node = data
            for p in path[:-1]:
                node = node.get(p, {}) if isinstance(node, dict) else {}
            existing = node.get(path[-1]) if path and isinstance(node, dict) else None
            new_val: dict[str, Any] = {LIST_APPEND_KEY: append_items}
            if isinstance(existing, dict) and LIST_DELETE_KEY in existing:
                new_val[LIST_DELETE_KEY] = existing[LIST_DELETE_KEY]
            _set_nested(data, path, new_val)
            continue

        # JSON object for dataclass / dict / callable / union-with-dc fields
        if i < len(args) and not _looks_like_flag(args[i]) and args[i].startswith("{"):
            accepts_obj = (
                _is_dc(ft)
                or _is_dict(ft)
                or _is_callable(ft)
                or (_is_union(ft) and any(_is_dc(_resolve_type(v)) for v in _union_args_no_none(ft)))
            )
            if accepts_obj:
                _set_nested(data, path, _parse_json_arg(args[i], token))
                i += 1
                continue

        # Dataclass field with no value → skip (use defaults)
        if _is_dc(ft) and _next_is_flag_or_end(args, i):
            continue

        # JSON array for list / tuple / union-of-tuples fields
        is_collection = (
            _is_varlen_collection(ft)
            or _is_tuple(ft)
            or (_is_union(ft) and (nv := _union_args_no_none(ft)) and all(_is_tuple(_resolve_type(v)) for v in nv))
        )
        if is_collection and i < len(args) and not _looks_like_flag(args[i]) and args[i].startswith("["):
            try:
                parsed = json.loads(args[i])
            except json.JSONDecodeError:
                parsed = None
            # Only a successfully decoded JSON list is taken; otherwise fall through
            # to the token-by-token handling below.
            if isinstance(parsed, list):
                _set_nested(data, path, parsed)
                i += 1
                continue

        # Variable-length collection → consume until next flag
        if _is_varlen_collection(ft):
            et = _elem_type(ft)
            items: list[Any] = []
            while i < len(args) and not _looks_like_flag(args[i]):
                items.append(_try_coerce(et, _StrToken(args[i])))
                i += 1
            _set_nested(data, path, items)
            continue

        # Fixed-length tuple → consume up to the declared element count
        if _is_tuple(ft):
            tt = _tuple_types(ft)
            if tt is not None:
                items = []
                for et in tt:
                    # Stop at end of input or at the next flag, so an under-supplied
                    # tuple never swallows a following --flag as one of its elements.
                    if _next_is_flag_or_end(args, i):
                        break
                    items.append(_try_coerce(et, _StrToken(args[i])))
                    i += 1
                _set_nested(data, path, items)
                continue

        # Union of tuple variants → consume greedily (disambiguation at construct time)
        if _is_union(ft):
            non_none_vars = _union_args_no_none(ft)
            if non_none_vars and all(_is_tuple(_resolve_type(v)) for v in non_none_vars):
                items = []
                while i < len(args) and not _looks_like_flag(args[i]):
                    items.append(_StrToken(args[i]))
                    i += 1
                _set_nested(data, path, items)
                continue

        # Default: consume one value
        if i >= len(args) or _looks_like_flag(args[i]):
            raise ConfargError(f"Missing value for {token!r}. Usage: {token} <value>")
        _set_nested(data, path, _try_coerce(ft, _StrToken(args[i])))
        i += 1

    return data, config_files