grabmonkey 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
grabmonkey/__init__.py ADDED
@@ -0,0 +1,29 @@
1
+ """grabmonkey: dot-path access, mutation, and flattening for nested data.
2
+
3
+ Public API (import from the package root):
4
+
5
+ from grabmonkey import grab, grab_many, put, delete, flatten, unflatten
6
+ from grabmonkey import GrabError, PathSyntaxError, PathError, CoercionError
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from .access import grab, grab_many
12
+ from .errors import CoercionError, GrabError, PathError, PathSyntaxError
13
+ from .flatten import flatten, unflatten
14
+ from .mutate import delete, put
15
+
16
# Names re-exported from the package root; a star-import of the package is
# limited to exactly these.
__all__ = [
    # read access
    "grab",
    "grab_many",
    # mutation
    "put",
    "delete",
    # flattening
    "flatten",
    "unflatten",
    # exception hierarchy
    "GrabError",
    "PathSyntaxError",
    "PathError",
    "CoercionError",
]

# Must match the distribution version. This module ships in the 1.0.0 wheel,
# so the previous "0.1.0" misreported the installed release.
__version__ = "1.0.0"
grabmonkey/access.py ADDED
@@ -0,0 +1,218 @@
1
+ """Read access: :func:`grab` and :func:`grab_many`.
2
+
3
+ This module exists to walk a parsed path against in-memory data and either
4
+ return the resolved value or raise a :class:`PathError` whose message names the
5
+ exact segment that failed and why. It treats mappings as key lookups,
6
+ sequences as positional indexing, and anything else as attribute access, so the
7
+ same path works against dicts, lists, dataclasses, named tuples, and plain
8
+ objects.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from collections.abc import Mapping, Sequence
14
+ from typing import Any, Callable, Mapping as MappingT
15
+
16
+ from .coerce import coerce_value
17
+ from .errors import PathError
18
+ from .path import (
19
+ Index,
20
+ Key,
21
+ Token,
22
+ Wildcard,
23
+ _is_simple_key,
24
+ _quote_key,
25
+ parse_path,
26
+ render_path,
27
+ )
28
+
29
+ _MISSING = object()
30
+
31
+ # How many dict keys to list before truncating, in a "key not found" message.
32
+ _MAX_KEYS_SHOWN = 12
33
+
34
+
35
def _describe_keys(mapping: Mapping[Any, Any]) -> str:
    """Format the keys of ``mapping`` for a "key not found" message.

    The first ``_MAX_KEYS_SHOWN`` keys are rendered with ``repr`` and joined
    by commas; any further keys are summarised as a ``…(+N more)`` suffix.
    The whole listing is wrapped in square brackets.
    """
    all_keys = list(mapping.keys())
    listing = ", ".join([repr(key) for key in all_keys[:_MAX_KEYS_SHOWN]])
    hidden = len(all_keys) - _MAX_KEYS_SHOWN
    if hidden > 0:
        listing += f", …(+{hidden} more)"
    return f"[{listing}]"
41
+
42
+
43
def _segment_label(tok: Token) -> str:
    """Return the text that names segment ``tok`` in an error message.

    A key that ``_is_simple_key`` accepts is wrapped in single quotes; any
    other key is formatted by ``_quote_key``. An index renders as ``[N]``
    and every remaining token kind (the wildcard) as ``[*]``.
    """
    if isinstance(tok, Key):
        name = tok.name
        if _is_simple_key(name):
            return f"'{name}'"
        return _quote_key(name)
    return f"[{tok.index}]" if isinstance(tok, Index) else "[*]"
51
+
52
+
53
def _fail(tokens: list[Token], k: int, reason: str) -> PathError:
    """Build (but do not raise) the ``PathError`` for a failure at ``tokens[k]``.

    The message quotes the path rendered up to and including the failing
    segment, names that segment via ``_segment_label`` and ends with
    ``reason``. Callers are expected to ``raise`` the returned exception.
    """
    prefix = render_path(tokens[: k + 1])
    label = _segment_label(tokens[k])
    message = f"Path {prefix!r} failed at {label}: {reason}"
    return PathError(message)
58
+
59
+
60
+ def _is_seq(value: Any) -> bool:
61
+ return isinstance(value, Sequence) and not isinstance(value, (str, bytes, bytearray))
62
+
63
+
64
def _step(current: Any, tokens: list[Token], k: int) -> Any:
    """Apply the single segment ``tokens[k]`` to ``current`` and return the result.

    ``Key`` segments are handled by ``_step_key`` and ``Index`` segments by
    ``_step_index``; both raise the ``PathError`` built by ``_fail`` when the
    segment cannot be resolved. Wildcards are not handled here.
    """
    tok = tokens[k]
    if isinstance(tok, Key):
        return _step_key(current, tok.name, tokens, k)
    if isinstance(tok, Index):
        return _step_index(current, tok.index, tokens, k)
    raise AssertionError(f"unhandled token type: {tok!r}")  # pragma: no cover


def _step_key(current: Any, name: Any, tokens: list[Token], k: int) -> Any:
    """Resolve key ``name``: mapping lookup first, attribute access otherwise."""
    if isinstance(current, Mapping):
        # Membership is tested before subscripting, so the mapping's
        # ``__getitem__`` only runs for keys that are present.
        if name not in current:
            raise _fail(
                tokens, k,
                f"key not found in dict with keys {_describe_keys(current)}",
            )
        return current[name]

    if current is None:
        raise _fail(tokens, k, "value is None, cannot look up a key")

    if isinstance(current, (str, bytes, bytearray)):
        # Strings and byte strings are leaves. Failing here avoids handing
        # back a bound method such as ``str.upper`` through the attribute
        # fallback below.
        raise _fail(
            tokens, k,
            f"reached a {type(current).__name__} leaf, cannot look up a key "
            f"(str/bytes are leaf values, not containers)",
        )

    # Everything else, sequences included (named tuples are sequences), is
    # read as an attribute.
    try:
        return getattr(current, name)
    except AttributeError:
        raise _fail(
            tokens, k,
            f"attribute not found on {type(current).__name__} object",
        ) from None


def _step_index(current: Any, index: Any, tokens: list[Token], k: int) -> Any:
    """Resolve ``index``: integer key in a mapping, position in a sequence."""
    if isinstance(current, Mapping):
        if index not in current:
            raise _fail(
                tokens, k,
                f"integer key {index} not found in dict with keys "
                f"{_describe_keys(current)}",
            )
        return current[index]

    if current is None:
        raise _fail(tokens, k, "value is None, cannot index")

    if not _is_seq(current):
        raise _fail(
            tokens, k,
            f"expected a sequence to index, got {type(current).__name__}",
        )

    try:
        return current[index]
    except IndexError:
        raise _fail(
            tokens, k,
            f"index {index} out of range for "
            f"{type(current).__name__} of length {len(current)}",
        ) from None
122
+
123
+
124
+ def _iter_wildcard(current: Any, tokens: list[Token], k: int):
125
+ if isinstance(current, Mapping):
126
+ return list(current.values())
127
+ if _is_seq(current):
128
+ return list(current)
129
+ if current is None:
130
+ raise _fail(tokens, k, "value is None, cannot expand wildcard '[*]'")
131
+ raise _fail(
132
+ tokens, k,
133
+ f"expected a sequence or mapping to expand '[*]', got "
134
+ f"{type(current).__name__}",
135
+ )
136
+
137
+
138
+ def _traverse(current: Any, tokens: list[Token], start: int = 0) -> Any:
139
+ k = start
140
+ while k < len(tokens):
141
+ tok = tokens[k]
142
+ if isinstance(tok, Wildcard):
143
+ elements = _iter_wildcard(current, tokens, k)
144
+ results = []
145
+ for element in elements:
146
+ try:
147
+ results.append(_traverse(element, tokens, k + 1))
148
+ except PathError:
149
+ # Elements that lack the remaining sub-path are skipped, not
150
+ # padded; see LIMITATIONS.md ("wildcard skips misses").
151
+ continue
152
+ return results
153
+ current = _step(current, tokens, k)
154
+ k += 1
155
+ return current
156
+
157
+
158
def grab(
    data: Any,
    path: str,
    *,
    default: Any = _MISSING,
    as_type: Callable[[Any], Any] | None = None,
) -> Any:
    """Return the value that ``path`` resolves to inside ``data``.

    ``default``
        Handed back, unchanged and uncoerced, when traversal raises
        :class:`PathError`. When it is omitted the :class:`PathError`
        propagates. Parsing happens before the guarded traversal and coercion
        after it, so a :class:`PathSyntaxError` or a :class:`CoercionError`
        is never replaced by ``default``.
    ``as_type``
        One-argument callable run on the resolved value. With wildcards in
        the path it is run on each matched element rather than on the list.

    A wildcard path (``items[*].name``) yields a list; elements missing the
    sub-path are skipped (see LIMITATIONS.md).
    """
    tokens = parse_path(path)

    try:
        resolved = _traverse(data, tokens)
    except PathError:
        if default is _MISSING:
            raise
        return default

    if as_type is None:
        return resolved

    rendered = render_path(tokens)
    # One level of list nesting per wildcard: coercion has to descend that
    # many levels to reach the matched elements.
    wildcard_depth = len([tok for tok in tokens if isinstance(tok, Wildcard)])
    return _coerce_result(resolved, as_type, rendered, wildcard_depth)
193
+
194
+
195
def _coerce_result(value: Any, as_type: Callable[[Any], Any], rendered: str, depth: int) -> Any:
    """Coerce ``value`` with ``as_type``, descending ``depth`` levels of lists.

    At ``depth`` 0 the value itself goes through ``coerce_value``; otherwise
    each item of ``value`` is processed with one level fewer and the results
    are returned as a new list.
    """
    if depth != 0:
        return [
            _coerce_result(item, as_type, rendered, depth - 1)
            for item in value
        ]
    return coerce_value(value, as_type, rendered)
199
+
200
+
201
def grab_many(
    data: Any,
    paths: MappingT[str, str],
    *,
    default: Any = _MISSING,
) -> dict[str, Any]:
    """Resolve a batch of named paths against ``data``.

    ``paths`` maps each result name to a path string. The returned dict has
    the same names, each mapped to ``grab(data, path, default=default)``, so
    the single ``default`` applies to every path individually.

    Raises :class:`TypeError` when ``paths`` is not a mapping.
    """
    if not isinstance(paths, Mapping):
        raise TypeError(
            f"grab_many expects a mapping of name -> path, got "
            f"{type(paths).__name__}"
        )

    resolved: dict[str, Any] = {}
    for name, p in paths.items():
        resolved[name] = grab(data, p, default=default)
    return resolved
grabmonkey/cli.py ADDED
@@ -0,0 +1,170 @@
1
+ """Command-line interface.
2
+
3
+ This module is a thin argument-parser and return-code wrapper around the
4
+ library: it loads JSON from a file or stdin, calls one library function, and
5
+ prints the result as JSON. All real behaviour lives in the library modules so
6
+ the CLI and the Python API never drift apart.
7
+
8
+ Exit codes: 0 success, 1 path/coercion failure against the data, 2 usage or
9
+ syntax error (bad path, unparseable input).
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import argparse
15
+ import json
16
+ import sys
17
+ from typing import Any, Callable
18
+
19
+ from .access import grab, grab_many
20
+ from .errors import CoercionError, PathError, PathSyntaxError
21
+ from .flatten import flatten, unflatten
22
+ from .mutate import delete, put
23
+
24
+ def _cli_bool(value: Any) -> bool:
25
+ """Parse a CLI boolean by recognised token, not Python truthiness.
26
+
27
+ ``bool("false")`` is ``True`` in Python; that footgun is wrong for a CLI, so
28
+ only explicit tokens are accepted and anything else raises (surfacing as a
29
+ coercion error / exit 1).
30
+ """
31
+ if isinstance(value, bool):
32
+ return value
33
+ if isinstance(value, str):
34
+ low = value.strip().lower()
35
+ if low in ("true", "1", "yes", "y", "on"):
36
+ return True
37
+ if low in ("false", "0", "no", "n", "off", ""):
38
+ return False
39
+ raise ValueError(f"not a recognised boolean: {value!r}")
40
+
41
+
42
# Coercers selectable with ``--type``, keyed by the name typed on the command
# line. Each is a one-argument callable passed to ``grab`` as ``as_type``.
_TYPES: dict[str, Callable[[Any], Any]] = {
    "int": int,
    "float": float,
    "str": str,
    # Token-based parsing instead of ``bool()`` truthiness.
    "bool": _cli_bool,
    # Identity: hand the resolved value back untouched.
    "json": lambda value: value,
}

# Sentinel standing for "--default was not supplied".
_MISSING = object()
51
+
52
+
53
+ def _read_input(path: str | None) -> str:
54
+ if path is None or path == "-":
55
+ return sys.stdin.read()
56
+ with open(path, encoding="utf-8") as fh:
57
+ return fh.read()
58
+
59
+
60
+ def _dump(value: Any) -> str:
61
+ return json.dumps(value, indent=2, ensure_ascii=False, sort_keys=False)
62
+
63
+
64
def _build_parser() -> argparse.ArgumentParser:
    """Construct the ``grabmonkey`` argument parser.

    One required sub-command is registered per library operation (``grab``,
    ``grab-many``, ``put``, ``delete``, ``flatten``, ``unflatten``). Every
    sub-command accepts ``--input``/``-i``; the chosen sub-command name is
    stored as ``command`` on the parsed namespace.
    """
    root = argparse.ArgumentParser(
        prog="grabmonkey",
        description="Dot-path access for nested JSON.",
    )
    commands = root.add_subparsers(dest="command", required=True)

    def with_input(cmd: argparse.ArgumentParser) -> None:
        cmd.add_argument("--input", "-i", help="JSON file to read (default: stdin)")

    grab_cmd = commands.add_parser("grab", help="read a value at a path")
    grab_cmd.add_argument("path")
    with_input(grab_cmd)
    grab_cmd.add_argument("--default", help="value (parsed as JSON) to return on a miss")
    grab_cmd.add_argument("--type", choices=sorted(_TYPES), help="coerce the resolved value")

    many_cmd = commands.add_parser("grab-many", help="read many paths given as name=path pairs")
    many_cmd.add_argument("pairs", nargs="+", help="name=path arguments")
    with_input(many_cmd)

    put_cmd = commands.add_parser("put", help="set a value at a path and print the result")
    put_cmd.add_argument("path")
    put_cmd.add_argument("value", help="value to set, parsed as JSON")
    with_input(put_cmd)

    delete_cmd = commands.add_parser("delete", help="delete a value at a path and print the result")
    delete_cmd.add_argument("path")
    with_input(delete_cmd)
    delete_cmd.add_argument("--prune", action="store_true", help="remove emptied ancestors")
    delete_cmd.add_argument("--missing-ok", action="store_true", help="ignore a missing path")

    # flatten and unflatten take the same options and differ only in name and
    # help text.
    for name, summary in (
        ("flatten", "flatten nested JSON to a flat dict"),
        ("unflatten", "rebuild nested JSON from a flat dict"),
    ):
        sep_cmd = commands.add_parser(name, help=summary)
        with_input(sep_cmd)
        sep_cmd.add_argument("--sep", default=".", help="separator for keys (default '.')")

    return root
104
+
105
+
106
def main(argv: list[str] | None = None) -> int:
    """Run the CLI and return its process exit code.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``. Usage errors are reported by argparse itself,
            which exits the process rather than returning.

    Returns:
        ``0`` on success (the result is printed to stdout as JSON).
        ``1`` when the path or coercion fails against the data, or when the
        resolved result cannot be serialised as JSON.
        ``2`` when the input cannot be read, decoded or parsed, when a JSON
        argument value is invalid, or when the path is malformed.
        Every failure prints a one-line message to stderr.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    try:
        data = json.loads(_read_input(args.input))
    except (OSError, UnicodeDecodeError, json.JSONDecodeError) as exc:
        # UnicodeDecodeError: the input file is opened as UTF-8, so a file in
        # another encoding fails while being read. It is neither an OSError
        # nor a JSONDecodeError and would otherwise surface as a traceback.
        print(f"grabmonkey: could not read input: {exc}", file=sys.stderr)
        return 2

    try:
        result = _dispatch(args, data)
    except json.JSONDecodeError as exc:
        # A --default or put VALUE argument that is not valid JSON is a usage
        # error, like unparseable input: exit 2, no traceback.
        print(f"grabmonkey: invalid JSON in argument: {exc}", file=sys.stderr)
        return 2
    except PathSyntaxError as exc:
        print(f"grabmonkey: {exc}", file=sys.stderr)
        return 2
    except (PathError, CoercionError) as exc:
        print(f"grabmonkey: {exc}", file=sys.stderr)
        return 1

    try:
        rendered = _dump(result)
    except (TypeError, ValueError) as exc:
        # A path can resolve to something JSON cannot represent: a key segment
        # applied to a list falls back to attribute access and may return a
        # bound method. Report it as a failure against the data.
        print(f"grabmonkey: result is not JSON-serialisable: {exc}", file=sys.stderr)
        return 1

    print(rendered)
    return 0
132
+
133
+
134
def _dispatch(args: argparse.Namespace, data: Any) -> Any:
    """Call the library function selected by ``args.command`` and return its result.

    Argument values that are documented as JSON (``--default``, the ``put``
    value) are decoded here, so a ``json.JSONDecodeError`` may propagate to
    the caller. A ``grab-many`` pair without ``=`` raises
    :class:`PathSyntaxError`.
    """
    command = args.command

    if command == "grab":
        # Only forward the options that were actually given, so the library's
        # own defaults apply to the rest.
        options: dict[str, Any] = {}
        if args.default is not None:
            options["default"] = json.loads(args.default)
        if args.type:
            options["as_type"] = _TYPES[args.type]
        return grab(data, args.path, **options)

    if command == "grab-many":
        named_paths = {}
        for pair in args.pairs:
            name, equals, path = pair.partition("=")
            if not equals:
                raise PathSyntaxError(f"expected name=path, got {pair!r}")
            named_paths[name] = path
        return grab_many(data, named_paths)

    if command == "put":
        new_value = json.loads(args.value)
        return put(data, args.path, new_value)

    if command == "delete":
        return delete(data, args.path, prune=args.prune, missing_ok=args.missing_ok)

    if command == "flatten":
        return flatten(data, sep=args.sep)

    if command == "unflatten":
        return unflatten(data, sep=args.sep)

    raise AssertionError(f"unhandled command: {args.command}")  # pragma: no cover
167
+
168
+
169
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":  # pragma: no cover
    sys.exit(main())
grabmonkey/coerce.py ADDED
@@ -0,0 +1,32 @@
1
+ """Type coercion for resolved values.
2
+
3
+ This module exists so ``grab(..., as_type=int)`` has one well-defined failure
4
+ mode: any value that resolves but cannot be passed through ``as_type`` raises
5
+ :class:`CoercionError` (a ``ValueError`` subclass) with the offending value and
6
+ its type in the message, rather than letting a raw ``TypeError``/``ValueError``
7
+ escape with no path context.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from typing import Any, Callable
13
+
14
+ from .errors import CoercionError
15
+
16
+
17
def coerce_value(value: Any, as_type: Callable[[Any], Any], rendered_path: str) -> Any:
    """Pass ``value`` through ``as_type`` and return the result.

    ``as_type`` may be any one-argument callable (``int``, ``float``, ``str``,
    a custom parser). A ``TypeError`` or ``ValueError`` it raises is re-raised
    as :class:`CoercionError`, chained to the original, with a message giving
    the path, the offending value and its type, and the converter's name.
    Other exception types propagate untouched.
    """
    try:
        coerced = as_type(value)
    except (TypeError, ValueError) as exc:
        # Callables without a __name__ are identified by their repr instead.
        converter = getattr(as_type, "__name__", repr(as_type))
        source_type = type(value).__name__
        message = (
            f"Path {rendered_path!r} resolved to {value!r} "
            f"({source_type}), which could not be coerced via "
            f"{converter}: {exc}"
        )
        raise CoercionError(message) from exc
    return coerced
grabmonkey/errors.py ADDED
@@ -0,0 +1,45 @@
1
+ """Exception hierarchy for grabmonkey.
2
+
3
+ This module exists so callers can distinguish *why* an access failed without
4
+ parsing message strings. A malformed path string (a programmer error) is a
5
+ ``PathSyntaxError``; a structurally valid path that does not resolve against
6
+ the data (a runtime miss, the case ``default=`` is meant to absorb) is a
7
+ ``PathError``; a value that resolved but could not be coerced to the requested
8
+ type is a ``CoercionError``. All three derive from ``GrabError`` so a caller
9
+ can catch the whole family with one ``except``.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+
15
class GrabError(Exception):
    """Root of the grabmonkey exception hierarchy.

    Catching this type catches every error the package raises.
    """
17
+
18
+
19
class PathSyntaxError(GrabError, ValueError):
    """Raised when the path string cannot be parsed.

    Typical causes are an unclosed bracket or an empty segment. This signals
    a mistake in the calling code, so ``default=`` never absorbs it: a
    default stands in for absent data, not for an unparseable path.
    """
26
+
27
+
28
class PathError(GrabError, LookupError):
    """Raised when a well-formed path does not resolve against the data.

    Covers a missing dict key, an out-of-range list index, a ``None``
    encountered part-way along the path, and a segment that needs a container
    type the data does not provide. It derives from :class:`LookupError`, the
    shared base of ``KeyError`` and ``IndexError``, so existing
    ``except LookupError`` handlers still catch it. This is the one error
    that ``default=`` absorbs.
    """
37
+
38
+
39
class CoercionError(GrabError, ValueError):
    """Raised when a resolved value cannot be converted by ``as_type``.

    Kept separate from :class:`PathError` deliberately: the value *was*
    found, so falling back to ``default`` would mask a genuine type problem
    in the source data. ``default=`` therefore does not absorb this error.
    """