omnist 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omnist/__init__.py +104 -0
- omnist/canonical/__init__.py +79 -0
- omnist/canonical/deserialize.py +117 -0
- omnist/canonical/document.py +331 -0
- omnist/canonical/dsl.py +229 -0
- omnist/canonical/formats.py +410 -0
- omnist/canonical/infer.py +126 -0
- omnist/canonical/oml.py +565 -0
- omnist/canonical/operations.py +124 -0
- omnist/canonical/registry.py +72 -0
- omnist/canonical/report.py +79 -0
- omnist/canonical/schema.py +414 -0
- omnist/errors.py +58 -0
- omnist/py.typed +0 -0
- omnist-0.2.0.dist-info/METADATA +204 -0
- omnist-0.2.0.dist-info/RECORD +20 -0
- omnist-0.2.0.dist-info/WHEEL +5 -0
- omnist-0.2.0.dist-info/licenses/LICENSE +201 -0
- omnist-0.2.0.dist-info/licenses/NOTICE +15 -0
- omnist-0.2.0.dist-info/top_level.txt +1 -0
omnist/__init__.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""omnist — one canonical data model, many formats.
|
|
2
|
+
|
|
3
|
+
A **Document** is a *tree*: an ordered list of labeled edges (repeated
|
|
4
|
+
labels are how arrays appear), held by a :class:`Doc`. A **Schema** describes
|
|
5
|
+
the shape a Document may have, as ``record`` definitions referenced by name.
|
|
6
|
+
A field's value side is always exactly one of the seven scalars (``string``,
|
|
7
|
+
``integer``, ``number``, ``boolean``, ``date``, ``time``, ``datetime``),
|
|
8
|
+
optionally nullable, or a reference to a named record — never a composed
|
|
9
|
+
value-domain (no enums, no literal-valued fields). Read a format into a
|
|
10
|
+
``Doc``, validate it against a ``Schema``, and write it back to any format.
|
|
11
|
+
|
|
12
|
+
from omnist import parse_schema, doc
|
|
13
|
+
|
|
14
|
+
s = parse_schema('''
|
|
15
|
+
record Member { "name": string, "role": string }
|
|
16
|
+
record Team { "name": string, "members" [1,]: Member }
|
|
17
|
+
root Team
|
|
18
|
+
''')
|
|
19
|
+
s.validate(doc({"name": "X", "members": [{"name": "Ann", "role": "dev"}]})).ok
|
|
20
|
+
|
|
21
|
+
The model is defined formally in ``docs/design/model.md``. The implementation
|
|
22
|
+
lives in :mod:`omnist.canonical`; this module is its public surface.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from .canonical.deserialize import materialize
|
|
26
|
+
from .canonical.document import Doc, doc
|
|
27
|
+
from .canonical.dsl import parse_schema, to_dsl
|
|
28
|
+
from .canonical.formats import (
|
|
29
|
+
check_json,
|
|
30
|
+
check_toml,
|
|
31
|
+
check_xml,
|
|
32
|
+
check_yaml,
|
|
33
|
+
read_json,
|
|
34
|
+
read_toml,
|
|
35
|
+
read_xml,
|
|
36
|
+
read_yaml,
|
|
37
|
+
write_json,
|
|
38
|
+
write_toml,
|
|
39
|
+
write_xml,
|
|
40
|
+
write_yaml,
|
|
41
|
+
)
|
|
42
|
+
from .canonical.infer import infer
|
|
43
|
+
from .canonical.oml import check_oml, read_oml, write_oml
|
|
44
|
+
from .canonical.registry import Format, formats, get_format, register_format
|
|
45
|
+
from .canonical.report import Adjustment, WriteReport, finish_write
|
|
46
|
+
from .canonical.schema import (
|
|
47
|
+
BOOLEAN,
|
|
48
|
+
DATE,
|
|
49
|
+
DATETIME,
|
|
50
|
+
INTEGER,
|
|
51
|
+
NUMBER,
|
|
52
|
+
STRING,
|
|
53
|
+
TIME,
|
|
54
|
+
Error,
|
|
55
|
+
Field,
|
|
56
|
+
Record,
|
|
57
|
+
Ref,
|
|
58
|
+
Scalar,
|
|
59
|
+
Schema,
|
|
60
|
+
ValidationResult,
|
|
61
|
+
field,
|
|
62
|
+
nullable,
|
|
63
|
+
record,
|
|
64
|
+
ref,
|
|
65
|
+
schema,
|
|
66
|
+
t,
|
|
67
|
+
)
|
|
68
|
+
from .errors import (
|
|
69
|
+
DetachedNode,
|
|
70
|
+
DocumentError,
|
|
71
|
+
OmnistError,
|
|
72
|
+
ParseError,
|
|
73
|
+
SchemaError,
|
|
74
|
+
UnsafeXMLWarning,
|
|
75
|
+
WriteError,
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
__version__ = "0.2.0"
|
|
79
|
+
|
|
80
|
+
__all__ = [
|
|
81
|
+
# errors
|
|
82
|
+
"OmnistError", "SchemaError", "ParseError", "WriteError", "DocumentError",
|
|
83
|
+
"DetachedNode", "UnsafeXMLWarning",
|
|
84
|
+
# document
|
|
85
|
+
"Doc", "doc",
|
|
86
|
+
# schema model
|
|
87
|
+
"Schema", "Record", "Scalar", "Ref", "Field", "ValidationResult", "Error",
|
|
88
|
+
# builders
|
|
89
|
+
"record", "ref", "field", "schema", "nullable", "t",
|
|
90
|
+
"STRING", "INTEGER", "NUMBER", "BOOLEAN", "DATE", "TIME", "DATETIME",
|
|
91
|
+
# dsl
|
|
92
|
+
"parse_schema", "to_dsl",
|
|
93
|
+
# operations (compatible_with / equivalent / normalize are Schema methods)
|
|
94
|
+
"infer", "materialize",
|
|
95
|
+
# codecs
|
|
96
|
+
"read_json", "write_json", "read_yaml", "write_yaml",
|
|
97
|
+
"read_toml", "write_toml", "read_xml", "write_xml",
|
|
98
|
+
"read_oml", "write_oml",
|
|
99
|
+
"check_json", "check_yaml", "check_toml", "check_xml", "check_oml",
|
|
100
|
+
# adjustment reports
|
|
101
|
+
"WriteReport", "Adjustment", "finish_write",
|
|
102
|
+
# format registry
|
|
103
|
+
"Format", "register_format", "get_format", "formats",
|
|
104
|
+
]
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""Canonical model (design proposal implementation).
|
|
2
|
+
|
|
3
|
+
A self-contained implementation of the redesigned Document and Schema models
|
|
4
|
+
described in ``docs/design/model.md``:
|
|
5
|
+
|
|
6
|
+
* :mod:`~omnist.canonical.document` — the Document as an ordered list of
|
|
7
|
+
labeled edges, not a dict-with-arrays.
|
|
8
|
+
* :mod:`~omnist.canonical.schema` — the Schema as ``Record`` (labels) /
|
|
9
|
+
``Scalar`` (one of seven, never composed) / ``Ref``, with field
|
|
10
|
+
cardinality, plus conformance.
|
|
11
|
+
* :mod:`~omnist.canonical.dsl` — the ``record`` text syntax.
|
|
12
|
+
* :mod:`~omnist.canonical.operations` — ``compatible_with`` / ``equivalent``
|
|
13
|
+
/ ``normalize`` on the new model.
|
|
14
|
+
|
|
15
|
+
This package is the implementation of the model; ``import omnist`` is its
|
|
16
|
+
public surface.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from .deserialize import materialize
|
|
20
|
+
from .document import Doc, doc
|
|
21
|
+
from .dsl import parse_schema, to_dsl
|
|
22
|
+
from .formats import (
|
|
23
|
+
check_json,
|
|
24
|
+
check_toml,
|
|
25
|
+
check_xml,
|
|
26
|
+
check_yaml,
|
|
27
|
+
read_json,
|
|
28
|
+
read_toml,
|
|
29
|
+
read_xml,
|
|
30
|
+
read_yaml,
|
|
31
|
+
write_json,
|
|
32
|
+
write_toml,
|
|
33
|
+
write_xml,
|
|
34
|
+
write_yaml,
|
|
35
|
+
)
|
|
36
|
+
from .infer import infer
|
|
37
|
+
from .operations import compatible_with, equivalent, normalize
|
|
38
|
+
from .registry import Format, formats, get_format, register_format
|
|
39
|
+
|
|
40
|
+
# register the four built-in formats
|
|
41
|
+
from .registry import _register_builtins as _rb # noqa: E402
|
|
42
|
+
from .report import Adjustment, WriteReport, finish_write
|
|
43
|
+
from .schema import (
|
|
44
|
+
BOOLEAN,
|
|
45
|
+
DATE,
|
|
46
|
+
DATETIME,
|
|
47
|
+
INTEGER,
|
|
48
|
+
NUMBER,
|
|
49
|
+
STRING,
|
|
50
|
+
TIME,
|
|
51
|
+
Field,
|
|
52
|
+
Record,
|
|
53
|
+
Ref,
|
|
54
|
+
Scalar,
|
|
55
|
+
Schema,
|
|
56
|
+
ValidationResult,
|
|
57
|
+
field,
|
|
58
|
+
nullable,
|
|
59
|
+
record,
|
|
60
|
+
ref,
|
|
61
|
+
schema,
|
|
62
|
+
t,
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
_rb()
|
|
66
|
+
|
|
67
|
+
__all__ = [
|
|
68
|
+
"Doc", "doc",
|
|
69
|
+
"Schema", "Record", "Scalar", "Ref", "Field", "ValidationResult",
|
|
70
|
+
"record", "ref", "field", "schema", "nullable", "t",
|
|
71
|
+
"STRING", "INTEGER", "NUMBER", "BOOLEAN", "DATE", "TIME", "DATETIME",
|
|
72
|
+
"parse_schema", "to_dsl",
|
|
73
|
+
"compatible_with", "equivalent", "normalize", "infer", "materialize",
|
|
74
|
+
"read_json", "read_yaml", "read_toml", "read_xml",
|
|
75
|
+
"write_json", "write_yaml", "write_toml", "write_xml",
|
|
76
|
+
"check_json", "check_yaml", "check_toml", "check_xml",
|
|
77
|
+
"WriteReport", "Adjustment", "finish_write",
|
|
78
|
+
"Format", "register_format", "get_format", "formats",
|
|
79
|
+
]
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""Schema-directed deserialization: upgrade a freshly-read node's leaf values
|
|
2
|
+
to match a :class:`~omnist.canonical.schema.Schema`'s declared scalars.
|
|
3
|
+
|
|
4
|
+
Readers (``read_json``, etc.) hand back text-shaped values: JSON/YAML/TOML
|
|
5
|
+
have no ``date``/``time`` type, so a temporal field arrives as an ISO-8601
|
|
6
|
+
string; a whole-number ``float`` may need to become an ``int`` (or vice
|
|
7
|
+
versa) to match what the schema declares. :func:`materialize` converts each
|
|
8
|
+
leaf **only when the conversion is value-exact** -- ``"2024-01-01" -> date``,
|
|
9
|
+
``1.0 -> int 1`` -- and raises :class:`~omnist.errors.ParseError` when it
|
|
10
|
+
isn't -- ``1.5 -> integer``, ``"abc" -> integer``.
|
|
11
|
+
|
|
12
|
+
This is unambiguous by construction: every field has exactly one candidate
|
|
13
|
+
scalar (see ``docs/design/model.md``), so there's never a choice between
|
|
14
|
+
candidate representations -- only "does this value exactly fit the one
|
|
15
|
+
scalar declared, or not."
|
|
16
|
+
|
|
17
|
+
Shape problems (a missing field, an unexpected field, the wrong cardinality)
|
|
18
|
+
are left to :meth:`Schema.validate`, not raised here -- :func:`materialize`
|
|
19
|
+
only ever touches values whose field type it can identify; anything it
|
|
20
|
+
doesn't recognize is passed through unchanged for ``validate`` to flag.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import datetime as _dt
|
|
26
|
+
from typing import Any
|
|
27
|
+
|
|
28
|
+
from ..errors import ParseError
|
|
29
|
+
from .schema import Record, Scalar, Schema
|
|
30
|
+
|
|
31
|
+
# Maps each temporal scalar name to the ``datetime`` class that
# _materialize_temporal() uses both to recognise an already-typed value
# (``isinstance``) and to parse a string (``fromisoformat``).
_TEMPORAL_CLASS = {"date": _dt.date, "time": _dt.time, "datetime": _dt.datetime}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def materialize(node: Any, schema: Schema) -> Any:
    """Re-read ``node`` against ``schema``, starting from the schema's root.

    The walk begins at ``schema.root`` with the path ``"$"``; the path only
    serves as the prefix of any :class:`~omnist.errors.ParseError` message
    raised further down.
    """
    root_type = schema.root
    return _materialize_type(node, schema, root_type, "$")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _materialize_type(node: Any, schema: Schema, t: Any, path: str) -> Any:
    """Dispatch ``node`` on whatever ``t`` resolves to in ``schema``.

    When ``schema.resolve(t)`` yields a :class:`Scalar` the node is handed to
    the leaf converter; every other resolution is treated as a record.
    """
    resolved = schema.resolve(t)
    if not isinstance(resolved, Scalar):
        return _materialize_record(node, schema, resolved, path)
    return _materialize_scalar(node, resolved, path)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _materialize_record(node: Any, schema: Schema, rec: Record, path: str) -> Any:
|
|
47
|
+
if not isinstance(node, list):
|
|
48
|
+
return node # a shape mismatch -- validate()'s job
|
|
49
|
+
out = []
|
|
50
|
+
counts: dict = {}
|
|
51
|
+
for label, child in node:
|
|
52
|
+
i = counts.get(label, 0)
|
|
53
|
+
counts[label] = i + 1
|
|
54
|
+
p = f"{path}.{label}" if i == 0 else f"{path}.{label}[{i}]"
|
|
55
|
+
f = rec.field(label)
|
|
56
|
+
if f is None:
|
|
57
|
+
out.append((label, child)) # an unexpected field -- validate()'s job
|
|
58
|
+
else:
|
|
59
|
+
out.append((label, _materialize_type(child, schema, f.type, p)))
|
|
60
|
+
return out
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _materialize_scalar(value: Any, s: Scalar, path: str) -> Any:
|
|
64
|
+
if value is None or isinstance(value, list):
|
|
65
|
+
return value # null, or a shape mismatch -- validate()'s job
|
|
66
|
+
if s.name == "string":
|
|
67
|
+
if isinstance(value, str):
|
|
68
|
+
return value
|
|
69
|
+
elif s.name == "boolean":
|
|
70
|
+
if isinstance(value, bool):
|
|
71
|
+
return value
|
|
72
|
+
elif s.name == "integer":
|
|
73
|
+
if isinstance(value, bool):
|
|
74
|
+
pass
|
|
75
|
+
elif isinstance(value, int):
|
|
76
|
+
return value
|
|
77
|
+
elif isinstance(value, float) and value.is_integer():
|
|
78
|
+
return int(value)
|
|
79
|
+
elif s.name == "number":
|
|
80
|
+
if isinstance(value, bool):
|
|
81
|
+
pass
|
|
82
|
+
elif isinstance(value, (int, float)):
|
|
83
|
+
return float(value)
|
|
84
|
+
elif s.name in _TEMPORAL_CLASS:
|
|
85
|
+
converted = _materialize_temporal(value, s.name)
|
|
86
|
+
if converted is not _SENTINEL:
|
|
87
|
+
return converted
|
|
88
|
+
raise ParseError(f"{path}: {value!r} cannot be read as {s.name} "
|
|
89
|
+
"(not a value-exact conversion)")
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# Unique marker returned by _materialize_temporal() to mean "could not be read
# as the requested temporal kind"; _materialize_scalar() tests for it by
# identity before falling through to its ParseError.
_SENTINEL = object()
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _materialize_temporal(value: Any, name: str) -> Any:
    """Read ``value`` as the temporal kind ``name``, or return ``_SENTINEL``.

    ``name`` must be a key of ``_TEMPORAL_CLASS``. A value that is already an
    instance of the target class is returned as-is, except that a ``datetime``
    is refused for ``"date"``. A string is parsed with the target class's
    ``fromisoformat``; for ``"datetime"`` a string that also parses as a bare
    date is refused. Anything else yields ``_SENTINEL``.
    """
    target = _TEMPORAL_CLASS[name]

    if isinstance(value, target):
        # datetime subclasses date, so isinstance alone would let it through.
        datetime_as_date = name == "date" and isinstance(value, _dt.datetime)
        return _SENTINEL if datetime_as_date else value

    if isinstance(value, str):
        try:
            result = target.fromisoformat(value)
        except ValueError:
            return _SENTINEL
        if name == "datetime" and _is_iso(value, _dt.date):
            return _SENTINEL  # a bare date string is not a datetime
        return result

    return _SENTINEL
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _is_iso(value: str, cls) -> bool:
|
|
113
|
+
try:
|
|
114
|
+
cls.fromisoformat(value)
|
|
115
|
+
return True
|
|
116
|
+
except ValueError:
|
|
117
|
+
return False
|
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
"""The Document model — a canonical tree of ordered, labeled edges.
|
|
2
|
+
|
|
3
|
+
A Document model **node** is either
|
|
4
|
+
|
|
5
|
+
* a **leaf** holding a scalar value (``str``/``int``/``float``/``bool``/
|
|
6
|
+
``datetime`` values, or ``None``), or
|
|
7
|
+
* an **internal node** holding an *ordered list of edges*, each a
|
|
8
|
+
``(label, child)`` pair. **Labels may repeat** — "many members" is the label
|
|
9
|
+
``member`` appearing several times, not a field pointing to an array.
|
|
10
|
+
|
|
11
|
+
The canonical Python form of a node is therefore::
|
|
12
|
+
|
|
13
|
+
scalar # a leaf
|
|
14
|
+
[(label, node), (label, node), ...] # an internal node (ordered)
|
|
15
|
+
|
|
16
|
+
This single shape represents every supported format canonically, including
|
|
17
|
+
XML's interleaved repeated elements, which a dict-with-array-values cannot.
|
|
18
|
+
``Doc`` is a thin, guarded wrapper around a node, with navigation helpers.
|
|
19
|
+
Order is preserved (it is data); schema validation ignores it. See
|
|
20
|
+
``docs/design/model.md``.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import datetime as _dt
|
|
26
|
+
from typing import TYPE_CHECKING, Any, Iterator, List, Optional, Tuple
|
|
27
|
+
|
|
28
|
+
from ..errors import DocumentError
|
|
29
|
+
|
|
30
|
+
if TYPE_CHECKING:
|
|
31
|
+
from .report import WriteReport
|
|
32
|
+
from .schema import Schema
|
|
33
|
+
|
|
34
|
+
# Largest ``depth`` counter build_node() accepts; a deeper call raises
# DocumentError instead of recursing further.
_MAX_DEPTH = 200
|
|
35
|
+
|
|
36
|
+
Edge = Tuple[str, Any] # (label, node)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _is_scalar(v: Any) -> bool:
|
|
40
|
+
# bool is an int subclass and datetime a date subclass — both are fine here.
|
|
41
|
+
return isinstance(v, (str, int, float, _dt.date, _dt.time, _dt.datetime)) or v is None
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# ---------------------------------------------------------------------------
|
|
45
|
+
# Building a node from a plain Python value (JSON-shaped)
|
|
46
|
+
# ---------------------------------------------------------------------------
|
|
47
|
+
|
|
48
|
+
def build_node(value: Any, path: str = "$", depth: int = 0,
               seen: Optional[frozenset] = None) -> Any:
    """Convert a plain Python value into a canonical node.

    * A ``dict`` becomes an ordered list of ``(key, child)`` edges. A key
      whose value is a list or tuple contributes one edge per item, all under
      that same key.
    * A value accepted by ``_is_scalar`` is returned unchanged as a leaf.
    * A list or tuple reaching this function directly (a top-level array, or
      an array inside an array) has no labeled-edge form.

    ``path`` prefixes error messages. ``depth`` and ``seen`` are recursion
    bookkeeping: the nesting counter checked against ``_MAX_DEPTH`` and the
    ``id()`` values of the dicts currently being expanded.

    Raises:
        DocumentError: nesting too deep, a dict that contains itself, a
            non-string dict key, a bare array, or an unsupported value type.
    """
    if depth > _MAX_DEPTH:
        raise DocumentError(f"{path}: nesting exceeds the maximum depth ({_MAX_DEPTH})")

    if isinstance(value, (list, tuple)):
        raise DocumentError(f"{path}: a bare array has no labeled-edge form "
                            "(arrays appear only as a repeated field)")

    if not isinstance(value, dict):
        if _is_scalar(value):
            return value
        raise DocumentError(f"{path}: {type(value).__name__} is not a Document value")

    # From here on ``value`` is a dict: guard against it containing itself.
    ancestors = seen or frozenset()
    if id(value) in ancestors:
        raise DocumentError(f"{path}: cycle detected")
    ancestors = ancestors | {id(value)}

    result: List[Edge] = []
    for key, item in value.items():
        if not isinstance(key, str):
            raise DocumentError(f"{path}: object key {key!r} is not a string")
        key_path = _join(path, key)
        result.extend(
            (key, child)
            for child in _children(item, key_path, depth + 1, ancestors)
        )
    return result
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _children(v: Any, path: str, depth: int, seen: frozenset) -> Iterator[Any]:
    """Yield the child node(s) one dict value expands to.

    A list or tuple yields one node per item, built one level deeper and with
    ``[i]`` appended to ``path``; an item that is itself a list or tuple
    raises ``DocumentError``. Any other value yields exactly one node, built
    at the given ``depth``. This is a generator, so nothing is built or
    raised until it is iterated.
    """
    if not isinstance(v, (list, tuple)):
        yield build_node(v, path, depth, seen)
        return

    for index, element in enumerate(v):
        if isinstance(element, (list, tuple)):
            raise DocumentError(
                f"{path}[{index}]: an array of arrays has no labeled-edge form")
        yield build_node(element, f"{path}[{index}]", depth + 1, seen)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _join(path: str, key: str) -> str:
|
|
92
|
+
return f"{path}.{key}" if key.isidentifier() else f'{path}["{key}"]'
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# ---------------------------------------------------------------------------
|
|
96
|
+
# Doc — guarded wrapper with navigation
|
|
97
|
+
# ---------------------------------------------------------------------------
|
|
98
|
+
|
|
99
|
+
class Doc:
    """A guarded handle on a Document node (a leaf value or an edge list).

    ``_node`` is the wrapped node exactly as given -- it is not copied, so the
    editing methods mutate that edge list in place. ``path`` is the node's
    location string (``"$"`` by default); it prefixes error messages and the
    paths of child handles.
    """

    __slots__ = ("_node", "path")

    # A Doc is mutable and compares by content (see __eq__), so it is
    # unhashable. Defining __eq__ already implies this; spell it out.
    __hash__ = None  # type: ignore[assignment]

    def __init__(self, node: Any, path: str = "$") -> None:
        self._node = node
        self.path = path

    # -- construction ---------------------------------------------------
    @classmethod
    def of(cls, value: Any) -> "Doc":
        """Build a Doc from a plain Python value via ``build_node``."""
        return cls(build_node(value))

    @classmethod
    def from_format(cls, name: str, text: str) -> "Doc":
        """Read ``text`` with the ``read`` callable of the format registered
        as ``name``."""
        from .registry import get_format
        return cls(get_format(name).read(text))

    @classmethod
    def from_json(cls, text: str, *, schema: Optional["Schema"] = None) -> "Doc":
        """Wrap the node ``read_json(text, schema=schema)`` returns."""
        from .formats import read_json
        return cls(read_json(text, schema=schema))

    @classmethod
    def from_yaml(cls, text: str, *, schema: Optional["Schema"] = None) -> "Doc":
        """Wrap the node ``read_yaml(text, schema=schema)`` returns."""
        from .formats import read_yaml
        return cls(read_yaml(text, schema=schema))

    @classmethod
    def from_toml(cls, text: str, *, schema: Optional["Schema"] = None) -> "Doc":
        """Wrap the node ``read_toml(text, schema=schema)`` returns."""
        from .formats import read_toml
        return cls(read_toml(text, schema=schema))

    @classmethod
    def from_xml(cls, text: str, *, schema: Optional["Schema"] = None) -> "Doc":
        """Wrap the node ``read_xml(text, schema=schema)`` returns."""
        from .formats import read_xml
        return cls(read_xml(text, schema=schema))

    @classmethod
    def from_oml(cls, text: str, *, schema: Optional["Schema"] = None) -> "Doc":
        """Wrap the node ``read_oml(text, schema=schema)`` returns."""
        from .oml import read_oml
        return cls(read_oml(text, schema=schema))

    # -- shape ----------------------------------------------------------
    @property
    def is_leaf(self) -> bool:
        """True when the wrapped node is not an edge list."""
        return not isinstance(self._node, list)

    @property
    def value(self) -> Any:
        """The leaf value. Raises ``DocumentError`` on an internal node."""
        if isinstance(self._node, list):
            raise DocumentError(f"{self.path}: not a leaf; use edges()")
        return self._node

    def edges(self) -> List[Tuple[str, "Doc"]]:
        """All edges, in order, as ``(label, Doc)`` pairs.

        Each child Doc wraps the child node itself (no copy). Its path is
        ``<path>.<label>`` for the first edge under a label and
        ``<path>.<label>[i]`` for the i-th repeat. Raises ``DocumentError``
        on a leaf.
        """
        if not isinstance(self._node, list):
            raise DocumentError(f"{self.path}: a leaf has no edges")
        out, counts = [], {}
        for label, child in self._node:
            i = counts.get(label, 0)
            counts[label] = i + 1
            cp = f"{self.path}.{label}" if i == 0 else f"{self.path}.{label}[{i}]"
            out.append((label, Doc(child, cp)))
        return out

    def labels(self) -> List[str]:
        """Distinct labels in first-seen order (empty for a leaf)."""
        seen, out = set(), []
        for label, _ in self._iter():
            if label not in seen:
                seen.add(label)
                out.append(label)
        return out

    def get(self, label: str) -> List["Doc"]:
        """Every child under ``label``, in order. Raises on a leaf."""
        return [c for lbl, c in self.edges() if lbl == label]

    def get_one(self, label: str) -> "Doc":
        """The only child under ``label``.

        Raises ``DocumentError`` when there are zero or several.
        """
        cs = self.get(label)
        if len(cs) != 1:
            raise DocumentError(
                f"{self.path}: expected exactly one {label!r}, found {len(cs)}")
        return cs[0]

    def count(self, label: str) -> int:
        """Number of edges under ``label`` (0 for a leaf)."""
        return sum(1 for lbl, _ in self._iter() if lbl == label)

    def _iter(self) -> Iterator[Tuple[str, Any]]:
        """Raw ``(label, node)`` pairs; yields nothing for a leaf."""
        if isinstance(self._node, list):
            yield from self._node

    def child(self, label: str) -> "Doc":
        """A cursor to the single child under ``label`` (editable if internal)."""
        return self.get_one(label)

    # -- editing (mutates the underlying edge list) ---------------------
    def add(self, label: str, value: Any) -> "Doc":
        """Append an edge ``(label, value)``. A repeated label is how an array
        grows. Returns ``self`` for chaining.

        ``value`` goes through ``build_node``. Raises ``DocumentError`` on a
        leaf, when ``label`` is not a string, or when ``value`` is not a
        Document value.
        """
        self._require_internal("add")
        self._require_label(label)
        self._node.append((label, build_node(value, f"{self.path}.{label}")))
        return self

    def remove(self, label: str) -> "Doc":
        """Remove every edge under ``label``."""
        self._require_internal("remove")
        # Slice-assign so the same list object (possibly shared with a parent
        # node) is edited rather than replaced.
        self._node[:] = [(lbl, c) for lbl, c in self._node if lbl != label]
        return self

    def set(self, label: str, value: Any) -> "Doc":
        """Replace the (single) child under ``label``, or add it if absent.

        Only the first edge under ``label`` is replaced; later repeats are
        left alone. Raises ``DocumentError`` on a leaf, when ``label`` is not
        a string, or when ``value`` is not a Document value.
        """
        self._require_internal("set")
        self._require_label(label)
        new = build_node(value, f"{self.path}.{label}")
        for i, (lbl, _) in enumerate(self._node):
            if lbl == label:
                self._node[i] = (label, new)
                return self
        self._node.append((label, new))
        return self

    def _require_internal(self, op: str) -> None:
        """Raise ``DocumentError`` if this handle wraps a leaf."""
        if not isinstance(self._node, list):
            raise DocumentError(f"{self.path}: cannot {op} on a leaf")

    def _require_label(self, label: Any) -> None:
        """Raise ``DocumentError`` unless ``label`` is a string.

        Mirrors the string-key check ``build_node`` applies to dicts, so an
        edit cannot create an edge that construction would have rejected.
        """
        if not isinstance(label, str):
            raise DocumentError(f"{self.path}: edge label {label!r} is not a string")

    # -- export ---------------------------------------------------------
    def to_data(self) -> Any:
        """A copy of the canonical node (edge lists are rebuilt recursively)."""
        return _copy(self._node)

    def to_grouped(self) -> Any:
        """A JSON-shaped projection: same-label edges grouped into a list.

        A label seen once stays a single value; a label seen more than once
        becomes a list (the schema-less fallback of the count-1 rule, see
        ``docs/design/model.md`` §10)."""
        return _grouped(self._node)

    def to_json(self, **o: Any) -> str:
        """Write the node with ``write_json``; ``o`` is passed through."""
        from .formats import write_json
        return write_json(self._node, **o)

    def to_yaml(self, **o: Any) -> str:
        """Write the node with ``write_yaml``; ``o`` is passed through."""
        from .formats import write_yaml
        return write_yaml(self._node, **o)

    def to_toml(self, **o: Any) -> str:
        """Write the node with ``write_toml``; ``o`` is passed through."""
        from .formats import write_toml
        return write_toml(self._node, **o)

    def to_xml(self, **o: Any) -> str:
        """Write the node with ``write_xml``; ``o`` is passed through."""
        from .formats import write_xml
        return write_xml(self._node, **o)

    def to_oml(self, **o: Any) -> str:
        """Write the node with ``write_oml``; ``o`` is passed through."""
        from .oml import write_oml
        return write_oml(self._node, **o)

    def to_format(self, name: str, **o: Any) -> str:
        """Write the node with the ``write`` callable of the format registered
        as ``name``; ``o`` is passed through."""
        from .registry import get_format
        return get_format(name).write(self._node, **o)

    def check_json(self) -> "WriteReport":
        """The report ``check_json`` produces for this node."""
        from .formats import check_json
        return check_json(self._node)

    def check_yaml(self) -> "WriteReport":
        """The report ``check_yaml`` produces for this node."""
        from .formats import check_yaml
        return check_yaml(self._node)

    def check_toml(self) -> "WriteReport":
        """The report ``check_toml`` produces for this node."""
        from .formats import check_toml
        return check_toml(self._node)

    def check_xml(self) -> "WriteReport":
        """The report ``check_xml`` produces for this node."""
        from .formats import check_xml
        return check_xml(self._node)

    def check_oml(self) -> "WriteReport":
        """The report ``check_oml`` produces for this node."""
        from .oml import check_oml
        return check_oml(self._node)

    def check_format(self, name: str) -> "WriteReport":
        """Simulate writing to format ``name`` and return the adjustment
        report, without producing output. Requires the registered
        :class:`~omnist.canonical.registry.Format` to provide a ``check``
        callable (the four built-ins do; a custom plugin may not)."""
        from .registry import get_format
        fmt = get_format(name)
        if fmt.check is None:
            raise DocumentError(
                f"format {name!r} has no check() -- cannot simulate a write")
        return fmt.check(self._node)

    def validate(self, schema: "Schema") -> Any:
        """Return ``schema.validate(self)``."""
        return schema.validate(self)

    # -- dunders --------------------------------------------------------
    def __eq__(self, other: Any) -> bool:
        """Compare wrapped nodes; ``path`` is ignored.

        A non-Doc operand is first converted with ``build_node``; if that
        conversion fails the comparison is ``NotImplemented``.
        """
        if isinstance(other, Doc):
            return self._node == other._node
        try:
            return self._node == build_node(other)
        except DocumentError:
            return NotImplemented

    def __repr__(self) -> str:
        return f"Doc({'leaf' if self.is_leaf else 'node'}: {self._node!r})"
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def doc(value: Any) -> Doc:
    """Return ``value`` as a :class:`Doc`.

    An existing ``Doc`` is handed back as the same object; anything else is
    built with ``Doc.of``.
    """
    if isinstance(value, Doc):
        return value
    return Doc.of(value)
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def _copy(node: Any) -> Any:
|
|
313
|
+
if isinstance(node, list):
|
|
314
|
+
return [(label, _copy(child)) for label, child in node]
|
|
315
|
+
return node
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def _grouped(node: Any) -> Any:
|
|
319
|
+
if not isinstance(node, list):
|
|
320
|
+
return node
|
|
321
|
+
counts: dict = {}
|
|
322
|
+
for label, _ in node:
|
|
323
|
+
counts[label] = counts.get(label, 0) + 1
|
|
324
|
+
out: dict = {}
|
|
325
|
+
for label, child in node:
|
|
326
|
+
g = _grouped(child)
|
|
327
|
+
if counts[label] > 1:
|
|
328
|
+
out.setdefault(label, []).append(g)
|
|
329
|
+
else:
|
|
330
|
+
out[label] = g
|
|
331
|
+
return out
|