json-schema-utils 0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_schema_utils-0.8.dist-info/METADATA +119 -0
- json_schema_utils-0.8.dist-info/RECORD +15 -0
- json_schema_utils-0.8.dist-info/WHEEL +5 -0
- json_schema_utils-0.8.dist-info/entry_points.txt +7 -0
- json_schema_utils-0.8.dist-info/licenses/LICENSE +1 -0
- json_schema_utils-0.8.dist-info/top_level.txt +1 -0
- jsutils/__init__.py +5 -0
- jsutils/convert.py +934 -0
- jsutils/inline.py +206 -0
- jsutils/recurse.py +90 -0
- jsutils/schemas.py +151 -0
- jsutils/scripts.py +396 -0
- jsutils/simplify.py +580 -0
- jsutils/stats.py +1310 -0
- jsutils/utils.py +44 -0
jsutils/inline.py
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
import copy
|
|
3
|
+
from urllib.parse import urlsplit
|
|
4
|
+
|
|
5
|
+
from .utils import JsonSchema, JSUError, log
|
|
6
|
+
from .schemas import Schemas
|
|
7
|
+
from .recurse import recurseSchema
|
|
8
|
+
|
|
9
|
+
# TODO fix recursion handling
|
|
10
|
+
|
|
11
|
+
def mergeProperty(schema: JsonSchema, prop: str, value: Any) -> JsonSchema:
    """Merge an additional property into an existing schema.

    A dict schema is updated in place and returned; the value being merged is
    never modified.

    :param schema: schema to extend (boolean or dict).
    :param prop: name of the keyword to merge.
    :param value: value of the keyword to merge.
    :return: the merged schema, or ``False`` when the merge is unsatisfiable.
    :raises JSUError: if the keyword is not supported or if its value
        conflicts with the one already in the schema.

    Note: this is on a best effort basis, on failure a backup plan is required.
    """
    # handle boolean schema
    if isinstance(schema, bool):
        return {prop: value} if schema else False
    assert isinstance(schema, dict)  # pyright helper

    # then object
    if prop == "$defs":
        # ignore: exact match only, a substring test would also drop "", "$"…
        pass
    elif prop == "enum":
        if prop in schema:
            # intersect in order
            vals = [v for v in schema[prop] if v in value]  # pyright: ignore
            if not vals:
                log.warning("incompatible enum/enum makes schema unsatisfiable")
                schema = False
            else:
                schema[prop] = vals
        elif "const" in schema:
            if schema["const"] not in value:
                log.warning("incompatible enum/const makes schema unsatisfiable")
                schema = False
            # else the existing const is already the stricter constraint
        else:
            schema[prop] = value
    elif prop == "const":
        if prop in schema:
            if schema[prop] != value:
                log.warning("incompatible const/const makes schema unsatisfiable")
                schema = False
        elif "enum" in schema:
            schema[prop] = value
            if value in schema["enum"]:
                # const subsumes the enum
                del schema["enum"]
            else:
                log.warning("incompatible const/enum makes schema unsatisfiable")
                schema = False
        else:
            schema[prop] = value
    elif prop == "required":
        assert isinstance(value, list)
        if prop in schema:
            # append in order and without duplicates
            required = schema["required"]
            for p in value:
                if p not in required:
                    required.append(p)  # pyright: ignore
        else:
            # copy, so that later merges do not append to the caller's list
            schema["required"] = list(value)
    elif prop == "properties":
        if prop in schema:
            props = schema[prop]
            assert isinstance(value, dict) and isinstance(props, dict)
            for p, s in value.items():
                if p not in props:
                    props[p] = s
                elif props[p] == s or s is True:
                    # nothing to add
                    pass
                else:
                    # both constraints must hold on this property
                    props[p] = {"allOf": [props[p], s]}
        else:
            # shallow copy, so that later merges do not alter the caller's mapping
            schema[prop] = copy.copy(value)
    elif prop == "allOf":
        assert isinstance(value, list)
        if prop in schema:
            # a conjunction of conjunctions is the concatenation
            schema[prop].extend(value)  # pyright: ignore
        else:
            schema[prop] = list(value)
    elif prop in ("anyOf", "oneOf"):
        assert isinstance(value, list)
        if prop not in schema:
            schema[prop] = list(value)
        elif schema[prop] != value:
            # concatenating two disjunctions would accept values rejected by
            # one of them, so let the caller fall back to a safe merge
            raise JSUError(f"cannot merge prop {prop} distinct values")
    elif prop in ("title", "$comment"):
        # best effort
        if prop not in schema:
            schema[prop] = value
        # FIXME: what about "else"?
    # TODO extend list of supported validations?
    elif prop in ("type", "$ref", "pattern",
                  "additionalProperties", "additionalItems", "items",
                  "minLength", "maxLength", "minProperties", "maxProperties",
                  "minimum", "maximum", "exclusiveMinimum", "exclusiveMaximum",
                  "minItems", "maxItems", "minContains", "maxContains", "multipleOf",
                  "uniqueItems"):
        # allow identical values only (for now)
        if prop in schema:
            if schema[prop] != value:
                raise JSUError(f"cannot merge prop {prop} distinct values")
        else:
            schema[prop] = value
    else:
        raise JSUError(f"merging of prop {prop} is not supported (yet)")

    return schema
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
# properties keept at the root while merging
|
|
119
|
+
_KEEP_PROPS = {
|
|
120
|
+
"$schema", "$id", "$comment", "title", "description", "examples",
|
|
121
|
+
# containers
|
|
122
|
+
"$defs", "oneOf", "anyOf", "allOf",
|
|
123
|
+
# special cases
|
|
124
|
+
"unevaluatedProperties", "unevaluatedItems",
|
|
125
|
+
# older version compatibility?
|
|
126
|
+
"definitions",
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def mergeSchemas(schema: JsonSchema, refschema: JsonSchema) -> JsonSchema:
    """Merge two schemas.

    Keywords of ``refschema`` are merged one by one into a deep copy of
    ``schema``. If one of them cannot be merged, the schemas are combined
    through ``allOf`` instead: keywords of ``schema`` which are not in
    ``_KEEP_PROPS`` are moved into a first ``allOf`` entry and ``refschema``
    is appended as another entry.

    :param schema: first schema, left untouched.
    :param refschema: schema to merge in; the result may share its sub-objects.
    :return: the merged schema, ``False`` if it is unsatisfiable.
    """
    # boolean schemas: True is neutral, False is absorbing
    if isinstance(refschema, bool):
        return schema if refschema else False
    if isinstance(schema, bool):
        return refschema if schema else False
    assert isinstance(schema, dict) and isinstance(refschema, dict)

    merged: JsonSchema = copy.deepcopy(schema)

    try:
        # best effort
        for p, v in refschema.items():
            merged = mergeProperty(merged, p, v)

    except JSUError as e:
        # backup merge with allOf
        log.warning(f"merge error: {e}")
        log.info("merging schemas with allOf")

        # restart from a pristine copy: the partial merge is discarded and
        # the caller's schema is not modified
        fallback = copy.deepcopy(schema)
        separate = {}
        for p in list(fallback):
            if p not in _KEEP_PROPS:
                separate[p] = fallback.pop(p)
        conjuncts = fallback.setdefault("allOf", [])
        if separate:
            conjuncts.append(separate)  # pyright: ignore
        if refschema:
            conjuncts.append(refschema)  # pyright: ignore
        merged = fallback

    return merged
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _url(ref):
|
|
170
|
+
"""Extract base URL from full reference."""
|
|
171
|
+
u = urlsplit(ref)
|
|
172
|
+
return u.scheme + "://" + u.netloc
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def inlineRefs(schema: JsonSchema, url: str, schemas: Schemas) -> JsonSchema:
    """Recursively inline $ref in schema, which is modified.

    :param schema: schema to process.
    :param url: url of the schema, used to resolve its references.
    :param schemas: store used to resolve references.
    :return: the schema with its references replaced by their targets,
        except for references detected as recursive, which are kept.
    """

    def rwtRef(schema: JsonSchema, path: list[str]) -> JsonSchema:

        # recursion avoidance: spellings of a reference to the current location
        spath = "/".join(path)
        skips = {url + "#" + spath, url + "#/" + spath, url + "#./" + spath}
        # references already expanded at this location: meeting one of them
        # again means that the chain of references loops
        seen: set[str] = set()

        while isinstance(schema, dict) and "$ref" in schema:
            ref = schema["$ref"]
            assert isinstance(ref, str)

            # recursion detection, direct or through a chain of references
            if ref in skips or ref in seen:
                log.info(f"skipping recursive ref: {ref}")
                break
            seen.add(ref)

            # actual substitution
            sub = schemas.schema(url, ref)
            del schema["$ref"]
            if isinstance(sub, dict):
                schema = mergeSchemas(schema, sub)
            else:
                assert isinstance(sub, bool)
                if not sub:
                    schema = False
                # else True is coldly ignored

        return schema

    return recurseSchema(schema, url, rwt=rwtRef)
|
jsutils/recurse.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
from .utils import JsonSchema, log, FilterFun, RewriteFun
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def _recurseSchema(
        schema: JsonSchema,
        url: str,
        path: list[str],
        flt: FilterFun,
        rwt: RewriteFun) -> JsonSchema:
    """Walk a schema: filter on the way down, rewrite on the way up.

    Sub-schemas are replaced in place by the result of their own recursion,
    then ``rwt`` is applied to the schema itself, and only then are the
    definitions (``$defs``, ``definitions``) of the result visited.

    :param schema: schema to consider.
    :param url: url of schema, passed along unchanged.
    :param path: list of keys leading to this schema from the root.
    :param flt: called first, a false result returns the schema untouched.
    :param rwt: called on the schema once its sub-schemas are processed.
    """
    log.debug(f"recuring at {path}")

    # skip recursion
    if not flt(schema, path):
        return schema

    # a boolean schema has no sub-schema
    if isinstance(schema, bool):
        return rwt(schema, path)
    assert isinstance(schema, dict), f"schema must be a dict: {type(schema).__name__}"

    def descend(sub, *steps):
        """Recurse in a sub-schema reached through the given path steps."""
        return _recurseSchema(sub, url, path + list(steps), flt, rwt)

    def descendAll(subs, keyword):
        """Recurse in each sub-schema of a list held by a keyword."""
        return [descend(sub, keyword, str(i)) for i, sub in enumerate(subs)]

    def recValue(holder, *keywords):
        """Recurse in the values of the mappings held by the keywords."""
        for keyword in keywords:
            if keyword not in holder:
                continue
            members = holder[keyword]
            assert isinstance(members, dict)
            for name, sub in list(members.items()):
                members[name] = descend(sub, keyword, name)

    # list of schemas
    for keyword in ("allOf", "oneOf", "anyOf", "prefixItems"):
        if keyword in schema:
            subs = schema[keyword]
            assert isinstance(subs, list)
            schema[keyword] = descendAll(subs, keyword)  # type: ignore

    # direct schemas
    for keyword in ("additionalProperties", "unevaluatedProperties", "items",
                    "not", "if", "then", "else", "contains", "propertyNames",
                    "unevaluatedItems"):
        if keyword not in schema:
            continue
        current = schema[keyword]
        if keyword == "items" and isinstance(current, list):
            # handle old items ~ prefixItems
            schema[keyword] = descendAll(current, keyword)  # type: ignore
        else:
            # standard case
            schema[keyword] = descend(current, keyword)  # type: ignore

    # handle values as schemas
    recValue(schema, "properties", "dependentSchemas", "patternProperties")

    # apply rwt
    schema = rwt(schema, path)
    assert isinstance(schema, (bool, dict))

    # keep last?!
    if isinstance(schema, dict):
        recValue(schema, "$defs", "definitions")

    return schema
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def recurseSchema(schema: JsonSchema, url: str,
                  flt: FilterFun = lambda s, p: True,
                  rwt: RewriteFun = lambda s, p: s) -> JsonSchema:
    """Walk a JSON Schema from its root.

    :param schema: schema to consider.
    :param url: url of schema.
    :param flt: top-down filter, tells whether to keep recursing (default: always).
    :param rwt: bottom-up rewrite function (default: identity).
    :return: the schema produced by the rewrite function at the root.
    """
    root: list[str] = []  # the walk starts with an empty path
    return _recurseSchema(schema, url, root, flt, rwt)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def hasDirectRef(schema, url):
    """Tell whether schema has a $ref outside of its ``$defs``."""

    found: bool = False

    def fltHasRef(sub: JsonSchema, path: list[str]) -> bool:
        nonlocal found
        if isinstance(sub, dict) and "$ref" in sub and "$defs" not in path:
            found = True
        # once a reference is found, stop descending
        return not found

    # only the filter matters, the default rewrite keeps schemas as is
    recurseSchema(schema, url, flt=fltHasRef)

    return found
|
jsutils/schemas.py
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
from typing import Callable
|
|
2
|
+
import os.path
|
|
3
|
+
from urllib.parse import urlsplit
|
|
4
|
+
import json
|
|
5
|
+
import requests
|
|
6
|
+
from .utils import JsonSchema, JSUError, log
|
|
7
|
+
from .recurse import recurseSchema
|
|
8
|
+
|
|
9
|
+
# Processing step applied by Schemas.store: called with (schema, url), returns the processed schema.
ProcessFun = Callable[[JsonSchema, str], JsonSchema]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _full_url(url: str, ref: str) -> str:
|
|
13
|
+
"""Build full normalized URL from a reference.
|
|
14
|
+
|
|
15
|
+
:param url: the url of the schema.
|
|
16
|
+
:param ref: the reference (relative) url.
|
|
17
|
+
"""
|
|
18
|
+
if "#" in ref:
|
|
19
|
+
lurl, lpath = ref.split("#", 1)
|
|
20
|
+
else:
|
|
21
|
+
lurl, lpath = ref, ""
|
|
22
|
+
# build the full url
|
|
23
|
+
if lurl == "":
|
|
24
|
+
return url + "#" + lpath
|
|
25
|
+
elif lurl.startswith("/"):
|
|
26
|
+
u = urlsplit(url)
|
|
27
|
+
return u.scheme + "://" + u.netloc + lurl + "#" + lpath
|
|
28
|
+
else:
|
|
29
|
+
return lurl + "#" + lpath
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _fullURL(schema: JsonSchema, url: str) -> JsonSchema:
    """Rewrite every ``$ref`` of a schema as a full reference.

    :param schema: schema to consider, updated in place.
    :param url: the url of the schema.
    """

    # we need full references to avoid ambiguities on inline!
    def fullref(sub: JsonSchema, _path: list[str]) -> JsonSchema:
        if not isinstance(sub, dict) or "$ref" not in sub:
            return sub
        ref = sub["$ref"]
        assert isinstance(ref, str)
        nref = _full_url(url, ref)
        log.debug(f"updating {ref} in {url}: {nref}")
        sub["$ref"] = nref
        return sub

    return recurseSchema(schema, url, rwt=fullref)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class Schemas:
    """Hold a set of identified schemas and sub-schemas."""

    def __init__(self):
        # url mapping for local storage: https://schema.psl.eu -> .
        self._urlmap: dict[str, str] = {}
        # cache of schema references
        self._schemas: dict[str, JsonSchema] = {}
        # additional processing on store
        self._process: list[ProcessFun] = [_fullURL]

    def addProcess(self, process: ProcessFun):
        """Add a processing step applied to schemas when they are stored."""
        self._process.append(process)

    def addMap(self, url: str, target: str):
        """Add local mapping for URLs."""
        self._urlmap[url] = target

    def store(self, url: str, schema: JsonSchema):
        """Store schema associated to URL.

        The schema goes through all registered processing steps and the
        result is what is kept in the cache.
        """
        log.info(f"adding schema {url}")

        if isinstance(schema, dict):
            # warn when the declared identifier differs from the storage url
            for prop in ("$id", "id"):
                if prop in schema and url != schema[prop]:
                    log.warning(f"{prop}={schema[prop]} / {url}")
                    # del schema["$id"]  # FIXME?

        # NOTE intermediate to avoid an infinite recursion
        self._schemas[url] = schema

        # process schema through all filters
        for process in self._process:
            schema = process(schema, url)

        # store final processed version
        self._schemas[url] = schema

    def _load(self, url: str):
        """Load schema from URL and store it.

        The url is first rewritten with the first matching local mapping.
        An http(s) location is downloaded, anything else is looked up as a
        file, trying the bare name and the ``.json`` and ``.schema.json``
        suffixes.

        :raises JSUError: if the schema cannot be found or downloaded.
        """
        log.info(f"loading schema: {url}")

        assert "#" not in url

        # rewrite url for local search
        path = url
        for u, t in self._urlmap.items():
            if path.startswith(u):
                path = t + path[len(u):]
                break

        schema: JsonSchema | None = None
        if path.startswith(("http://", "https://")):
            try:
                res = requests.get(path)
                res.raise_for_status()
                # json is a method: without the call the bound method itself
                # would be stored as the schema
                schema = res.json()
            except (requests.RequestException, ValueError) as e:
                raise JSUError(f"schema {url} cannot be downloaded: {e}") from e
        else:
            for suffix in ("", ".json", ".schema.json"):
                fn = f"{path}{suffix}"
                if os.path.isfile(fn):
                    log.debug(f"loading file: {fn}")
                    # close the file once parsed, JSON text is UTF-8
                    with open(fn, encoding="utf-8") as fh:
                        schema = json.load(fh)
                    break

        if schema is None:
            raise JSUError(f"schema {url} not found")

        self.store(url, schema)

    def _resolve(self, schema: JsonSchema, lpath: str) -> JsonSchema:
        """Extract sub-schema from schema following path.

        Empty and ``.`` segments are skipped. In an object a segment is
        looked up as is, then with its percent and JSON Pointer escapes
        (``~1`` for ``/``, ``~0`` for ``~``) decoded. In an array a segment
        must be a valid index.

        :param schema: schema to consider.
        :param lpath: ``/``-separated path to follow.
        :raises JSUError: if the path cannot be followed.
        """
        from urllib.parse import unquote  # only needed here

        for p in lpath.split("/"):
            if p in (".", ""):
                continue
            if isinstance(schema, dict):
                key = p
                if key not in schema:
                    # raw key first for compatibility, then its decoded form
                    key = unquote(p).replace("~1", "/").replace("~0", "~")
                    if key not in schema:
                        raise JSUError(f"cannot resolve {p} in path {lpath}")
                schema = schema[key]  # type: ignore
            elif (isinstance(schema, list) and p.isascii() and p.isdigit()
                    and int(p) < len(schema)):
                schema = schema[int(p)]  # type: ignore
            else:
                raise JSUError(f"cannot resolve {p} in path {lpath}")

        return schema

    def schema(self, url: str, ref: str) -> JsonSchema:
        """Resolve schema reference.

        The document holding the reference target is loaded if it is not in
        the cache yet.

        :param url: url of schema.
        :param ref: reference to resolve in url.
        """
        assert isinstance(ref, str) and len(ref) > 0
        fref = _full_url(url, ref)
        assert "#" in fref
        curl, path = fref.split("#", 1)
        if curl not in self._schemas:
            self._load(curl)
        return self._resolve(self._schemas[curl], path)
|