json-schema-utils 0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
jsutils/inline.py ADDED
@@ -0,0 +1,206 @@
1
+ from typing import Any
2
+ import copy
3
+ from urllib.parse import urlsplit
4
+
5
+ from .utils import JsonSchema, JSUError, log
6
+ from .schemas import Schemas
7
+ from .recurse import recurseSchema
8
+
9
+ # TODO fix recursion handling
10
+
11
def mergeProperty(schema: JsonSchema, prop: str, value: Any) -> JsonSchema:
    """Merge an additional property into an existing schema.

    The result is meant to accept only what both the existing schema and the
    added ``prop: value`` constraint accept (a conjunction).

    Note: this is on a best effort basis, on failure a backup plan is required.

    :param schema: schema to extend; a dict schema is updated in place.
    :param prop: name of the property (keyword) to merge.
    :param value: value of the property to merge; containers taken from it
        are copied before being stored so that later merges do not modify it.
    :return: the updated schema, or ``False`` when the merge is unsatisfiable.
    :raises JSUError: if the property cannot be merged.
    """
    # handle boolean schema
    if isinstance(schema, bool):
        return {prop: value} if schema else False
    assert isinstance(schema, dict)  # pyright helper

    # log.debug(f"merging {prop} in {schema}")

    # then object
    if prop == "$defs":  # ignore
        # NOTE a plain equality is required here: `prop in ("$defs")` would be
        # a substring test against the string "$defs".
        pass
    elif prop == "enum":
        if prop in schema:
            # intersect in order
            vals = [v for v in schema[prop] if v in value]  # pyright: ignore
            if len(vals) == 0:
                log.warning("incompatible enum/enum makes schema unsatisfiable")
                schema = False
            else:
                schema[prop] = vals
        elif "const" in schema:
            if schema["const"] in value:
                pass  # the const is already stricter than the enum
            else:
                log.warning("incompatible enum/const makes schema unsatisfiable")
                schema = False
        else:
            schema[prop] = value
    elif prop == "const":
        if prop in schema:
            if schema[prop] == value:
                pass
            else:
                log.warning("incompatible const/const makes schema unsatisfiable")
                schema = False
        elif "enum" in schema:
            schema[prop] = value
            if value in schema["enum"]:
                # the const subsumes the enum
                del schema["enum"]
            else:
                log.warning("incompatible const/enum makes schema unsatisfiable")
                schema = False
        else:
            schema[prop] = value
    elif prop == "required":
        assert isinstance(value, list)
        if prop in schema:
            # append in order and without duplicates
            for p in value:
                if p not in schema["required"]:
                    schema["required"].append(p)  # pyright: ignore
        else:
            # copy: the list may be appended to by a later merge
            schema["required"] = list(value)
    elif prop == "properties":
        if prop in schema:
            props = schema[prop]
            assert isinstance(value, dict) and isinstance(props, dict)
            for p, s in value.items():
                if p in props:
                    if props[p] == s or s is True:
                        pass
                    else:
                        props[p] = {"allOf": [props[p], s]}
                else:
                    props[p] = s
        else:
            assert isinstance(value, dict)
            # copy: entries may be added or replaced by a later merge
            schema[prop] = dict(value)
    elif prop == "allOf":
        assert isinstance(value, list)
        if prop in schema:
            # a conjunction of conjunctions is the concatenation
            schema[prop].extend(value)  # pyright: ignore
        else:
            # copy: the list may be extended by a later merge
            schema[prop] = list(value)
    elif prop in ("anyOf", "oneOf"):
        assert isinstance(value, list)
        if prop not in schema:
            schema[prop] = list(value)
        elif schema[prop] == value:
            pass
        else:
            # concatenating alternatives would accept values rejected by one
            # of the two original constraints: let the caller's backup plan
            # combine them instead
            raise JSUError(f"cannot merge prop {prop} distinct values")
    elif prop in ("title", "$comment"):
        # best effort
        if prop not in schema:
            schema[prop] = value
        # FIXME: what about "else"?
    # TODO extend list of supported validations?
    elif prop in ("type", "$ref", "pattern",
                  "additionalProperties", "additionalItems", "items",
                  "minLength", "maxLength", "minProperties", "maxProperties",
                  "minimum", "maximum", "exclusiveMinimum", "exclusiveMaximum",
                  "minItems", "maxItems", "minContains", "maxContains", "multipleOf",
                  "uniqueItems"):
        # allow identical values only (for now)
        if prop in schema:
            if schema[prop] == value:
                pass
            else:
                raise JSUError(f"cannot merge prop {prop} distinct values")
        else:
            schema[prop] = value
    else:
        raise JSUError(f"merging of prop {prop} is not supported (yet)")

    # log.debug(f"result: {schema}")
    return schema
116
+
117
+
118
# properties kept at the root while merging:
# in the allOf fallback of mergeSchemas, any property of the schema which is
# NOT listed here is moved into a separate allOf member.
_KEEP_PROPS = {
    "$schema", "$id", "$comment", "title", "description", "examples",
    # containers
    "$defs", "oneOf", "anyOf", "allOf",
    # special cases
    "unevaluatedProperties", "unevaluatedItems",
    # older version compatibility?
    "definitions",
}
128
+
129
+
130
def mergeSchemas(schema: JsonSchema, refschema: JsonSchema) -> JsonSchema:
    """Merge two schemas.

    The properties of ``refschema`` are first merged one by one into a deep
    copy of ``schema``. If one of them cannot be merged (``JSUError``), the
    two schemas are combined with ``allOf`` instead: properties of ``schema``
    listed in ``_KEEP_PROPS`` stay at the root, the others are moved into an
    ``allOf`` member, and ``refschema`` is added as a further member.

    :param schema: schema to extend, left unmodified.
    :param refschema: schema to merge into ``schema``.
    :return: the merged schema, which is a new object when both are dicts.
    """

    if isinstance(refschema, bool):
        return schema if refschema else False
    elif isinstance(schema, bool):
        return refschema if schema else False
    assert isinstance(schema, dict) and isinstance(refschema, dict)

    # work on a copy so that a partial merge can be discarded on failure
    merged: JsonSchema = copy.deepcopy(schema)

    try:
        # best effort
        for p, v in refschema.items():
            merged = mergeProperty(merged, p, v)
    except JSUError as e:
        log.warning(f"merge error: {e}")
        log.info("merging schemas with allOf")
    else:
        # log.debug(f"merged: {merged}")
        return merged

    # backup merge with allOf, built without touching the caller's schema
    result = {}
    separate = {}
    for p, v in schema.items():
        if p in _KEEP_PROPS:
            result[p] = v
        else:
            separate[p] = v

    # copy the list so that the original "allOf" is not appended to
    all_of = list(result.get("allOf", []))  # pyright: ignore
    if len(separate) > 0:
        all_of.append(separate)
    if len(refschema) > 0:
        all_of.append(refschema)
    result["allOf"] = all_of

    return result
167
+
168
+
169
+ def _url(ref):
170
+ """Extract base URL from full reference."""
171
+ u = urlsplit(ref)
172
+ return u.scheme + "://" + u.netloc
173
+
174
+
175
def inlineRefs(schema: JsonSchema, url: str, schemas: Schemas) -> JsonSchema:
    """Recursively inline $ref in schema, which is modified.

    :param schema: schema to consider.
    :param url: url of schema.
    :param schemas: store used to resolve references.
    :return: the schema with references inlined where possible.
    """

    def rwtRef(schema: JsonSchema, path: list[str]) -> JsonSchema:
        """Substitute the ``$ref`` found at ``path``, repeatedly if needed."""

        # recursion avoidance (FIXME insufficient)
        spath = "/".join(path)
        skips = {url + "#" + spath, url + "#/" + spath, url + "#./" + spath}

        # references already substituted at this location: meeting one again
        # means a reference cycle, which would otherwise loop forever
        seen: set[str] = set()

        while isinstance(schema, dict) and "$ref" in schema:
            ref = schema["$ref"]
            assert isinstance(ref, str)

            # (direct or cyclic) recursion detection
            if ref in skips or ref in seen:
                log.info(f"skipping recursive ref: {ref}")
                break
            seen.add(ref)

            # actual substitution
            sub = schemas.schema(url, ref)
            del schema["$ref"]
            if isinstance(sub, dict):
                schema = mergeSchemas(schema, sub)
            else:
                assert isinstance(sub, bool)
                if not sub:
                    schema = False
                # else True is coldly ignored

        return schema

    return recurseSchema(schema, url, rwt=rwtRef)
jsutils/recurse.py ADDED
@@ -0,0 +1,90 @@
1
+ from .utils import JsonSchema, log, FilterFun, RewriteFun
2
+
3
+
4
def _recurseSchema(
        schema: JsonSchema,
        url: str,
        path: list[str],
        flt: FilterFun,
        rwt: RewriteFun) -> JsonSchema:
    """Walk a schema, rewriting sub-schemas first and the schema itself after.

    :param schema: schema to consider, dict schemas are updated in place.
    :param url: url of schema.
    :param path: path of the current schema from the root.
    :param flt: filter function, recursion stops here when it returns false.
    :param rwt: rewrite function applied to the schema.
    """

    log.debug(f"recuring at {path}")

    # the filter may stop the recursion, the schema is then left as is
    if not flt(schema, path):
        return schema

    # boolean schemas have no sub-schemas
    if isinstance(schema, bool):
        return rwt(schema, path)
    assert isinstance(schema, dict), f"schema must be a dict: {type(schema).__name__}"

    def descend(sub, *steps):
        # recurse on one sub-schema located at path + steps
        return _recurseSchema(sub, url, path + list(steps), flt, rwt)

    def descendList(key, members):
        # recurse on each schema of a list, the index is added to the path
        return [descend(member, key, str(idx)) for idx, member in enumerate(members)]

    def descendValues(holder, *keys):
        # recurse on dict values which are schemas
        for key in keys:
            if key not in holder:
                continue
            mapping = holder[key]
            assert isinstance(mapping, dict)
            for name, sub in list(mapping.items()):
                mapping[name] = descend(sub, key, name)

    # list of schemas
    for key in ("allOf", "oneOf", "anyOf", "prefixItems"):
        if key in schema:
            members = schema[key]
            assert isinstance(members, list)
            schema[key] = descendList(key, members)  # type: ignore

    # direct schemas
    for key in ("additionalProperties", "unevaluatedProperties", "items",
                "not", "if", "then", "else", "contains", "propertyNames",
                "unevaluatedItems"):
        if key not in schema:
            continue
        current = schema[key]
        if key == "items" and isinstance(current, list):
            # handle old items ~ prefixItems
            schema[key] = descendList(key, current)  # type: ignore
        else:
            # standard case
            schema[key] = descend(current, key)  # type: ignore

    # values as schemas
    descendValues(schema, "properties", "dependentSchemas", "patternProperties")

    # apply rwt
    schema = rwt(schema, path)
    assert isinstance(schema, (bool, dict))

    # keep last?!
    if isinstance(schema, dict):
        descendValues(schema, "$defs", "definitions")

    return schema
62
+
63
+
64
def recurseSchema(schema: JsonSchema, url: str,
                  flt: FilterFun = lambda s, p: True,
                  rwt: RewriteFun = lambda s, p: s) -> JsonSchema:
    """Run a generic recursion over a JSON Schema, starting at its root.

    :param schema: schema to consider.
    :param url: url of schema.
    :param flt: filter (top-down) function, whether to keep recursing;
        the default always recurses.
    :param rwt: rewrite (bottom-up) function; the default keeps schemas as is.
    """
    root: list[str] = []  # the root schema has an empty path
    return _recurseSchema(schema, url, root, flt, rwt)
75
+
76
+
77
def hasDirectRef(schema, url):
    """Tell whether schema has a $ref.

    References located under a path which contains ``$defs`` are not counted.
    """

    found: bool = False

    def fltHasRef(schema: JsonSchema, path: list[str]) -> bool:
        # used as a filter: recursion stops as soon as one reference is found
        nonlocal found
        if "$defs" not in path:
            if isinstance(schema, dict) and "$ref" in schema:
                found = True
        return not found

    recurseSchema(schema, url, flt=fltHasRef)

    return found
jsutils/schemas.py ADDED
@@ -0,0 +1,151 @@
1
+ from typing import Callable
2
+ import os.path
3
+ from urllib.parse import urlsplit
4
+ import json
5
+ import requests
6
+ from .utils import JsonSchema, JSUError, log
7
+ from .recurse import recurseSchema
8
+
9
# processing hook applied when a schema is stored: it is called with the
# schema and its url, and returns the schema to store
ProcessFun = Callable[[JsonSchema, str], JsonSchema]
10
+
11
+
12
+ def _full_url(url: str, ref: str) -> str:
13
+ """Build full normalized URL from a reference.
14
+
15
+ :param url: the url of the schema.
16
+ :param ref: the reference (relative) url.
17
+ """
18
+ if "#" in ref:
19
+ lurl, lpath = ref.split("#", 1)
20
+ else:
21
+ lurl, lpath = ref, ""
22
+ # build the full url
23
+ if lurl == "":
24
+ return url + "#" + lpath
25
+ elif lurl.startswith("/"):
26
+ u = urlsplit(url)
27
+ return u.scheme + "://" + u.netloc + lurl + "#" + lpath
28
+ else:
29
+ return lurl + "#" + lpath
30
+
31
+
32
def _fullURL(schema: JsonSchema, url: str) -> JsonSchema:
    """Rewrite every ``$ref`` of a schema as a full reference.

    :param schema: schema to consider, updated in place.
    :param url: the url of the schema.
    """

    # we need full references to avoid ambiguities on inline!
    def fullref(schema: JsonSchema, _path: list[str]) -> JsonSchema:
        # nothing to do for boolean schemas and schemas without a reference
        if not isinstance(schema, dict) or "$ref" not in schema:
            return schema
        ref = schema["$ref"]
        assert isinstance(ref, str)
        nref = _full_url(url, ref)
        log.debug(f"updating {ref} in {url}: {nref}")
        schema["$ref"] = nref
        return schema

    return recurseSchema(schema, url, rwt=fullref)
50
+
51
+
52
class Schemas:
    """Hold a set of identified schemas and sub-schemas."""

    def __init__(self):
        # url mapping for local storage: https://schema.psl.eu -> .
        self._urlmap: dict[str, str] = {}
        # cache of schema references
        self._schemas: dict[str, JsonSchema] = {}
        # additional processing on store
        self._process: list[ProcessFun] = [_fullURL]

    def addProcess(self, process: ProcessFun):
        """Add a processing function, applied to schemas when stored."""
        self._process.append(process)

    def addMap(self, url: str, target: str):
        """Add local mapping for URLs."""
        self._urlmap[url] = target

    def store(self, url: str, schema: JsonSchema):
        """Store schema associated to URL.

        The schema goes through all registered processing functions, in
        registration order, and the final result is what is kept.

        :param url: url of the schema, without fragment.
        :param schema: schema to store.
        """
        log.info(f"adding schema {url}")

        if isinstance(schema, dict):
            for prop in ("$id", "id"):
                if prop in schema:
                    if url != schema[prop]:
                        log.warning(f"{prop}={schema[prop]} / {url}")
                    # del schema["$id"]  # FIXME?

        # NOTE intermediate to avoid an infinite recursion
        self._schemas[url] = schema

        # process schema through all filters
        for process in self._process:
            schema = process(schema, url)

        # store final processed version
        self._schemas[url] = schema

    def _load(self, url: str):
        """Load schema from URL and store it.

        The first matching prefix of the url mapping is applied, then the
        schema is either downloaded (http/https) or read from a local file,
        trying the path as is and with ``.json`` and ``.schema.json`` suffixes.

        :param url: url of the schema, without fragment.
        :raises JSUError: if no local file is found for the schema.

        Download errors are raised by ``requests``.
        """
        log.info(f"loading schema: {url}")

        assert "#" not in url

        # rewrite url for local search
        path = url
        for u, t in self._urlmap.items():
            if path.startswith(u):
                path = t + path[len(u):]
                break

        schema: JsonSchema|None
        if path.startswith(("http://", "https://")):
            response = requests.get(path)
            # fail on HTTP errors instead of parsing an error page
            response.raise_for_status()
            # NOTE json is a method: it must be called to get the content
            schema = response.json()
        else:
            schema = None
            for suffix in ("", ".json", ".schema.json"):
                fn = f"{path}{suffix}"
                if os.path.isfile(fn):
                    log.debug(f"loading file: {fn}")
                    # close the file once read, JSON text is UTF-8
                    with open(fn, encoding="utf-8") as fh:
                        schema = json.load(fh)
                    break

        if schema is None:
            raise JSUError(f"schema {url} not found")

        self.store(url, schema)

    def _resolve(self, schema: JsonSchema, lpath: str) -> JsonSchema:
        """Extract sub-schema from schema following path.

        :param schema: schema to consider.
        :param lpath: local path to follow, ``/`` separated; empty and ``.``
            segments are skipped.

        A segment is first looked up as is. If it is not found, it is looked
        up again after URL decoding and JSON pointer unescaping (``~1`` for
        ``/`` and ``~0`` for ``~``). Segments applied to a list must be a
        valid index.
        """
        from urllib.parse import unquote

        for p in lpath.split("/"):
            if p in (".", ""):
                continue

            if isinstance(schema, list):
                # list of schemas, e.g. ".../allOf/0"
                assert p.isascii() and p.isdigit() and int(p) < len(schema), \
                    f"invalid list index in reference: {p}"
                schema = schema[int(p)]
                continue

            assert isinstance(schema, dict), f"cannot follow reference at: {p}"
            if p not in schema:
                # the literal name takes precedence, then try the decoded one
                p = unquote(p).replace("~1", "/").replace("~0", "~")
            assert p in schema, f"reference not found at: {p}"
            schema = schema[p]  # type: ignore

        return schema

    def schema(self, url: str, ref: str) -> JsonSchema:
        """Resolve schema reference.

        The target schema is loaded first if it is not in the cache yet.

        :param url: url of schema.
        :param ref: reference to resolve in url.
        """
        assert isinstance(ref, str) and len(ref) > 0
        fref = _full_url(url, ref)
        assert "#" in fref
        curl, path = fref.split("#", 1)
        if curl not in self._schemas:
            self._load(curl)
        return self._resolve(self._schemas[curl], path)