json-schema-utils 0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- json_schema_utils-0.8.dist-info/METADATA +119 -0
- json_schema_utils-0.8.dist-info/RECORD +15 -0
- json_schema_utils-0.8.dist-info/WHEEL +5 -0
- json_schema_utils-0.8.dist-info/entry_points.txt +7 -0
- json_schema_utils-0.8.dist-info/licenses/LICENSE +1 -0
- json_schema_utils-0.8.dist-info/top_level.txt +1 -0
- jsutils/__init__.py +5 -0
- jsutils/convert.py +934 -0
- jsutils/inline.py +206 -0
- jsutils/recurse.py +90 -0
- jsutils/schemas.py +151 -0
- jsutils/scripts.py +396 -0
- jsutils/simplify.py +580 -0
- jsutils/stats.py +1310 -0
- jsutils/utils.py +44 -0
jsutils/simplify.py
ADDED
|
@@ -0,0 +1,580 @@
|
|
|
1
|
+
# TODO
|
|
2
|
+
# oneOf [ { "enum": [] }, { "const": } ]
|
|
3
|
+
# import urllib
|
|
4
|
+
from typing import Any
|
|
5
|
+
import copy
|
|
6
|
+
from .utils import JsonSchema, log, JSUError, only
|
|
7
|
+
from .recurse import recurseSchema
|
|
8
|
+
from .inline import mergeProperty
|
|
9
|
+
|
|
10
|
+
# type-specific properties
|
|
11
|
+
# TODO complete
|
|
12
|
+
# Keywords that only constrain instances of one JSON type.  A keyword listed
# under one type is considered useless on a schema declared with another type.
TYPED_PROPS: dict[str, set[str]] = {
    # format: not in theory, quite often in practice
    "string": {"minLength", "maxLength", "pattern"},
    "number": {"minimum", "exclusiveMinimum", "maximum", "exclusiveMaximum", "multipleOf"},
    "object": {"additionalProperties", "unevaluatedProperties", "propertyNames", "required",
               "properties", "minProperties", "maxProperties", "patternProperties",
               # dependency keywords only apply to objects ("dependencies" is the
               # pre-2019-09 spelling)
               "dependentRequired", "dependentSchemas", "dependencies"},
    "array": {"items", "minItems", "maxItems", "prefixItems", "contains", "minContains",
              "maxContains", "unevaluatedItems", "additionalItems", "uniqueItems"},
    "boolean": set(),
    "null": set()
}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def incompatibleProps(st: str) -> set[str]:
    """Return the type-specific keywords of every type other than *st*.

    :param st: a JSON type name; a name absent from ``TYPED_PROPS`` yields
        the keywords of all known types.
    :return: a fresh set, safe for the caller to modify.
    """
    # set().union() with no argument gives an empty set, so this also works
    # when no other type is registered.
    return set().union(*(props for t, props in TYPED_PROPS.items() if t != st))
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# string-specific predefined formats
|
|
32
|
+
# NOTE some extensions use other formats, eg for "int32" for numbers
|
|
33
|
+
STRING_FORMATS: set[str] = {
    # dates and times
    "date",
    "date-time",
    "time",
    "duration",
    # email addresses
    "email",
    "idn-email",
    # hosts and ip addresses
    "hostname",
    "idn-hostname",
    "ipv4",
    "ipv6",
    # resource identifiers
    "uri",
    "uri-reference",
    "uri-template",
    "iri",
    "iri-reference",
    "uuid",
    # json pointers
    "json-pointer",
    "relative-json-pointer",
    # regular expressions
    "regex",
}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def counts(lv: list[Any]) -> dict[Any, int]:
    """Tally the number of occurrences of each value of *lv*.

    Keys appear in order of first occurrence; values must be hashable.
    """
    tally: dict[Any, int] = {}
    for item in lv:
        tally[item] = tally.get(item, 0) + 1
    return tally
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _jsonKey(v: Any) -> Any:
    """Build a hashable key under which JSON-equal values, and only those, collide.

    Python considers ``1 == True`` and cannot hash lists or dicts, so raw
    values cannot be used directly as dictionary keys to compare JSON data.
    """
    if isinstance(v, bool):  # must come before the number test: bool is an int
        return ("boolean", v)
    if isinstance(v, (int, float)):
        return ("number", v)  # 1 and 1.0 share a key, as in JSON Schema
    if isinstance(v, str):
        return ("string", v)
    if v is None:
        return ("null",)
    if isinstance(v, (list, tuple)):
        return ("array", tuple(_jsonKey(i) for i in v))
    if isinstance(v, dict):
        return ("object", frozenset((k, _jsonKey(i)) for k, i in v.items()))
    return ("other", repr(v))


def getEnum(ls: list[JsonSchema], is_one: bool) -> list[Any]|None:
    """Attempt to extract a list of constants.

    :param ls: the sub-schemas of a ``oneOf`` or ``anyOf``.
    :param is_one: whether ``oneOf`` semantics apply, in which case a value
        provided by several sub-schemas matches none and is dropped entirely;
        otherwise only the extra occurrences are dropped.
    :return: the extracted values in order of first occurrence, or ``None``
        if some sub-schema is not a dict holding ``const`` or ``enum``.
    """
    assert isinstance(ls, list)
    values: list[Any] = []
    keys: list[Any] = []
    for s in ls:
        if not isinstance(s, dict):
            return None
        if "const" in s:
            candidates = [s["const"]]
        elif "enum" in s:
            assert isinstance(s["enum"], list)
            candidates = s["enum"]
        else:
            return None
        # a value repeated inside one sub-schema still counts once for it
        local: set[Any] = set()
        for v in candidates:
            k = _jsonKey(v)
            if k not in local:
                local.add(k)
                values.append(v)
                keys.append(k)
    # number of sub-schemas providing each value
    cnt: dict[Any, int] = {}
    for k in keys:
        cnt[k] = cnt.get(k, 0) + 1
    if is_one:
        # fully remove duplicates
        return [v for v, k in zip(values, keys) if cnt[k] == 1]
    # only remove duplicates
    seen: set[Any] = set()
    unique: list[Any] = []
    for v, k in zip(values, keys):
        if k not in seen:
            seen.add(k)
            unique.append(v)
    return unique
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _typeCompat(t: str, v: Any) -> bool:
|
|
79
|
+
"""Check JSON type / value compatibility."""
|
|
80
|
+
return ((t == "null" and v is None) or
|
|
81
|
+
(t == "boolean" and isinstance(v, bool)) or
|
|
82
|
+
(t == "number" and isinstance(v, (int, float))) or
|
|
83
|
+
(t == "string" and isinstance(v, str)) or
|
|
84
|
+
(t == "array" and isinstance(v, (list, tuple))) or
|
|
85
|
+
(t == "object" and isinstance(v, dict)))
|
|
86
|
+
|
|
87
|
+
_IGNORABLE = (
|
|
88
|
+
# core
|
|
89
|
+
"$schema", "$id", "$comment", "$vocabulary", "$anchor", "$dynamicAnchor",
|
|
90
|
+
# metadata
|
|
91
|
+
"description", "title", "readOnly", "writeOnly", "default", "examples", "deprecated",
|
|
92
|
+
# namespace
|
|
93
|
+
"definitions", "$defs",
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
def _ignored(schema: JsonSchema) -> JsonSchema:
    """Remove properties that do not need to be considered.

    :param schema: a schema; the argument itself is never modified.
    :return: for a dict, a new dict without the ``_IGNORABLE`` keywords whose
        remaining values are deep copies; a boolean is returned as is.
    """
    if isinstance(schema, bool):
        return schema
    if not isinstance(schema, dict):
        # eg a list of schemas (array-form "items"): nothing to strip
        return copy.deepcopy(schema)
    # copy only what is kept: ignorable entries such as "$defs" may hold most
    # of the document and would be deep-copied just to be thrown away
    return {kw: copy.deepcopy(val) for kw, val in schema.items() if kw not in _IGNORABLE}
|
|
105
|
+
|
|
106
|
+
def same(s1: JsonSchema, s2: JsonSchema) -> bool:
    """Tell whether two schemas are equal once ignorable keywords are removed."""
    left = _ignored(s1)
    right = _ignored(s2)
    return left == right
|
|
108
|
+
|
|
109
|
+
def simplifySchema(schema: JsonSchema, url: str):
    """Simplify a JSON Schema with various rules.

    The rules are applied by a local rewrite callback handed to
    ``recurseSchema``; schema dictionaries are modified in place, and a
    sub-schema detected as unsatisfiable is replaced by ``False``.

    :param schema: the root schema.
    :param url: passed unchanged to ``recurseSchema``.
    :return: the value returned by ``recurseSchema``.
    """

    # schema version for $ref aggressive pruning
    version: int
    if isinstance(schema, dict) and "$schema" in schema and isinstance(schema["$schema"], str):
        ds = schema["$schema"]
        version = \
            9 if "2020-12" in ds else \
            8 if "2019-09" in ds else \
            7 if "draft-07" in ds else \
            6 if "draft-06" in ds else \
            4 if "draft-04" in ds else \
            3 if "draft-03" in ds else \
            2 if "draft-02" in ds else \
            1 if "draft-01" in ds else \
            9
    else:
        version = 9  # 2020-12

    # TODO more generic dynamicAnchor removal
    # TODO anchor removal?
    # FIXME check that there is only one dynamicAnchor of this name?!
    dynroot: str|None = None
    if isinstance(schema, dict) and "$dynamicAnchor" in schema:
        dynroot = schema["$dynamicAnchor"]
        del schema["$dynamicAnchor"]

    def fits(stype: str, integral: bool, v: Any) -> bool:
        """Type check which also enforces integrality for the "integer" type."""
        if not _typeCompat(stype, v):
            return False
        # "integer" is checked as "number": reject values with a fractional part
        return not (integral and isinstance(v, float) and not v.is_integer())

    def rwtSimpler(schema: JsonSchema, path: list[str]) -> JsonSchema:

        lpath = ".".join(path) if path else "."

        if isinstance(schema, bool):
            return schema
        assert isinstance(schema, dict)

        # references
        if "$ref" in schema and version <= 7:
            # https://json-schema.org/draft-07/draft-handrews-json-schema-01#rfc.section.8.3
            keep = { p: v for p, v in schema.items() if p in _IGNORABLE or p == "$ref" }
            if len(keep) != len(schema):
                log.warning(f"dropping all props adjacent to $ref on old schemas at {path}")
            return keep

        if isinstance(dynroot, str):
            if path and "$dynamicAnchor" in schema and schema["$dynamicAnchor"] == dynroot:
                log.error(f"Ooops: multiple root dynamic anchor: {dynroot}")
                raise Exception("FIXME!")

            if "$dynamicRef" in schema:
                dref = schema["$dynamicRef"]
                if dref == "#" + dynroot:
                    log.info(f"replacing root $dynamicAnchor with simple $ref at {path}")
                    del schema["$dynamicRef"]
                    schema["$ref"] = "#"

        # TODO anyOf/oneOf/allOf of length 0?
        # anyOf/oneOf/allOf of length 1
        for prop in ("anyOf", "oneOf", "allOf"):
            if (isinstance(schema, dict) and prop in schema and
                    len(schema[prop]) == 1):  # type: ignore
                sub = schema[prop][0]  # pyright: ignore
                if not isinstance(sub, dict):
                    # boolean sub-schema: there are no properties to merge
                    continue
                try:
                    nschema = copy.deepcopy(schema)
                    for p, v in sub.items():
                        nschema = mergeProperty(nschema, p, v)
                    # success!
                    schema = nschema
                    if isinstance(schema, dict):
                        del schema[prop]
                except JSUError as e:
                    # best effort: keep the schema as it was before the merge
                    log.debug(e)
                    log.warning(f"{prop} of one merge failed")

        if isinstance(schema, bool):
            return schema
        assert isinstance(schema, dict)

        # TODO detect inconsistent allOf?

        # switch oneOf/anyOf const/enum to enum/const
        for prop in ("oneOf", "anyOf"):
            if prop in schema:
                val = schema[prop]
                assert isinstance(val, list)
                lv = getEnum(val, prop == "oneOf")  # pyright: ignore
                if lv is not None:
                    del schema[prop]
                    log.info(f"{prop} to enum/const/false at {lpath}")
                    if len(lv) == 0:
                        # FIXME check
                        return False
                    else:  # at least one
                        if "enum" in schema:
                            lev = schema["enum"]
                            del schema["enum"]
                            assert isinstance(lev, list)
                            # intersect in initial order
                            nlv = []
                            for v in lev:
                                if v in lv:
                                    nlv.append(v)
                            schema["enum"] = nlv
                        else:
                            schema["enum"] = lv

        # void condition application
        # a then/else adds nothing only if each of its constraints is already
        # enforced as is by the enclosing schema
        for kw in ("then", "else"):
            if kw in schema:
                subs = schema[kw]
                if isinstance(subs, dict):
                    compat = True
                    for k, v in subs.items():
                        if k in _IGNORABLE:
                            pass
                        elif k not in schema:
                            # constraint missing from the enclosing schema
                            compat = False
                        elif v == schema[k]:
                            pass
                        elif k == "required":
                            # special case, check for inclusion
                            req = schema["required"]
                            if not isinstance(v, list) or not isinstance(req, list) or \
                                    any(n not in req for n in v):
                                compat = False
                        else:
                            # same keyword with another value
                            compat = False
                else:
                    # boolean sub-schema: only true is a no-op
                    compat = subs is True
                if compat:
                    log.info(f"removing ineffective {kw}")
                    del schema[kw]

        # if/then/else
        if "if" not in schema:
            for kw in ("then", "else"):
                if kw in schema:
                    log.info(f"removing {kw} without if")
                    del schema[kw]
        if "if" in schema and not ("then" in schema or "else" in schema):
            log.info(f"removing lone if at {path}")
            del schema["if"]

        # simplify condition if possible
        if "if" in schema:
            cond = schema["if"]
            # the condition may be a boolean schema, which has no keyword
            if isinstance(cond, dict) and "not" in cond and only(cond, "not", *_IGNORABLE):
                log.info("simplifying if not")
                # negating the condition exchanges the two branches
                schema["if"] = cond["not"]
                sthen = schema.get("then", None)
                selse = schema.get("else", None)
                if sthen is not None:
                    schema["else"] = sthen
                    if selse is not None:
                        schema["then"] = selse
                    else:
                        del schema["then"]
                else:
                    assert selse is not None
                    schema["then"] = selse
                    del schema["else"]

        # short type list
        if "type" in schema and isinstance(schema["type"], list):
            types = schema["type"]
            if len(types) == 0:
                return False
            elif len(types) == 1:
                schema["type"] = types[0]
        # type/props…
        if "type" in schema and isinstance(schema["type"], str):
            stype = schema["type"]
            if stype == "number":
                if "multipleOf" in schema and schema["multipleOf"] == 1:
                    schema["type"] = "integer"
                    del schema["multipleOf"]
            if stype == "integer":
                if "multipleOf" in schema and schema["multipleOf"] == 1:
                    del schema["multipleOf"]
                # use this for later type-related checks
                stype = "number"
            # whether the declared type only accepts integral numbers
            integral = schema["type"] == "integer"
            # remove type-specific properties
            if stype in TYPED_PROPS:
                for p in incompatibleProps(stype):
                    if p in schema:
                        log.info(f"unused property {p} for {stype} at {lpath}")
                        del schema[p]
                if stype != "string" and "format" in schema and schema["format"] in STRING_FORMATS:
                    log.info(f"unused string format on {stype}: {schema['format']}")
                    del schema["format"]
            # type/const
            if "const" in schema:
                cst = schema["const"]
                if fits(stype, integral, cst):
                    log.info(f"removing redundant type with const at {lpath}")
                    del schema["type"]
                else:
                    log.info(f"incompatible type {stype} for {cst} at {lpath}")
                    return False
            # type/enum
            if "enum" in schema:
                vals = schema["enum"]
                assert isinstance(vals, list)
                nvals = [v for v in vals if fits(stype, integral, v)]
                if len(vals) != len(nvals):
                    log.info(f"removing {len(vals) - len(nvals)} incompatible values "
                             f"from enum at {lpath}")
                    schema["enum"] = nvals
                # type may already have been removed by the const rule above
                schema.pop("type", None)
            # simplify any array
            if stype == "array":
                simpler = _ignored(schema)
                assert isinstance(simpler, dict)  # pyright hint
                if len(simpler) == 2 and "type" in schema:
                    # lone keyword
                    for kw in ("items", "additionalItems", "unevaluatedItems"):
                        if kw in schema:
                            subschema = _ignored(schema[kw])  # pyright: ignore
                            if subschema in (True, {}):
                                log.info(f"removing useless {kw} keyword at {lpath}")
                                del schema[kw]
            # simplify any object
            if stype == "object":
                simpler = _ignored(schema)
                assert isinstance(simpler, dict)  # pyright hint
                if len(simpler) == 2 and "type" in schema:
                    # lone keyword
                    for kw in ("additionalProperties", "unevaluatedProperties"):
                        if kw in schema:
                            subschema = _ignored(schema[kw])  # pyright: ignore
                            if subschema in (True, {}):
                                log.info(f"removing useless {kw} keyword at {lpath}")
                                del schema[kw]

        # simplify propertyNames + additionalProperties to patternProperties
        # NOTE(review): patternProperties alone does not reject names which fail
        # the pattern, whereas propertyNames does — confirm this is intended
        if "propertyNames" in schema and "additionalProperties" in schema and \
                "properties" not in schema and "patternProperties" not in schema:
            pn = schema["propertyNames"]
            ap = schema["additionalProperties"]
            # propertyNames may be a boolean schema, which has no pattern
            if isinstance(pn, dict) and "pattern" in pn and \
                    only(pn, "pattern", "type", *_IGNORABLE):
                log.info(f"switching propertyNames and additionalProperties to patternProperties at {lpath}")
                del schema["propertyNames"]
                del schema["additionalProperties"]
                schema["patternProperties"] = { pn["pattern"]: ap }

        # const/enum
        if "const" in schema and "enum" in schema:
            log.info(f"const/enum at {lpath}")
            assert isinstance(schema["enum"], list)
            if schema["const"] in schema["enum"]:
                del schema["enum"]
            else:
                return False
        elif "enum" in schema:
            assert isinstance(schema["enum"], list)
            nenum = len(schema["enum"])
            if nenum == 0:
                log.info(f"empty enum at {lpath}")
                return False
            elif nenum == 1:
                log.info(f"enum of one at {lpath}")
                schema["const"] = schema["enum"][0]
                del schema["enum"]

        return schema

    return recurseSchema(schema, url, rwt=rwtSimpler)
|
|
372
|
+
|
|
373
|
+
#
|
|
374
|
+
# move definitions at the root and resolve ids
|
|
375
|
+
#
|
|
376
|
+
from urllib.parse import quote, unquote
|
|
377
|
+
|
|
378
|
+
def _defId(schema) -> tuple[str|None, str|None]:
|
|
379
|
+
"""return name of definitions and id properties."""
|
|
380
|
+
if not isinstance(schema, dict):
|
|
381
|
+
return (None, None)
|
|
382
|
+
defn = "$defs" if "$defs" in schema else \
|
|
383
|
+
"definitions" if "definitions" in schema else \
|
|
384
|
+
None
|
|
385
|
+
idn = "$id" if "$id" in schema else \
|
|
386
|
+
"id" if "id" in schema else \
|
|
387
|
+
None
|
|
388
|
+
return (defn, idn)
|
|
389
|
+
|
|
390
|
+
# module-wide counter embedded in the generated definition names and prefixes
# (_defs_N_, _dsub_N_, _id_N_, _psub_N_); incremented after each use
_SUBCOUNT: int = 0
|
|
391
|
+
|
|
392
|
+
# TODO handle arbitrary path references
|
|
393
|
+
|
|
394
|
+
def _scopeSubDefs(schema: JsonSchema, defs: dict[str, JsonSchema], rootdef: str,
                  moved: dict[str, str], ids: dict[str, str],
                  delete: list[tuple[Any, ...]],
                  path: list[str|int]|None = None):
    """Register the definitions of one nested schema into the root definitions.

    Nothing is done for the root (empty *path*) or when *schema* holds no
    definitions.  The nested definitions are only copied into *defs* here;
    their removal is recorded in *delete* for the caller to perform.

    :param schema: nested schema to process, modified in place.
    :param defs: root definitions, which receive the renamed entries.
    :param rootdef: reference to the root definitions, eg "#/$defs".
    :param moved: filled with old reference to new reference mappings.
    :param ids: filled with identifier to new reference mappings.
    :param delete: filled with (schema, definitions keyword, prefix, new
        reference, identifier) tuples; the last three are None when the
        schema has no identifier.
    :param path: path of *schema* from the root.
    """
    if path is None:
        path = []

    log.debug(f"handing $ids/$defs at {path}")

    global _SUBCOUNT
    defn, idn = _defId(schema)

    if defn is None:
        return

    if path and defn and not idn:
        # nested definitions, move them up

        prefix = f"_defs_{_SUBCOUNT}_"
        _SUBCOUNT += 1

        for name, sschema in schema[defn].items():
            # FIXME name may be quite ugly… eg a full URL
            if "/" not in name:  # reuse name if simple
                new_name = prefix + name
                old_name = name
            else:
                new_name = f"_dsub_{_SUBCOUNT}_"
                _SUBCOUNT += 1
                old_name = quote(name).replace("~", "~0").replace("/", "~1")
            npath = rootdef + "/" + new_name
            # path segments may be integers, which str.join would reject
            opath = f"#/{'/'.join(map(str, path))}/{defn}/{old_name}"
            if isinstance(sschema, dict):
                # a boolean definition cannot carry a comment
                sschema["$comment"] = f"origin: {opath}"
            moved[opath] = npath
            defs[new_name] = sschema

        schema["$comment"] = f"{defn} {_SUBCOUNT} moved"

        delete.append((schema, defn, None, None, None))

    elif path and defn and idn:
        # if we have a nested id, we move definitions to defs and rewrite local refs

        sid = schema[idn]
        assert isinstance(sid, str)

        del schema[idn]
        if "id" in schema:  # WTF: both $id and id…
            del schema["id"]

        # keep track of changes
        schema["$comment"] = f"{idn} {_SUBCOUNT}: {sid}"

        prefix = f"_id_{_SUBCOUNT}_"
        _SUBCOUNT += 1

        # to remap long references later
        # we have a local path for an external url
        ids[sid] = rootdef + "/" + prefix
        iddefs = f"#/{defn}/"
        # id's defs will be there
        moved[sid + iddefs] = rootdef + "/" + prefix

        # remap all sub-schema local references
        def rwtRef(schema, lpath):
            if isinstance(schema, dict) and "$ref" in schema:
                dest = schema["$ref"]
                assert isinstance(dest, str)
                if dest.startswith(iddefs):  # local ref
                    schema["$ref"] = rootdef + "/" + prefix + dest[len(iddefs):]
                elif dest in ("#", "#/"):  # myself, will have to be made consistent later!
                    schema["$ref"] = ids[sid]
            return schema

        recurseSchema(schema, "", rwt=rwtRef)

        # move local definitions as global
        for name, sschem in schema[defn].items():
            pname = prefix + name
            assert pname not in defs
            defs[pname] = sschem

        # we need to keep the schema in place for handling arbitrary url
        # whole object will be moved later
        delete.append((schema, defn, prefix, ids[sid], sid))
|
|
479
|
+
|
|
480
|
+
|
|
481
|
+
def scopeDefs(schema: JsonSchema):
    """Move internal definitions/$defs to root schema, possibly handling nested $id.

    The schema is modified in place and nothing is returned.  The root
    definitions keyword is reused when present, otherwise "$defs" is created.

    :param schema: the root schema.
    :raises Exception: when a local ``$ref`` path cannot be followed.
    """

    # collect $id/id and $defs/definitions
    todo_ids, todo_defs = [], []

    def fltDefs(schema, path):
        # only nested schemas are of interest, the root keeps its definitions
        if path and isinstance(schema, dict):
            defn, idn = _defId(schema)
            if idn is not None:
                todo_ids.append((schema, path))
            elif defn is not None:
                todo_defs.append((schema, path))
        return True

    recurseSchema(schema, "", flt=fltDefs)

    if not todo_ids and not todo_defs:
        return

    # ensure definitions root
    defn, idn = _defId(schema)

    if defn is None:
        defn = "$defs"
        schema[defn] = {}

    # do internal renamings
    rootdef, moved, ids, delete = f"#/{defn}", {}, {}, []

    for s, p in todo_ids:
        _scopeSubDefs(s, schema[defn], rootdef, moved, ids, delete, p)

    for s, p in todo_defs:
        _scopeSubDefs(s, schema[defn], rootdef, moved, ids, delete, p)

    # move arbitrary references
    def mvRef(rschema, path):
        global _SUBCOUNT
        if isinstance(rschema, dict) and "$ref" in rschema:
            dest = rschema["$ref"]
            # log.debug(f"found {dest} at {path}")
            if dest.startswith("#/") and dest not in moved:
                dpath = dest[2:].split("/")
                if len(dpath) != 2 or dpath[0] != defn:
                    # not a simple name, follow path
                    jdest = schema
                    for segment in dpath:
                        if isinstance(jdest, dict):
                            # hmmm
                            if segment in jdest:
                                jdest = jdest[segment]
                            elif "~" in segment or "%" in segment:
                                segment = unquote(segment).replace("~1", "/").replace("~0", "~")
                                jdest = jdest[segment]
                            else:
                                # skipping an unknown segment would silently
                                # resolve the reference to the wrong node
                                raise Exception(f"cannot follow path {dpath} at {segment}")
                        elif isinstance(jdest, list):
                            jdest = jdest[int(segment)]  # TODO proper exception
                        else:
                            raise Exception(f"cannot follow path {dpath} at {segment}")
                    name = f"_psub_{_SUBCOUNT}_"
                    _SUBCOUNT += 1
                    ndest = f"#/{defn}/{name}"
                    # log.info(f"moving {dest} to {ndest}")
                    schema[defn][name] = copy.deepcopy(jdest)
                    rschema["$ref"] = ndest
                    moved[dest] = ndest  # for other identical references
                # TODO also rename ugly references?
        return rschema

    recurseSchema(schema, "", rwt=mvRef)

    # do full url renamings and other references renamings
    def rwtGref(schema, path):
        if isinstance(schema, dict) and "$ref" in schema:
            dest = schema["$ref"]
            assert isinstance(dest, str), f"str $ref at {path}"
            if dest in moved:
                schema["$ref"] = moved[dest]
            elif dest and dest[0] != "#":
                # inefficient
                for old, new in moved.items():
                    if dest.startswith(old):
                        # log.warning(f"dest={dest} old={old} new={new}")
                        schema["$ref"] = new + dest[len(old):]
                if dest in ids:
                    log.warning(f"rewriting raw url: {dest} as {ids[dest]}")
                    schema["$ref"] = ids[dest]
        return schema

    recurseSchema(schema, "", rwt=rwtGref)

    # cleanup internal definitions
    for j, n, prefix, dest, sid in delete:
        del j[n]
        if prefix is not None:
            # move whole id-ed object as global as well, replaced with a ref
            schema[defn][prefix] = { p: s for p, s in j.items() }
            j.clear()
            j["$comment"] = f"{sid} moved as $def"
            j["$ref"] = dest
|