genschema 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- genschema/__init__.py +5 -0
- genschema/cli.py +147 -0
- genschema/comparators/__init__.py +17 -0
- genschema/comparators/delete_element.py +19 -0
- genschema/comparators/empty.py +47 -0
- genschema/comparators/flag.py +14 -0
- genschema/comparators/format.py +89 -0
- genschema/comparators/no_additional_prop.py +28 -0
- genschema/comparators/required.py +38 -0
- genschema/comparators/template.py +35 -0
- genschema/comparators/type.py +75 -0
- genschema/node.py +18 -0
- genschema/pipeline.py +270 -0
- genschema/pseudo_arrays.py +23 -0
- genschema/py.typed +0 -0
- genschema-0.1.0.dist-info/METADATA +913 -0
- genschema-0.1.0.dist-info/RECORD +21 -0
- genschema-0.1.0.dist-info/WHEEL +5 -0
- genschema-0.1.0.dist-info/entry_points.txt +2 -0
- genschema-0.1.0.dist-info/licenses/LICENSE +661 -0
- genschema-0.1.0.dist-info/top_level.txt +1 -0
genschema/pipeline.py
ADDED
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
from typing import Literal, Optional
|
|
4
|
+
|
|
5
|
+
from .comparators import TypeComparator
|
|
6
|
+
from .comparators.template import Comparator, ProcessingContext, Resource, ToDelete
|
|
7
|
+
from .pseudo_arrays import PseudoArrayHandlerBase
|
|
8
|
+
|
|
9
|
+
# NOTE(review): this configures the *root* logger as a side effect of importing
# the module. Libraries normally leave logging configuration to the host
# application; confirm this is intentional before relying on it.
logging.basicConfig(level=logging.ERROR)
|
|
10
|
+
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Converter:
    """Build a single JSON Schema from registered JSON documents and schemas.

    Resources are registered with :meth:`add_schema` / :meth:`add_json`,
    additional comparators with :meth:`register`, and :meth:`run` walks the
    collected resources level by level, letting every comparator contribute
    keywords to the node being built.
    """

    def __init__(
        self,
        pseudo_handler: Optional[PseudoArrayHandlerBase] = None,
        base_of: Literal["anyOf", "oneOf", "allOf"] = "anyOf",
        core_comparator: Optional[TypeComparator] = None,
    ):
        """
        Converter of JSON + JSON Schema structures into a JSON Schema.

        :param pseudo_handler: Pseudo-array handler (pseudo-arrays are large
            dictionaries whose values share one pattern and whose keys are
            identifiers).
        :type pseudo_handler: Optional[PseudoArrayHandlerBase]

        :param base_of: Base operator used to combine alternative schemas.
            There is no logic for choosing a specific ``*Of`` keyword per node.
        :type base_of: Literal["anyOf", "oneOf", "allOf"]

        :param core_comparator: Base type comparator. It is passed separately
            because ``type`` is the only field without which Converter cannot
            build the structure.
        :type core_comparator: TypeComparator
        """
        self._schemas: list[Resource] = []
        self._jsons: list[Resource] = []
        self._comparators: list[Comparator] = []
        self._core_comparator = core_comparator or TypeComparator()
        # Running counter used to give every registered resource a unique id.
        self._id = 0
        self._pseudo_handler = pseudo_handler
        self._base_of = base_of

    def add_schema(self, s: dict | str) -> None:
        """Register a JSON Schema.

        :param s: An already-parsed schema, or a path to a JSON file to load.
        """
        if isinstance(s, str):
            # JSON files are UTF-8; do not depend on the platform's locale encoding.
            with open(s, "r", encoding="utf-8") as f:
                s = json.load(f)

        self._schemas.append(Resource(str(self._id), "schema", s))
        self._id += 1

    def add_json(self, j: dict | list | str) -> None:
        """Register a JSON document.

        :param j: An already-parsed document, or a path to a JSON file to load.
        """
        if isinstance(j, str):
            # JSON files are UTF-8; do not depend on the platform's locale encoding.
            with open(j, "r", encoding="utf-8") as f:
                j = json.load(f)

        self._jsons.append(Resource(str(self._id), "json", j))
        self._id += 1

    def register(self, c: Comparator) -> None:
        """Append a comparator to the list run after the core comparator.

        :param c: Comparator to register.
        :raises UserWarning: If ``c`` is a ``TypeComparator``; such comparators
            must be supplied through ``core_comparator`` at construction time.
        """
        if isinstance(c, TypeComparator):
            raise UserWarning(
                "A TypeComparator-like comparator must be provided during initialization "
                "using the core_comparator attribute."
            )
        self._comparators.append(c)

    # ---------------- utils ----------------

    @staticmethod
    def _pseudo_key_order(key: str) -> int:
        """Sort key for pseudo-array keys: decimal keys by value, others as -1."""
        # isdecimal() only accepts characters that int() can parse, unlike
        # isdigit(), which also accepts e.g. superscript digits.
        return int(key) if key.isdecimal() else -1

    def _is_pseudo_array(self, keys: list[str], ctx: ProcessingContext) -> bool:
        """Return only the boolean verdict of the configured pseudo-array handler."""
        if not self._pseudo_handler:
            return False
        # The handler returns (flag, pattern); the tuple itself is always truthy,
        # so the flag has to be unpacked explicitly.
        flag, _pattern = self._pseudo_handler.is_pseudo_array(keys, ctx)
        return bool(flag)

    def _collect_prop_names(self, schemas: list[Resource], jsons: list[Resource]) -> list[str]:
        """Return the sorted union of property names found in the resources.

        Schemas contribute the keys of their ``properties`` dict; JSON
        documents contribute their own top-level keys. Resources of any other
        shape are ignored.
        """
        names = set()
        for s in schemas:
            c = s.content
            if isinstance(c, dict) and isinstance(c.get("properties"), dict):
                names.update(c["properties"].keys())
        for j in jsons:
            if isinstance(j.content, dict):
                names.update(j.content.keys())
        return sorted(names)

    def _gather_property_candidates(
        self, schemas: list[Resource], jsons: list[Resource], prop: str
    ) -> tuple[list[Resource], list[Resource]]:
        """Collect the sub-resources describing property ``prop``.

        :returns: ``(schema_resources, json_resources)``; each sub-resource id
            is the parent id with ``/<prop>`` appended.
        """
        s_out, j_out = [], []

        for s in schemas:
            c = s.content
            props = c.get("properties") if isinstance(c, dict) else None
            # Same guard as _collect_prop_names: a non-dict "properties" is skipped.
            if isinstance(props, dict) and prop in props:
                s_out.append(Resource(f"{s.id}/{prop}", "schema", props[prop]))

        for j in jsons:
            if isinstance(j.content, dict) and prop in j.content:
                j_out.append(Resource(f"{j.id}/{prop}", "json", j.content[prop]))

        return s_out, j_out

    def _split_array_ctx(
        self, ctx: ProcessingContext
    ) -> tuple[ProcessingContext, ProcessingContext]:
        """Split a context into non-item resources and array-item resources.

        Lists, ``type: array`` schemas with ``items``, and dicts/object schemas
        that the pseudo-array handler accepts are expanded into item
        resources; everything else is kept as is.

        :returns: ``(object_context, items_context)``, both carrying
            ``ctx.sealed`` unchanged.
        """
        obj_jsons = []
        item_jsons = []

        for j in ctx.jsons:
            c = j.content
            if isinstance(c, list):
                for i, el in enumerate(c):
                    item_jsons.append(Resource(f"{j.id}/{i}", "json", el))
            elif isinstance(c, dict):
                keys = self._collect_prop_names([], [j])
                if self._is_pseudo_array(keys, ctx):
                    sorted_keys = sorted(keys, key=self._pseudo_key_order)
                    for i, k in enumerate(sorted_keys):
                        item_jsons.append(Resource(f"{j.id}/{i}", "json", c[k]))
                else:
                    obj_jsons.append(j)
            else:
                obj_jsons.append(j)

        obj_schemas = []
        item_schemas = []

        for s in ctx.schemas:
            c = s.content
            if not isinstance(c, dict):
                obj_schemas.append(s)
                continue

            t = c.get("type")
            if t == "array" and "items" in c:
                item_schemas.append(Resource(f"{s.id}/items", "schema", c["items"]))
            elif t == "object" and isinstance(c.get("properties"), dict):
                keys = sorted(c["properties"].keys())
                if self._is_pseudo_array(keys, ctx):
                    sorted_keys = sorted(keys, key=self._pseudo_key_order)
                    for i, k in enumerate(sorted_keys):
                        item_schemas.append(
                            Resource(f"{s.id}/{i}", "schema", c["properties"][k])
                        )
                else:
                    obj_schemas.append(s)
            else:
                obj_schemas.append(s)

        return (
            ProcessingContext(obj_schemas, obj_jsons, ctx.sealed),
            ProcessingContext(item_schemas, item_jsons, ctx.sealed),
        )

    def _filter_ctx_by_ids(self, ctx: ProcessingContext, ids: set) -> ProcessingContext:
        """Return a context restricted to resources whose id is in ``ids``.

        An empty ``ids`` means "no filter": the original context is returned.
        """
        if not ids:
            return ctx
        schemas = [s for s in ctx.schemas if s.id in ids]
        jsons = [j for j in ctx.jsons if j.id in ids]
        return ProcessingContext(schemas, jsons, ctx.sealed)

    # ---------------- core ----------------

    def _run_level(self, ctx: ProcessingContext, env: str, prev: dict) -> dict:
        """Build the schema node for one level of the structure.

        :param ctx: Resources that describe this level.
        :param env: Path of the node being built, passed to the comparators.
        :param prev: Previously known content of the node; it is copied, not
            mutated at the top level.
        :returns: The node after all comparators ran and children were processed.
        """
        logger.debug("Entering _run_level: env=%s, prev_result=%s", env, prev)
        node = dict(prev)

        def use_comp(comp: Comparator) -> bool:
            """Run one comparator and merge its output into ``node``."""
            if not comp.can_process(ctx, env, node):
                return False

            g, alts = comp.process(ctx, env, node)
            if g:
                node.update(g)
            if alts:
                node.setdefault(self._base_of, []).extend(alts)
            return True

        # Run the core (type) comparator first.
        use_comp(self._core_comparator)

        # Decide whether the object is a pseudo-array. Both names are bound
        # up front so the type dispatch below is safe even if a later
        # comparator is the one that sets type to "object".
        is_pseudo_array = False
        pattern = None
        if node.get("type") == "object" and self._pseudo_handler:
            props = self._collect_prop_names(ctx.schemas, ctx.jsons)
            is_pseudo_array, pattern = self._pseudo_handler.is_pseudo_array(props, ctx)
            node["isPseudoArray"] = is_pseudo_array

        # Run the remaining comparators.
        for comp in self._comparators:
            use_comp(comp)

        # Drop attributes that comparators marked for deletion.
        to_delete_keys = [key for key, element in node.items() if isinstance(element, ToDelete)]
        for key in to_delete_keys:
            del node[key]

        # If there is an *Of keyword, process every alternative through _run_level.
        if self._base_of in node:
            new_of = []
            for idx, alt in enumerate(node[self._base_of]):
                # Each alternative only sees the resources listed under its
                # "j2sElementTrigger" key; without ids it sees the whole context.
                alt_ids = set(alt.get("j2sElementTrigger", []))
                alt_ctx = self._filter_ctx_by_ids(ctx, alt_ids) if alt_ids else ctx
                processed_alt = self._run_level(alt_ctx, env + f"/{self._base_of}/{idx}", alt)
                new_of.append(processed_alt)
            node[self._base_of] = new_of
            logger.debug(
                "Exiting _run_level (%s handled): env=%s, node=%s", self._base_of, env, node
            )
            return node

        # Recursion based on type.
        if node.get("type") == "object":
            if is_pseudo_array:
                node = self._run_pseudo_array(ctx, env, node, str(pattern))
            else:
                node = self._run_object(ctx, env, node)
        elif node.get("type") == "array":
            node = self._run_array(ctx, env, node)

        logger.debug("Exiting _run_level: env=%s, node=%s", env, node)
        return node

    # ---------------- object ----------------

    def _run_object(self, ctx: ProcessingContext, env: str, node: dict) -> dict:
        """Fill ``properties`` of an object node by processing every property.

        The ``properties`` key is removed again when no property was found.
        """
        node = dict(node)
        node.setdefault("properties", {})

        props = self._collect_prop_names(ctx.schemas, ctx.jsons)
        for name in props:
            s, j = self._gather_property_candidates(ctx.schemas, ctx.jsons, name)
            sub_ctx = ProcessingContext(s, j, ctx.sealed)
            node["properties"][name] = self._run_level(
                sub_ctx, f"{env}/properties/{name}", node["properties"].get(name, {})
            )

        if not node["properties"]:
            node.pop("properties", None)

        return node

    # ---------------- pseudo array ----------------

    def _run_pseudo_array(self, ctx: ProcessingContext, env: str, node: dict, pattern: str) -> dict:
        """Describe a pseudo-array object through ``patternProperties[pattern]``.

        All values of the pseudo-array are merged into one sub-schema.
        """
        node = dict(node)
        node.setdefault("patternProperties", {})
        _, items_ctx = self._split_array_ctx(ctx)
        node["patternProperties"][pattern] = self._run_level(
            items_ctx, f"{env}/patternProperties/{pattern}", {}
        )
        return node

    # ---------------- array ----------------

    def _run_array(self, ctx: ProcessingContext, env: str, node: dict) -> dict:
        """Fill ``items`` of an array node from the elements of the context."""
        node = dict(node)
        node.setdefault("items", {})

        _, items_ctx = self._split_array_ctx(ctx)
        node["items"] = self._run_level(items_ctx, f"{env}/items", node.get("items", {}))

        return node

    # ---------------- entry ----------------

    def run(self) -> dict:
        """Process all registered resources and return the resulting schema."""
        ctx = ProcessingContext(self._schemas, self._jsons, sealed=False)
        return self._run_level(ctx, "/", {})
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from .comparators.template import ProcessingContext
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class PseudoArrayHandlerBase:
    """Default pseudo-array handler: never reports a pseudo-array.

    Subclasses override :meth:`is_pseudo_array` to supply a real detection rule.
    """

    def is_pseudo_array(
        self, keys: list[str], ctx: ProcessingContext
    ) -> tuple[bool, Optional[str]]:
        """Tell whether ``keys`` form a pseudo-array.

        :param keys: Property names of the object under inspection.
        :param ctx: Processing context of the current level (unused here).
        :returns: ``(is_pseudo_array, key_pattern)``; this base implementation
            always answers ``(False, None)``.
        """
        verdict: tuple[bool, Optional[str]] = (False, None)
        return verdict
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class PseudoArrayHandler(PseudoArrayHandlerBase):
    """Treats an object as a pseudo-array when every key is a run of ASCII digits."""

    # Pattern reported for pseudo-arrays; the key check below must accept
    # exactly the strings this pattern matches.
    _PATTERN = "^[0-9]+$"

    def is_pseudo_array(
        self, keys: list[str], ctx: ProcessingContext
    ) -> tuple[bool, Optional[str]]:
        """Tell whether ``keys`` form a pseudo-array.

        :param keys: Property names of the object under inspection.
        :param ctx: Processing context of the current level (unused here).
        :returns: ``(True, "^[0-9]+$")`` when ``keys`` is non-empty and every
            key consists solely of ASCII digits, otherwise ``(False, None)``.
        """
        if not keys:
            return False, None
        # int() would also accept "-1", "+5", " 7 ", "1_000" and non-ASCII
        # digits, none of which match the returned pattern, so test the
        # characters directly instead of attempting a conversion.
        if all(isinstance(k, str) and k.isascii() and k.isdigit() for k in keys):
            return True, self._PATTERN
        return False, None
|
genschema/py.typed
ADDED
|
File without changes
|