genschema 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
genschema/pipeline.py ADDED
@@ -0,0 +1,270 @@
1
+ import json
2
+ import logging
3
+ from typing import Literal, Optional
4
+
5
+ from .comparators import TypeComparator
6
+ from .comparators.template import Comparator, ProcessingContext, Resource, ToDelete
7
+ from .pseudo_arrays import PseudoArrayHandlerBase
8
+
9
+ logging.basicConfig(level=logging.ERROR)
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class Converter:
    """Merge a mixed set of JSON documents and JSON Schemas into one JSON Schema."""

    def __init__(
        self,
        pseudo_handler: Optional[PseudoArrayHandlerBase] = None,
        base_of: Literal["anyOf", "oneOf", "allOf"] = "anyOf",
        core_comparator: Optional[TypeComparator] = None,
    ):
        """
        Converter of JSON + JSON Schema structures into a JSON Schema.

        :param pseudo_handler: Handler for pseudo-arrays (large dicts whose
            values follow one pattern and whose keys are identifiers).
        :type pseudo_handler: Optional[PseudoArrayHandlerBase]

        :param base_of: Base schema-combination operator.  There is no logic
            for choosing a specific "Of" kind per node.
        :type base_of: Literal["anyOf", "oneOf", "allOf"]

        :param core_comparator: Core type comparator.  Kept separate because
            ``type`` is the only field without which the Converter cannot
            build a structure.
        :type core_comparator: TypeComparator
        """
        self._schemas: list[Resource] = []
        self._jsons: list[Resource] = []
        self._comparators: list[Comparator] = []
        self._core_comparator = core_comparator or TypeComparator()
        self._id = 0  # monotonically increasing id shared by schemas and jsons
        self._pseudo_handler = pseudo_handler
        self._base_of = base_of

    @staticmethod
    def _load(data):
        """Return *data* unchanged, or parse it as a JSON file when it is a path string."""
        if isinstance(data, str):
            with open(data, "r", encoding="utf-8") as f:
                return json.load(f)
        return data

    def add_schema(self, s: dict | str) -> None:
        """Register a JSON Schema, given as a dict or a path to a JSON file."""
        self._schemas.append(Resource(str(self._id), "schema", self._load(s)))
        self._id += 1

    def add_json(self, j: dict | list | str) -> None:
        """Register a JSON document, given as a dict/list or a path to a JSON file."""
        self._jsons.append(Resource(str(self._id), "json", self._load(j)))
        self._id += 1

    def register(self, c: Comparator) -> None:
        """Register an additional comparator.

        :raises UserWarning: when a TypeComparator is passed; the core
            comparator must be supplied via ``core_comparator`` in ``__init__``.
        """
        if isinstance(c, TypeComparator):
            raise UserWarning(
                "A TypeComparator-like comparator must be provided during initialization "
                "using the core_comparator attribute."
            )
        self._comparators.append(c)

    # ---------------- utils ----------------

    def _detect_pseudo_array(self, keys: list[str], ctx: ProcessingContext) -> bool:
        """Return True when the configured handler classifies *keys* as a pseudo-array.

        ``is_pseudo_array`` returns a ``(flag, pattern)`` tuple; only the flag
        matters here.  (Bug fix: the original code truth-tested the tuple
        itself, which is always truthy, so any dict was split as a
        pseudo-array whenever a handler was configured.)
        """
        if self._pseudo_handler is None:
            return False
        flag, _pattern = self._pseudo_handler.is_pseudo_array(keys, ctx)
        return flag

    def _collect_prop_names(self, schemas: list[Resource], jsons: list[Resource]) -> list[str]:
        """Union of property names seen across schema ``properties`` and JSON objects."""
        names = set()
        for s in schemas:
            c = s.content
            if isinstance(c, dict) and isinstance(c.get("properties"), dict):
                names.update(c["properties"].keys())
        for j in jsons:
            if isinstance(j.content, dict):
                names.update(j.content.keys())
        return sorted(names)

    def _gather_property_candidates(
        self, schemas: list[Resource], jsons: list[Resource], prop: str
    ) -> tuple[list[Resource], list[Resource]]:
        """Collect the per-property sub-resources for *prop* from both resource kinds."""
        s_out, j_out = [], []

        for s in schemas:
            c = s.content
            if isinstance(c, dict) and prop in c.get("properties", {}):
                s_out.append(Resource(f"{s.id}/{prop}", "schema", c["properties"][prop]))

        for j in jsons:
            if isinstance(j.content, dict) and prop in j.content:
                j_out.append(Resource(f"{j.id}/{prop}", "json", j.content[prop]))

        return s_out, j_out

    def _split_array_ctx(
        self, ctx: ProcessingContext
    ) -> tuple[ProcessingContext, ProcessingContext]:
        """Split *ctx* into (object-like, item-like) sub-contexts.

        Lists and pseudo-array dicts contribute their elements to the item
        context; every other resource stays in the object context.
        """
        obj_jsons = []
        item_jsons = []

        for j in ctx.jsons:
            c = j.content
            if isinstance(c, list):
                for i, el in enumerate(c):
                    item_jsons.append(Resource(f"{j.id}/{i}", "json", el))
            elif isinstance(c, dict):
                keys = self._collect_prop_names([], [j])
                if self._detect_pseudo_array(keys, ctx):
                    # Non-numeric keys sort first (key -1); numeric keys in value order.
                    sorted_keys = sorted(keys, key=lambda k: int(k) if k.isdigit() else -1)
                    for i, k in enumerate(sorted_keys):
                        item_jsons.append(Resource(f"{j.id}/{i}", "json", c[k]))
                else:
                    obj_jsons.append(j)
            else:
                obj_jsons.append(j)

        obj_schemas = []
        item_schemas = []

        for s in ctx.schemas:
            c = s.content
            if isinstance(c, dict):
                t = c.get("type")
                if t == "array" and "items" in c:
                    item_schemas.append(Resource(f"{s.id}/items", "schema", c["items"]))
                elif t == "object" and "properties" in c:
                    keys = sorted(c["properties"].keys())
                    if self._detect_pseudo_array(keys, ctx):
                        sorted_keys = sorted(keys, key=lambda k: int(k) if k.isdigit() else -1)
                        for i, k in enumerate(sorted_keys):
                            item_schemas.append(
                                Resource(f"{s.id}/{i}", "schema", c["properties"][k])
                            )
                    else:
                        obj_schemas.append(s)
                else:
                    obj_schemas.append(s)
            else:
                obj_schemas.append(s)

        return (
            ProcessingContext(obj_schemas, obj_jsons, ctx.sealed),
            ProcessingContext(item_schemas, item_jsons, ctx.sealed),
        )

    def _filter_ctx_by_ids(self, ctx: ProcessingContext, ids: set) -> ProcessingContext:
        """Restrict *ctx* to resources whose id is in *ids* (no-op when *ids* is empty)."""
        if not ids:
            return ctx
        schemas = [s for s in ctx.schemas if s.id in ids]
        jsons = [j for j in ctx.jsons if j.id in ids]
        return ProcessingContext(schemas, jsons, ctx.sealed)

    # ---------------- core ----------------

    def _run_level(self, ctx: ProcessingContext, env: str, prev: dict) -> dict:
        """Build the schema node for one nesting level, then recurse by resolved type.

        :param ctx: Resources relevant at this level.
        :param env: JSON-pointer-like path used for logging and sub-resource ids.
        :param prev: Previously accumulated node content (copied, never mutated).
        :return: The completed schema node for this level.
        """
        logger.debug("Entering _run_level: env=%s, prev_result=%s", env, prev)
        node = dict(prev)

        def use_comp(comp: Comparator) -> bool:
            # Apply one comparator: merge its global result into the node and
            # append its alternatives under the configured Of operator.
            if not comp.can_process(ctx, env, node):
                return False

            g, alts = comp.process(ctx, env, node)
            if g:
                node.update(g)
            if alts:
                node.setdefault(self._base_of, []).extend(alts)
            return True

        # Invoke the core comparator first: it establishes "type".
        use_comp(self._core_comparator)

        # Determine whether this object is a pseudo-array.
        is_pseudo_array = False
        pattern = None
        if node.get("type") == "object" and self._pseudo_handler:
            props = self._collect_prop_names(ctx.schemas, ctx.jsons)
            is_pseudo_array, pattern = self._pseudo_handler.is_pseudo_array(props, ctx)
            node["isPseudoArray"] = is_pseudo_array

        # Invoke the remaining comparators.
        for comp in self._comparators:
            use_comp(comp)

        # Drop attributes that comparators marked for deletion.
        to_delete_keys = [key for key, element in node.items() if isinstance(element, ToDelete)]
        for key in to_delete_keys:
            del node[key]

        # If an Of operator is present, process each alternative via _run_level.
        if self._base_of in node:
            new_of = []
            for idx, alt in enumerate(node[self._base_of]):
                alt_ids = set(alt.get("j2sElementTrigger", []))
                alt_ctx = self._filter_ctx_by_ids(ctx, alt_ids) if alt_ids else ctx
                processed_alt = self._run_level(alt_ctx, env + f"/{self._base_of}/{idx}", alt)
                new_of.append(processed_alt)
            node[self._base_of] = new_of
            logger.debug(
                "Exiting _run_level (%s handled): env=%s, node=%s", self._base_of, env, node
            )
            return node

        # Recurse based on the resolved type.
        if node.get("type") == "object":
            if is_pseudo_array:
                node = self._run_pseudo_array(ctx, env, node, str(pattern))
            else:
                node = self._run_object(ctx, env, node)
        elif node.get("type") == "array":
            node = self._run_array(ctx, env, node)

        logger.debug("Exiting _run_level: env=%s, node=%s", env, node)
        return node

    # ---------------- object ----------------

    def _run_object(self, ctx: ProcessingContext, env: str, node: dict) -> dict:
        """Recurse into every property of an object node."""
        node = dict(node)
        node.setdefault("properties", {})

        for name in self._collect_prop_names(ctx.schemas, ctx.jsons):
            s, j = self._gather_property_candidates(ctx.schemas, ctx.jsons, name)
            sub_ctx = ProcessingContext(s, j, ctx.sealed)
            node["properties"][name] = self._run_level(
                sub_ctx, f"{env}/properties/{name}", node["properties"].get(name, {})
            )

        if not node["properties"]:
            node.pop("properties", None)

        return node

    # ---------------- pseudo array ----------------

    def _run_pseudo_array(self, ctx: ProcessingContext, env: str, node: dict, pattern: str) -> dict:
        """Recurse into a pseudo-array's items, keyed by the detected regex pattern."""
        node = dict(node)
        node.setdefault("patternProperties", {})
        _, items_ctx = self._split_array_ctx(ctx)
        node["patternProperties"][pattern] = self._run_level(
            items_ctx, f"{env}/patternProperties/{pattern}", {}
        )
        if not node["patternProperties"]:
            node.pop("patternProperties", None)
        return node

    # ---------------- array ----------------

    def _run_array(self, ctx: ProcessingContext, env: str, node: dict) -> dict:
        """Recurse into the merged items of an array node."""
        node = dict(node)
        node.setdefault("items", {})

        _, items_ctx = self._split_array_ctx(ctx)
        node["items"] = self._run_level(items_ctx, f"{env}/items", node.get("items", {}))

        return node

    # ---------------- entry ----------------

    def run(self) -> dict:
        """Run the conversion over all registered resources and return the root schema."""
        ctx = ProcessingContext(self._schemas, self._jsons, sealed=False)
        return self._run_level(ctx, "/", {})
@@ -0,0 +1,23 @@
1
+ from typing import Optional
2
+
3
+ from .comparators.template import ProcessingContext
4
+
5
+
6
class PseudoArrayHandlerBase:
    """Pluggable policy that decides whether a dict's keys form a pseudo-array.

    The base implementation never detects a pseudo-array.
    """

    def is_pseudo_array(
        self, keys: list[str], ctx: "ProcessingContext"
    ) -> tuple[bool, Optional[str]]:
        """Return ``(flag, pattern)``: whether *keys* form a pseudo-array and,
        if so, a regex matching every key (otherwise ``None``)."""
        return False, None


class PseudoArrayHandler(PseudoArrayHandlerBase):
    """Detects dicts whose keys are all ASCII decimal indices ("0", "1", ...)."""

    def is_pseudo_array(
        self, keys: list[str], ctx: "ProcessingContext"
    ) -> tuple[bool, Optional[str]]:
        """Classify *keys*; any returned pattern is guaranteed to match them.

        Bug fix: the original tested ``int(k)``, which also accepts keys such
        as "-1", "+5" or non-ASCII digits that the returned pattern
        ``^[0-9]+$`` would never match, producing a schema whose
        patternProperties regex missed its own keys.
        """
        if keys and all(k.isascii() and k.isdigit() for k in keys):
            return True, "^[0-9]+$"
        return False, None
genschema/py.typed ADDED
File without changes