genschema 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
genschema/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ from .pipeline import Converter
2
+ from .pseudo_arrays import PseudoArrayHandler, PseudoArrayHandlerBase
3
+
4
+ __all__ = ["Converter", "PseudoArrayHandler", "PseudoArrayHandlerBase"]
5
+ __version__ = "0.1.0"
genschema/cli.py ADDED
@@ -0,0 +1,147 @@
1
+ import argparse
2
+ import json
3
+ import sys
4
+ import time
5
+
6
+ from rich.console import Console
7
+
8
+ from . import Converter, PseudoArrayHandler
9
+ from .comparators import (
10
+ DeleteElement,
11
+ EmptyComparator,
12
+ FormatComparator,
13
+ RequiredComparator,
14
+ )
15
+
16
+ console = Console()
17
+
18
+
19
def main() -> None:
    """Run the ``genschema`` command-line interface.

    Every positional input is parsed as JSON (``-`` means stdin; when options
    are given but no positional input, stdin is read once).  The documents are
    fed to a ``Converter``, the comparators not disabled by a ``--no-*`` flag
    are registered, and the generated schema is written to ``--output`` or to
    stdout.

    When the schema goes to stdout it is emitted as real JSON and the run
    summary is sent to stderr, so the output can be piped into other tools.
    Error messages always go to stderr.

    Exits with status 1 when called without arguments, when an input cannot
    be read or parsed, when schema generation fails, or when the schema
    cannot be written.
    """
    # Local import: only this function needs to neutralise rich markup in
    # user-controlled text (paths, exception messages).
    from rich.markup import escape

    parser = argparse.ArgumentParser(
        description="Generate JSON Schema from JSON input using genschema.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  genschema input.json -o schema.json
  genschema input1.json input2.json --base-of oneOf
  cat input.json | genschema -
  genschema --base-of anyOf < input.json
  genschema dir/file1.json dir/file2.json -o schema.json
""",
    )
    parser.add_argument(
        "inputs",
        nargs="*",
        help="Paths to input JSON files. Use '-' for stdin. "
        "If no arguments are provided, show this help message.",
    )
    parser.add_argument(
        "-o",
        "--output",
        help="Path to output JSON Schema file. If not specified, output to stdout.",
    )
    parser.add_argument(
        "--base-of",
        choices=["anyOf", "oneOf"],
        default="anyOf",
        help="Combinator for differing types (default: anyOf).",
    )
    parser.add_argument(
        "--no-pseudo-array", action="store_true", help="Disable pseudo-array handling."
    )
    parser.add_argument("--no-format", action="store_true", help="Disable FormatComparator.")
    parser.add_argument("--no-required", action="store_true", help="Disable RequiredComparator.")
    parser.add_argument("--no-empty", action="store_true", help="Disable EmptyComparator.")
    parser.add_argument(
        "--no-delete-element", action="store_true", help="Disable DeleteElement comparators."
    )

    # A completely bare invocation shows the help text instead of blocking on stdin.
    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)

    args = parser.parse_args()

    err_console = Console(stderr=True)

    def fail(message: str) -> None:
        """Print *message* in red on stderr and terminate with status 1."""
        # escape() keeps brackets in paths / exception text from being parsed
        # as rich markup tags.
        err_console.print(f"[red]{escape(message)}[/red]")
        sys.exit(1)

    def load(source: str) -> object:
        """Parse one input (a file path, or ``-`` for stdin) as JSON."""
        if source == "-":
            try:
                return json.load(sys.stdin)
            except (json.JSONDecodeError, UnicodeDecodeError) as e:
                fail(f"Error reading JSON from stdin: {e}")
        try:
            with open(source, "r", encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            fail(f"File not found: {source}")
        except OSError as e:
            # Permission problems, directories passed as files, etc.
            fail(f"Error reading file {source}: {e}")
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            fail(f"Invalid JSON in file {source}: {e}")

    # Options without positional inputs (e.g. ``--base-of anyOf < in.json``)
    # mean "read a single document from stdin".
    datas = [load(source) for source in (args.inputs or ["-"])]

    # Converter setup
    pseudo_handler = None if args.no_pseudo_array else PseudoArrayHandler()
    conv = Converter(pseudo_handler=pseudo_handler, base_of=args.base_of)

    for data in datas:
        conv.add_json(data)

    # Register comparators conditionally
    if not args.no_format:
        conv.register(FormatComparator())
    if not args.no_required:
        conv.register(RequiredComparator())
    if not args.no_empty:
        conv.register(EmptyComparator())
    if not args.no_delete_element:
        conv.register(DeleteElement())
        conv.register(DeleteElement("isPseudoArray"))

    # Generate schema; perf_counter is monotonic, so the measured duration
    # cannot be skewed by wall-clock adjustments.
    start_time = time.perf_counter()
    try:
        result = conv.run()
    except Exception as e:  # top-level CLI boundary: report instead of a traceback
        fail(f"Error generating schema: {e}")
    elapsed = round(time.perf_counter() - start_time, 4)

    # Output result
    if args.output:
        try:
            with open(args.output, "w", encoding="utf-8") as f:
                json.dump(result, f, indent=2, ensure_ascii=False)
        except Exception as e:
            fail(f"Error writing file {args.output}: {e}")
        console.print(f"[green]Schema successfully written to {escape(args.output)}[/green]")
        status = console
    else:
        try:
            rendered = json.dumps(result, indent=2, ensure_ascii=False)
        except (TypeError, ValueError) as e:
            fail(f"Error serializing schema: {e}")
        else:
            # print_json emits valid JSON (printing the dict directly would
            # render a Python repr with True/None and single quotes).
            console.print_json(rendered, indent=2)
        # Keep stdout limited to the schema so it can be piped.
        status = err_console

    # Execution info
    instances_word = "instance" if len(datas) == 1 else "instances"
    status.print(f"Generated from {len(datas)} JSON {instances_word}.")
    status.print(f"Elapsed time: {elapsed} sec.")
144
+
145
+
146
+ if __name__ == "__main__":
147
+ main()
@@ -0,0 +1,17 @@
1
+ from .delete_element import DeleteElement
2
+ from .empty import EmptyComparator
3
+ from .flag import FlagMaker
4
+ from .format import FormatComparator
5
+ from .no_additional_prop import NoAdditionalProperties
6
+ from .required import RequiredComparator
7
+ from .type import TypeComparator
8
+
9
+ __all__ = [
10
+ "FormatComparator",
11
+ "TypeComparator",
12
+ "RequiredComparator",
13
+ "FlagMaker",
14
+ "EmptyComparator",
15
+ "NoAdditionalProperties",
16
+ "DeleteElement",
17
+ ]
@@ -0,0 +1,19 @@
1
+ from .template import Comparator, ComparatorResult, ProcessingContext, ToDelete
2
+
3
+
4
class DeleteElement(Comparator):
    """Comparator that flags a single node attribute for removal.

    Whenever the configured attribute is present on a node, ``process``
    returns an update that replaces its value with a ``ToDelete`` marker
    carrying the previous value and a reference to this comparator.
    NOTE(review): the marker is presumably consumed by the pipeline, which is
    not visible from this module - confirm there.
    """

    name = "delete-element"
    # Class-level default; every instance overrides it in __init__.
    attribute = ""

    def __init__(self, attribute: str = "j2sElementTrigger"):
        """Remember which attribute name this instance should flag."""
        super().__init__()
        self.attribute = attribute

    def can_process(self, ctx: ProcessingContext, env: str, node: dict) -> bool:
        """Return True when the node currently carries the target attribute."""
        target = self.attribute
        return target in node

    def process(self, ctx: ProcessingContext, env: str, node: dict) -> ComparatorResult:
        """Wrap the attribute's current value (``-1`` if absent) in ``ToDelete``."""
        target = self.attribute
        marker = ToDelete(node.get(target, -1), self)
        return {target: marker}, None
@@ -0,0 +1,47 @@
1
+ from .template import Comparator, ComparatorResult, ProcessingContext, Resource
2
+
3
+
4
class EmptyComparator(Comparator):
    """Constrain the size of containers that are uniformly empty or non-empty.

    For an ``object``/``array`` node:

    * if none of the schema/JSON resources at this level is non-empty,
      emit ``maxProperties: 0`` / ``maxItems: 0``;
    * otherwise, if all of them are non-empty, emit
      ``minProperties: 1`` / ``minItems: 1``.

    Each rule can be switched off through the constructor flags.
    """

    name = "empty"

    def __init__(self, flag_empty: bool = True, flag_non_empty: bool = True):
        """Store which of the two rules (empty / non-empty) are enabled."""
        self.flag_empty = flag_empty
        self.flag_non_empty = flag_non_empty

    def can_process(self, ctx: ProcessingContext, env: str, node: dict) -> bool:
        """Only container nodes (``object`` or ``array``) are handled."""
        return node.get("type") in ("object", "array")

    def process(self, ctx: ProcessingContext, env: str, node: dict) -> ComparatorResult:
        """Return the min/max size keyword for the node, or ``(None, None)``."""
        # One flag per resource: dicts and lists count as non-empty when they
        # hold at least one entry; every other value is treated as non-empty.
        filled = [
            bool(res.content) if isinstance(res.content, (dict, list)) else True
            for res in ctx.schemas + ctx.jsons
        ]

        if self.flag_empty and not any(filled):
            prefix, limit = "max", 0
        elif self.flag_non_empty and all(filled):
            prefix, limit = "min", 1
        else:
            return None, None

        node_type = node.get("type")
        for kind, suffix in (("object", "Properties"), ("array", "Items")):
            if node_type == kind:
                return {prefix + suffix: limit}, None

        return None, None
@@ -0,0 +1,14 @@
1
+ from .template import Comparator, ComparatorResult, ProcessingContext
2
+
3
+
4
class FlagMaker(Comparator):
    """Visual aid: marks every node a comparator is offered with ``"Flag": True``.

    Useful for seeing where in the schema comparators can fire.
    """

    name = "flag"

    def can_process(self, ctx: ProcessingContext, env: str, node: dict) -> bool:
        """Accept every node unconditionally."""
        return True

    def process(self, ctx: ProcessingContext, env: str, node: dict) -> ComparatorResult:
        """Return an update that sets the ``Flag`` key to ``True``."""
        marker = {"Flag": True}
        return marker, None
@@ -0,0 +1,89 @@
1
+ import re
2
+ from collections import defaultdict
3
+ from functools import lru_cache
4
+ from typing import Any, Optional
5
+
6
+ from .template import Comparator, ComparatorResult, ProcessingContext
7
+
8
+
9
class FormatDetector:
    """Global format detector backed by a per-type registry of regex patterns.

    ``_registry`` maps a JSON type name to ``{compiled pattern: format name}``.
    Patterns are tried in insertion order and the first full match wins.
    Lookups are memoised, so entries added to ``_registry`` after a value has
    already been looked up are not reflected until the cache is cleared.
    """

    _registry = {
        "string": {
            re.compile(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"): "email",
            re.compile(
                r"^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$",
                re.I,
            ): "uuid",
            re.compile(r"^\d{4}-\d{2}-\d{2}$"): "date",
            re.compile(
                r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2})?$"
            ): "date-time",
            re.compile(r"^https?://[^\s/$.?#].[^\s]*$", re.I): "uri",
            re.compile(
                r"^(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}" r"(?:25[0-5]|2[0-4]\d|[01]?\d\d?)$"
            ): "ipv4",
        }
    }

    @classmethod
    def detect(cls, value: Any, type_hint: str = "string") -> Optional[str]:
        """Return the format name matching ``str(value)``, or ``None``.

        Args:
            value: Value to classify; it is converted with ``str`` first.
            type_hint: Registry section to search (default ``"string"``).

        Returns:
            The name of the first pattern that fully matches, else ``None``.
        """
        # Convert before hitting the cache: caching on the raw value would
        # raise TypeError for unhashable inputs such as lists or dicts.
        return cls._detect_text(str(value), type_hint)

    @classmethod
    @lru_cache(maxsize=512)
    def _detect_text(cls, text: str, type_hint: str) -> Optional[str]:
        """Memoised lookup of *text* against the patterns for *type_hint*."""
        patterns = cls._registry.get(type_hint, {})
        for pattern, name in patterns.items():
            if pattern.fullmatch(text):
                return name
        return None
38
+
39
+
40
class FormatComparator(Comparator):
    """Refine ``type: "string"`` nodes with a ``format`` keyword.

    Formats come from two places: ``format`` declared by string schemas at
    this level, and formats detected from string JSON values via
    ``FormatDetector``.  Resources are grouped by format; one group yields a
    single node update, several groups yield a list of alternative variants.
    """

    name = "format"

    def can_process(self, ctx: ProcessingContext, env: str, prev_result: dict) -> bool:
        """Handle the node only when it is already typed as ``"string"``."""
        return prev_result.get("type") == "string"

    def process(self, ctx: ProcessingContext, env: str, prev_result: dict) -> ComparatorResult:
        """Group contributing resource ids by format and build the variants."""
        # format name (None = "no format") -> ids of the resources behind it.
        # The None bucket starts with the triggers recorded by earlier
        # comparators (usually TypeComparator).
        ids_by_format: dict[str | None, set[str]] = defaultdict(set)
        ids_by_format[None].update(set(prev_result.get("j2sElementTrigger", [])))

        def record(fmt, source_id) -> None:
            # A resource with a concrete format leaves the "no format" bucket.
            ids_by_format[fmt].add(source_id)
            if fmt is not None:
                ids_by_format[None].discard(source_id)

        # 1. Formats declared explicitly by string schemas
        for schema in ctx.schemas:
            body = schema.content
            if isinstance(body, dict) and body.get("type") == "string":
                record(body.get("format"), schema.id)

        # 2. Formats inferred from JSON string values
        for doc in ctx.jsons:
            if isinstance(doc.content, str):
                record(FormatDetector.detect(doc.content), doc.id)

        # One variant per non-empty bucket; "format" is omitted for None.
        variants: list[dict] = [
            {
                "type": "string",
                "j2sElementTrigger": sorted(ids),
                **({} if fmt is None else {"format": fmt}),
            }
            for fmt, ids in ids_by_format.items()
            if ids
        ]

        if not variants:
            # Nothing new was found - leave the node as it is.
            return None, None
        if len(variants) == 1:
            return variants[0], None
        return None, variants
@@ -0,0 +1,28 @@
1
+ from typing import Any
2
+
3
+ from .template import Comparator, ComparatorResult, ProcessingContext, ToDelete
4
+
5
+
6
class NoAdditionalProperties(Comparator):
    """Add ``additionalProperties: false`` to object nodes that lack the keyword.

    Applies only to nodes with ``type: "object"`` and never overwrites an
    ``additionalProperties`` value that is already present.
    """

    name = "no_additional_properties"

    def can_process(self, ctx: ProcessingContext, env: str, node: dict) -> bool:
        """Accept object nodes where ``additionalProperties`` is not set yet."""
        if "additionalProperties" in node:
            return False
        return node.get("type") == "object"

    def process(self, ctx: ProcessingContext, env: str, node: dict) -> ComparatorResult:
        """Return the update ``{"additionalProperties": False}`` for this node."""
        patch: dict[str, ToDelete | Any | bool] = {"additionalProperties": False}
        return patch, None
@@ -0,0 +1,38 @@
1
+ import logging
2
+
3
+ from .template import Comparator, ComparatorResult, ProcessingContext
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+
8
class RequiredComparator(Comparator):
    """Derive the ``required`` keyword from the JSON documents at this level.

    A key is required when every JSON resource at the current level is a
    dict and contains that key.
    NOTE(review): this class defines no ``name`` of its own, so it uses the
    one inherited from ``Comparator`` - confirm that this is intended.
    """

    def can_process(self, ctx: ProcessingContext, env: str, node: dict) -> bool:
        """Accept untyped nodes, levels without JSON, and real (non pseudo-array) objects."""
        if not ctx.jsons:
            return True
        node_type = node.get("type")
        if node_type is None:
            return True
        return node_type == "object" and not node.get("isPseudoArray", False)

    def process(self, ctx: ProcessingContext, env: str, node: dict) -> ComparatorResult:
        """Return ``{"required": [...]}`` (sorted) or ``(None, None)`` when empty."""
        documents = [res.content for res in ctx.jsons]

        # Every key seen in any dict document at this level.
        observed: set[str] = set()
        for doc in documents:
            if isinstance(doc, dict):
                observed.update(doc.keys())

        # A single non-dict document means no key can be present everywhere.
        only_objects = all(isinstance(doc, dict) for doc in documents)

        required = [
            key
            for key in sorted(observed)
            if only_objects and all(key in doc for doc in documents)
        ]

        if not required:
            return None, None
        return {"required": required}, None
@@ -0,0 +1,35 @@
1
+ from dataclasses import dataclass
2
+ from typing import Any, Optional
3
+
4
+
5
+ @dataclass
6
+ class ToDelete:
7
+ content: int | float | str | list | dict = ""
8
+ comparator_trigger: Optional["Comparator"] = None
9
+
10
+
11
@dataclass
class Resource:
    """One input handed to the comparators: an identifier, a kind and a payload."""

    # Identifier of the resource; comparators collect these ids as triggers.
    id: str
    # Kind of the resource.
    # NOTE(review): allowed values are not visible here - confirm in the pipeline.
    type: str
    # The payload itself: a schema fragment or a JSON value.
    content: Any
16
+
17
+
18
@dataclass
class ProcessingContext:
    """Resources available to comparators at the level being processed."""

    # Schema resources present at this level.
    schemas: list[Resource]
    # JSON value resources present at this level.
    jsons: list[Resource]
    # When True, comparators must not produce alternative variants here
    # (TypeComparator then falls back to a single variant).
    sealed: bool = False
23
+
24
+
25
+ ComparatorResult = tuple[Optional[dict[str, ToDelete | Any | bool]], Optional[list[dict]]]
26
+
27
+
28
class Comparator:
    """Base class for comparators; the default implementation does nothing.

    Subclasses override ``can_process`` to opt in for a node and ``process``
    to return ``(update, variants)``: a dict of keys to merge into the node
    and/or a list of alternative variant dicts (either may be ``None``).
    """

    name = "base"

    def can_process(self, ctx: ProcessingContext, env: str, prev_result: dict) -> bool:
        """Tell whether this comparator applies to the node; the base never does."""
        return False

    def process(self, ctx: ProcessingContext, env: str, prev_result: dict) -> ComparatorResult:
        """Produce the comparator result; the base contributes nothing."""
        nothing: ComparatorResult = (None, None)
        return nothing
@@ -0,0 +1,75 @@
1
+ from typing import Any
2
+
3
+ from .template import Comparator, ComparatorResult, ProcessingContext
4
+
5
+
6
def infer_json_type(v: Any) -> str:
    """Map a decoded JSON value to its JSON Schema type name.

    Returns one of ``"null"``, ``"boolean"``, ``"integer"``, ``"number"``,
    ``"string"``, ``"array"``, ``"object"``, or ``"any"`` for values of any
    other Python type.
    """
    if v is None:
        return "null"

    # Order matters: bool is a subclass of int, so it has to be tested first.
    type_table = (
        (bool, "boolean"),
        (int, "integer"),
        (float, "number"),
        (str, "string"),
        (list, "array"),
        (dict, "object"),
    )
    for python_type, json_name in type_table:
        if isinstance(v, python_type):
            return json_name
    return "any"
22
+
23
+
24
+ def infer_schema_type(s: dict | str) -> None | str:
25
+ if not isinstance(s, dict):
26
+ return None
27
+ if "type" in s:
28
+ t = s["type"]
29
+ if isinstance(t, str):
30
+ return t
31
+ if "properties" in s:
32
+ return "object"
33
+ if "items" in s:
34
+ return "array"
35
+ return None
36
+
37
+
38
class TypeComparator(Comparator):
    """Assign ``type`` to a node from the schemas and JSON values at its level.

    Each distinct type becomes one variant carrying the ids of the resources
    that produced it (``j2sElementTrigger``).  ``integer`` is merged into
    ``number`` when both occur.
    """

    name = "type"

    def can_process(self, ctx: ProcessingContext, env: str, prev_result: dict) -> bool:
        """Run only for nodes without a ``type`` that have at least one resource."""
        if "type" in prev_result:
            return False
        return bool(ctx.schemas or ctx.jsons)

    def process(self, ctx: ProcessingContext, env: str, prev_result: dict) -> ComparatorResult:
        """Return a single typed update, or a list of variants when types differ."""
        ids_by_type: dict[str, set[str]] = {}

        # Schemas contribute only when a type can be inferred from them.
        for schema in ctx.schemas:
            schema_type = infer_schema_type(schema.content)
            if schema_type:
                ids_by_type.setdefault(schema_type, set()).add(schema.id)

        # JSON values always map to some type name.
        for doc in ctx.jsons:
            ids_by_type.setdefault(infer_json_type(doc.content), set()).add(doc.id)

        # Normalisation: "number" absorbs "integer".
        if "number" in ids_by_type and "integer" in ids_by_type:
            ids_by_type["number"].update(ids_by_type.pop("integer"))

        if not ids_by_type:
            return None, None

        variants: list[dict[str, Any]] = [
            {"type": type_name, "j2sElementTrigger": sorted(ids)}
            for type_name, ids in ids_by_type.items()
        ]

        # Inside a sealed context no combinator may be created, so the first
        # variant (insertion order) is used; a lone variant is returned the
        # same way.
        if ctx.sealed or len(variants) == 1:
            return variants[0], None

        return None, variants
genschema/node.py ADDED
@@ -0,0 +1,18 @@
1
+ from enum import Enum, auto
2
+ from typing import Any
3
+
4
+
5
class NodeKind(Enum):
    """Structural category of a schema node."""

    SCALAR = auto()  # non-container value
    OBJECT = auto()  # JSON object
    ARRAY = auto()  # JSON array
    UNION = auto()  # combination of several alternatives
10
+
11
+
12
class SchemaNode:
    """A schema node: its structural kind plus the schema dict built for it."""

    def __init__(self, kind: NodeKind):
        """Create a node of the given kind with an empty schema."""
        self.schema: dict[str, Any] = {}
        self.kind = kind

    def as_dict(self) -> dict[str, Any]:
        """Return the node's schema dict (the live object, not a copy)."""
        schema = self.schema
        return schema