PyPI - genschema - Versions diffs - 0.1.0__py3-none-any.whl - Mend

genschema 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

genschema/__init__.py +5 -0
genschema/cli.py +147 -0
genschema/comparators/__init__.py +17 -0
genschema/comparators/delete_element.py +19 -0
genschema/comparators/empty.py +47 -0
genschema/comparators/flag.py +14 -0
genschema/comparators/format.py +89 -0
genschema/comparators/no_additional_prop.py +28 -0
genschema/comparators/required.py +38 -0
genschema/comparators/template.py +35 -0
genschema/comparators/type.py +75 -0
genschema/node.py +18 -0
genschema/pipeline.py +270 -0
genschema/pseudo_arrays.py +23 -0
genschema/py.typed +0 -0
genschema-0.1.0.dist-info/METADATA +913 -0
genschema-0.1.0.dist-info/RECORD +21 -0
genschema-0.1.0.dist-info/WHEEL +5 -0
genschema-0.1.0.dist-info/entry_points.txt +2 -0
genschema-0.1.0.dist-info/licenses/LICENSE +661 -0
genschema-0.1.0.dist-info/top_level.txt +1 -0

genschema/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from .pipeline import Converter
+from .pseudo_arrays import PseudoArrayHandler, PseudoArrayHandlerBase
+# Names re-exported as the package's public API (what `from genschema import *` provides).
+__all__ = ["Converter", "PseudoArrayHandler", "PseudoArrayHandlerBase"]
+# Package version string.
+__version__ = "0.1.0"

genschema/cli.py ADDED Viewed

@@ -0,0 +1,147 @@
+import argparse
+import json
+import sys
+import time
+from rich.console import Console
+from . import Converter, PseudoArrayHandler
+from .comparators import (
+    DeleteElement,
+    EmptyComparator,
+    FormatComparator,
+    RequiredComparator,
+)
+# Module-level rich console; main() routes its status and error messages through it.
+console = Console()
+def main() -> None:
+    """Command-line entry point: generate a JSON Schema from one or more JSON inputs.
+    Inputs are read from the given paths ('-' means stdin); when only options are
+    passed, a single document is read from stdin. The schema is written to --output
+    when given, otherwise it is emitted on stdout as real JSON and the run statistics
+    go to stderr so that stdout can be piped. Every failure prints a red message and
+    exits with status 1.
+    """
+    parser = argparse.ArgumentParser(
+        description="Generate JSON Schema from JSON input using genschema.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  genschema input.json -o schema.json
+  genschema input1.json input2.json --base-of oneOf
+  cat input.json | genschema -
+  genschema --base-of anyOf < input.json
+  genschema dir/file1.json dir/file2.json -o schema.json
+        """,
+    )
+    parser.add_argument(
+        "inputs",
+        nargs="*",
+        help="Paths to input JSON files. Use '-' for stdin. "
+        "If no arguments are provided, show this help message.",
+    )
+    parser.add_argument(
+        "-o",
+        "--output",
+        help="Path to output JSON Schema file. If not specified, output to stdout.",
+    )
+    parser.add_argument(
+        "--base-of",
+        choices=["anyOf", "oneOf"],
+        default="anyOf",
+        help="Combinator for differing types (default: anyOf).",
+    )
+    parser.add_argument(
+        "--no-pseudo-array", action="store_true", help="Disable pseudo-array handling."
+    )
+    parser.add_argument("--no-format", action="store_true", help="Disable FormatComparator.")
+    parser.add_argument("--no-required", action="store_true", help="Disable RequiredComparator.")
+    parser.add_argument("--no-empty", action="store_true", help="Disable EmptyComparator.")
+    parser.add_argument(
+        "--no-delete-element", action="store_true", help="Disable DeleteElement comparators."
+    )
+    # With no arguments at all there is nothing to read: show help and fail.
+    if len(sys.argv) == 1:
+        parser.print_help(sys.stderr)
+        sys.exit(1)
+    args = parser.parse_args()
+    # Collect input data. When only options were given (for example
+    # `genschema --base-of anyOf < input.json`) fall back to one read from stdin.
+    datas = []
+    for input_path in args.inputs or ["-"]:
+        from_stdin = input_path == "-"
+        try:
+            if from_stdin:
+                data = json.load(sys.stdin)
+            else:
+                with open(input_path, "r", encoding="utf-8") as f:
+                    data = json.load(f)
+        except FileNotFoundError:
+            console.print(f"[red]File not found: {input_path}[/red]")
+            sys.exit(1)
+        except (json.JSONDecodeError, UnicodeDecodeError) as e:
+            # UnicodeDecodeError covers input that is not valid UTF-8 text.
+            if from_stdin:
+                console.print(f"[red]Error reading JSON from stdin: {e}[/red]")
+            else:
+                console.print(f"[red]Invalid JSON in file {input_path}: {e}[/red]")
+            sys.exit(1)
+        except OSError as e:
+            # Permission denied, path is a directory, and similar I/O failures.
+            console.print(f"[red]Error reading file {input_path}: {e}[/red]")
+            sys.exit(1)
+        datas.append(data)
+    # Converter setup
+    pseudo_handler = None if args.no_pseudo_array else PseudoArrayHandler()
+    conv = Converter(pseudo_handler=pseudo_handler, base_of=args.base_of)
+    for data in datas:
+        conv.add_json(data)
+    # Register comparators conditionally
+    if not args.no_format:
+        conv.register(FormatComparator())
+    if not args.no_required:
+        conv.register(RequiredComparator())
+    if not args.no_empty:
+        conv.register(EmptyComparator())
+    if not args.no_delete_element:
+        conv.register(DeleteElement())
+        conv.register(DeleteElement("isPseudoArray"))
+    # Generate schema; perf_counter is monotonic, so the elapsed time cannot go negative.
+    start_time = time.perf_counter()
+    try:
+        result = conv.run()
+    except Exception as e:
+        console.print(f"[red]Error generating schema: {e}[/red]")
+        sys.exit(1)
+    elapsed = round(time.perf_counter() - start_time, 4)
+    # Output result
+    if args.output:
+        try:
+            with open(args.output, "w", encoding="utf-8") as f:
+                json.dump(result, f, indent=2, ensure_ascii=False)
+        except Exception as e:
+            console.print(f"[red]Error writing file {args.output}: {e}[/red]")
+            sys.exit(1)
+        console.print(f"[green]Schema successfully written to {args.output}[/green]")
+        status = console
+    else:
+        # Serialize with json instead of console.print(result): rich would render the
+        # Python repr of the dict (single quotes, True/False/None), which is not JSON.
+        try:
+            rendered = json.dumps(result, indent=2, ensure_ascii=False)
+        except (TypeError, ValueError) as e:
+            console.print(f"[red]Error generating schema: {e}[/red]")
+            sys.exit(1)
+        sys.stdout.write(rendered + "\n")
+        # Keep stdout limited to the schema so it can be piped; statistics go to stderr.
+        status = Console(stderr=True)
+    # Execution info
+    instances_word = "instance" if len(datas) == 1 else "instances"
+    status.print(f"Generated from {len(datas)} JSON {instances_word}.")
+    status.print(f"Elapsed time: {elapsed} sec.")
+if __name__ == "__main__":
+    main()

genschema/comparators/__init__.py ADDED Viewed

@@ -0,0 +1,17 @@
+from .delete_element import DeleteElement
+from .empty import EmptyComparator
+from .flag import FlagMaker
+from .format import FormatComparator
+from .no_additional_prop import NoAdditionalProperties
+from .required import RequiredComparator
+from .type import TypeComparator
+# Comparator classes re-exported by the `comparators` subpackage.
+__all__ = [
+    "FormatComparator",
+    "TypeComparator",
+    "RequiredComparator",
+    "FlagMaker",
+    "EmptyComparator",
+    "NoAdditionalProperties",
+    "DeleteElement",
+]

genschema/comparators/delete_element.py ADDED Viewed

@@ -0,0 +1,19 @@
+from .template import Comparator, ComparatorResult, ProcessingContext, ToDelete
+class DeleteElement(Comparator):
+    """Comparator that marks one attribute of a node for deletion.
+    It applies to any node containing the configured key (default
+    "j2sElementTrigger") and answers with that key wrapped in a ToDelete marker.
+    NOTE(review): how the marker is consumed is decided by the pipeline, which is
+    not visible from this module.
+    """
+    name = "delete-element"
+    attribute = ""
+    def __init__(self, attribute: str = "j2sElementTrigger"):
+        super().__init__()
+        self.attribute = attribute
+    def can_process(self, ctx: ProcessingContext, env: str, node: dict) -> bool:
+        # Applicable only when the target key is present on this node.
+        target = self.attribute
+        return target in node
+    def process(self, ctx: ProcessingContext, env: str, node: dict) -> ComparatorResult:
+        key = self.attribute
+        # Carry the current value (-1 when the key is missing) plus a reference to self.
+        marker = ToDelete(node.get(key, -1), self)
+        return {key: marker}, None

genschema/comparators/empty.py ADDED Viewed

@@ -0,0 +1,47 @@
+from .template import Comparator, ComparatorResult, ProcessingContext, Resource
+class EmptyComparator(Comparator):
+    """
+    Adds emptiness bounds to object/array nodes based on the resources at this level.
+    When flag_empty is set and none of ctx.schemas + ctx.jsons has non-empty content,
+    the result is maxProperties=0 (objects) or maxItems=0 (arrays). If that rule did
+    not apply, flag_non_empty is set and every resource is non-empty, the result is
+    minProperties=1 or minItems=1. In all other cases nothing is added.
+    """
+    name = "empty"
+    def __init__(self, flag_empty: bool = True, flag_non_empty: bool = True):
+        self.flag_empty = flag_empty
+        self.flag_non_empty = flag_non_empty
+    def can_process(self, ctx: ProcessingContext, env: str, node: dict) -> bool:
+        # Only container nodes carry emptiness bounds.
+        kind = node.get("type")
+        if kind == "object":
+            return True
+        return kind == "array"
+    def process(self, ctx: ProcessingContext, env: str, node: dict) -> ComparatorResult:
+        # Dicts and lists count as non-empty by truthiness; scalars are always non-empty.
+        filled = [
+            bool(res.content) if isinstance(res.content, (dict, list)) else True
+            for res in ctx.schemas + ctx.jsons
+        ]
+        kind = node.get("type")
+        if self.flag_empty and not any(filled):
+            # Nothing at this level has content: cap the container at zero entries.
+            if kind == "object":
+                return {"maxProperties": 0}, None
+            if kind == "array":
+                return {"maxItems": 0}, None
+            return None, None
+        if self.flag_non_empty and all(filled):
+            # Everything at this level has content: require at least one entry.
+            if kind == "object":
+                return {"minProperties": 1}, None
+            if kind == "array":
+                return {"minItems": 1}, None
+        return None, None

genschema/comparators/flag.py ADDED Viewed

@@ -0,0 +1,14 @@
+from .template import Comparator, ComparatorResult, ProcessingContext
+class FlagMaker(Comparator):
+    """Debug-style comparator that makes visible where comparators can fire.
+    It accepts every node and contributes {"Flag": True} to it.
+    """
+    name = "flag"
+    def can_process(self, ctx: ProcessingContext, env: str, node: dict) -> bool:
+        # Unconditional: every node is accepted, whatever its type.
+        return True
+    def process(self, ctx: ProcessingContext, env: str, node: dict) -> ComparatorResult:
+        flagged = {"Flag": True}
+        return flagged, None

genschema/comparators/format.py ADDED Viewed

@@ -0,0 +1,89 @@
+import re
+from collections import defaultdict
+from functools import lru_cache
+from typing import Any, Optional
+from .template import Comparator, ComparatorResult, ProcessingContext
+class FormatDetector:
+    """Global format detector; extend it by adding patterns to _registry.
+    _registry maps a type hint to an ordered mapping of compiled pattern -> format name.
+    """
+    _registry = {
+        "string": {
+            re.compile(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"): "email",
+            re.compile(
+                r"^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$",
+                re.I,
+            ): "uuid",
+            re.compile(r"^\d{4}-\d{2}-\d{2}$"): "date",
+            re.compile(
+                r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2})?$"
+            ): "date-time",
+            re.compile(r"^https?://[^\s/$.?#].[^\s]*$", re.I): "uri",
+            re.compile(
+                r"^(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}" r"(?:25[0-5]|2[0-4]\d|[01]?\d\d?)$"
+            ): "ipv4",
+        }
+    }
+    @classmethod
+    @lru_cache(maxsize=512)
+    def detect(cls, value: Any, type_hint: str = "string") -> Optional[str]:
+        """Return the name of the first registered format that str(value) fully matches.
+        Patterns are tried in registry order; None is returned when nothing matches or
+        when type_hint has no registered patterns. Results are memoized by lru_cache,
+        so value must be hashable.
+        """
+        candidates = cls._registry.get(type_hint, {})
+        matches = (
+            label for regex, label in candidates.items() if regex.fullmatch(str(value))
+        )
+        return next(matches, None)
+class FormatComparator(Comparator):
+    """Refines string nodes with a "format" keyword.
+    Formats come from string schemas that declare one and from JSON string values
+    recognised by FormatDetector. One resulting variant is returned as a node update;
+    several variants are returned as alternatives.
+    """
+    name = "format"
+    def can_process(self, ctx: ProcessingContext, env: str, prev_result: dict) -> bool:
+        # Only runs once this level has already been typed as a string.
+        return prev_result.get("type") == "string"
+    def process(self, ctx: ProcessingContext, env: str, prev_result: dict) -> ComparatorResult:
+        # Maps each format (None = no format) to the ids of the resources that support it.
+        buckets: dict[str | None, set[str]] = defaultdict(set)
+        # Seed the "no format" bucket with triggers inherited from earlier comparators
+        # (usually TypeComparator).
+        buckets[None].update(set(prev_result.get("j2sElementTrigger", [])))
+        def assign(fmt: Optional[str], resource_id: str) -> None:
+            # A resource with a concrete format no longer belongs to the "no format" bucket.
+            buckets[fmt].add(resource_id)
+            if fmt is not None:
+                buckets[None].discard(resource_id)
+        # 1. Formats explicitly declared by string schemas.
+        for schema in ctx.schemas:
+            body = schema.content
+            if isinstance(body, dict) and body.get("type") == "string":
+                assign(body.get("format"), schema.id)
+        # 2. Formats inferred from JSON string values.
+        for sample in ctx.jsons:
+            if isinstance(sample.content, str):
+                assign(FormatDetector.detect(sample.content), sample.id)
+        # Build one variant per non-empty bucket, in bucket insertion order.
+        variants: list[dict] = []
+        for fmt, ids in buckets.items():
+            if ids:
+                entry = {"type": "string", "j2sElementTrigger": sorted(ids)}
+                if fmt is not None:
+                    entry["format"] = fmt
+                variants.append(entry)
+        if not variants:
+            # Nothing new was found: leave the node as it is.
+            return None, None
+        if len(variants) == 1:
+            return variants[0], None
+        return None, variants

genschema/comparators/no_additional_prop.py ADDED Viewed

@@ -0,0 +1,28 @@
+from typing import Any
+from .template import Comparator, ComparatorResult, ProcessingContext, ToDelete
+class NoAdditionalProperties(Comparator):
+    """
+    Comparator that adds additionalProperties: false to every object node
+    (type: "object") that does not define the keyword yet.
+    It only acts on object nodes and never overwrites an existing
+    additionalProperties value.
+    """
+    name = "no_additional_properties"
+    def can_process(self, ctx: ProcessingContext, env: str, node: dict) -> bool:
+        # Skip nodes that already define the keyword; otherwise require an object node.
+        if "additionalProperties" in node:
+            return False
+        return node.get("type") == "object"
+    def process(self, ctx: ProcessingContext, env: str, node: dict) -> ComparatorResult:
+        """
+        Return an update that sets additionalProperties to false.
+        The update concerns the current node only; no alternatives are produced.
+        """
+        patch: dict[str, ToDelete | Any | bool] = {"additionalProperties": False}
+        return patch, None

genschema/comparators/required.py ADDED Viewed

@@ -0,0 +1,38 @@
+import logging
+from .template import Comparator, ComparatorResult, ProcessingContext
+logger = logging.getLogger(__name__)
+class RequiredComparator(Comparator):
+    """
+    Comparator that determines required fields.
+    Sets "required" to the sorted keys present in every JSON resource at this level.
+    NOTE(review): unlike the other comparators, this class sets no `name`, so it
+    inherits the base value "base" — confirm whether that is intended.
+    """
+    def can_process(self, ctx: ProcessingContext, env: str, node: dict) -> bool:
+        # Accept untyped nodes and levels without JSON samples outright ...
+        if not ctx.jsons or node.get("type") is None:
+            return True
+        # ... otherwise only real objects, excluding pseudo-arrays.
+        return node.get("type") == "object" and not node.get("isPseudoArray", False)
+    def process(self, ctx: ProcessingContext, env: str, node: dict) -> ComparatorResult:
+        # Key sets of the JSON resources that are objects.
+        key_sets = [set(j.content) for j in ctx.jsons if isinstance(j.content, dict)]
+        # A single non-object JSON resource means no key can be present everywhere.
+        only_objects = len(key_sets) == len(ctx.jsons)
+        seen: set[str] = set()
+        for key_set in key_sets:
+            seen |= key_set
+        required = [
+            key
+            for key in sorted(seen)
+            if only_objects and all(key in key_set for key_set in key_sets)
+        ]
+        if not required:
+            return None, None
+        return {"required": required}, None

genschema/comparators/template.py ADDED Viewed

@@ -0,0 +1,35 @@
+from dataclasses import dataclass
+from typing import Any, Optional
+@dataclass
+class ToDelete:
+    """Marker pairing a value with the comparator that produced it.
+    NOTE(review): presumably tells the pipeline to remove the key it is attached
+    to — confirm against pipeline.py.
+    """
+    # Value carried by the marker.
+    content: int | float | str | list | dict = ""
+    # Comparator that emitted the marker, if any.
+    comparator_trigger: Optional["Comparator"] = None
+@dataclass
+class Resource:
+    """One input item (a schema or a JSON value) seen at the current level."""
+    # Identifier of the resource.
+    id: str
+    # Resource kind label; NOTE(review): allowed values are not visible here.
+    type: str
+    # Payload of the resource.
+    content: Any
+@dataclass
+class ProcessingContext:
+    """Resources available to comparators at the level being processed."""
+    # Schema resources at this level.
+    schemas: list[Resource]
+    # JSON value resources at this level.
+    jsons: list[Resource]
+    # Flag consulted by comparators; defaults to False.
+    sealed: bool = False
+# Result of Comparator.process(): (update dict for the node or None,
+# list of alternative variant dicts or None).
+ComparatorResult = tuple[Optional[dict[str, ToDelete | Any | bool]], Optional[list[dict]]]
+class Comparator:
+    """Base class for comparators; subclasses override can_process() and process()."""
+    # Identifier of the comparator; subclasses usually override it.
+    name = "base"
+    def can_process(self, ctx: ProcessingContext, env: str, prev_result: dict) -> bool:
+        """Return whether this comparator applies to the node; the base never does."""
+        return False
+    def process(self, ctx: ProcessingContext, env: str, prev_result: dict) -> ComparatorResult:
+        """Return (update, alternatives) for the node; the base contributes nothing."""
+        return None, None

genschema/comparators/type.py ADDED Viewed

@@ -0,0 +1,75 @@
+from typing import Any
+from .template import Comparator, ComparatorResult, ProcessingContext
+def infer_json_type(v: Any) -> str:
+    """Return the JSON Schema type name for a decoded JSON value.
+    None maps to "null"; values of an unrecognised Python type map to "any".
+    """
+    if v is None:
+        return "null"
+    # Order matters: bool is a subclass of int, so it has to be tested first.
+    ladder = (
+        (bool, "boolean"),
+        (int, "integer"),
+        (float, "number"),
+        (str, "string"),
+        (list, "array"),
+        (dict, "object"),
+    )
+    for py_type, json_name in ladder:
+        if isinstance(v, py_type):
+            return json_name
+    return "any"
+def infer_schema_type(s: dict | str) -> None | str:
+    """Return the type a schema fragment describes, or None when it cannot be told.
+    A string-valued "type" wins; otherwise "properties" implies "object" and
+    "items" implies "array". Anything that is not a dict yields None.
+    """
+    if not isinstance(s, dict):
+        return None
+    declared = s.get("type")
+    if isinstance(declared, str):
+        return declared
+    # No usable "type" keyword: fall back to structural hints, in this order.
+    for keyword, implied in (("properties", "object"), ("items", "array")):
+        if keyword in s:
+            return implied
+    return None
+class TypeComparator(Comparator):
+    """Infers the "type" of a node from the schemas and JSON values at this level.
+    One inferred type is returned as a node update; several are returned as
+    alternatives, unless the context is sealed, in which case the first one is used.
+    """
+    name = "type"
+    def can_process(self, ctx: ProcessingContext, env: str, prev_result: dict) -> bool:
+        # Needs an untyped node and at least one resource to infer from.
+        if "type" in prev_result:
+            return False
+        return bool(ctx.schemas or ctx.jsons)
+    def process(self, ctx: ProcessingContext, env: str, prev_result: dict) -> ComparatorResult:
+        # Maps each inferred type to the ids of the resources that exhibit it.
+        ids_by_type: dict[str, set[str]] = {}
+        for schema in ctx.schemas:
+            kind = infer_schema_type(schema.content)
+            if kind:
+                ids_by_type.setdefault(kind, set()).add(schema.id)
+        for sample in ctx.jsons:
+            ids_by_type.setdefault(infer_json_type(sample.content), set()).add(sample.id)
+        # Normalisation: "number" absorbs "integer".
+        if "number" in ids_by_type and "integer" in ids_by_type:
+            ids_by_type["number"] |= ids_by_type.pop("integer")
+        if not ids_by_type:
+            return None, None
+        variants: list[dict[str, Any]] = []
+        for kind, ids in ids_by_type.items():
+            variants.append({"type": kind, "j2sElementTrigger": sorted(ids)})
+        # A sealed context cannot hold alternatives, so the first variant (in
+        # insertion order) is used; a single variant is returned directly as well.
+        if ctx.sealed or len(variants) == 1:
+            return variants[0], None
+        return None, variants

genschema/node.py ADDED Viewed

@@ -0,0 +1,18 @@
+from enum import Enum, auto
+from typing import Any
+class NodeKind(Enum):
+    """Kinds of schema node; member values are assigned by auto()."""
+    SCALAR = auto()
+    OBJECT = auto()
+    ARRAY = auto()
+    UNION = auto()
+class SchemaNode:
+    """A schema node: its kind plus the schema dict built for it."""
+    def __init__(self, kind: NodeKind):
+        # The schema starts empty.
+        self.schema: dict[str, Any] = {}
+        self.kind = kind
+    def as_dict(self) -> dict[str, Any]:
+        """Return the node's schema dict itself (not a copy)."""
+        schema = self.schema
+        return schema