schemafit 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- schemafit/__init__.py +16 -0
- schemafit/__main__.py +10 -0
- schemafit/cli.py +187 -0
- schemafit/linter.py +125 -0
- schemafit/model.py +37 -0
- schemafit/py.typed +0 -0
- schemafit/repair.py +63 -0
- schemafit/report.py +48 -0
- schemafit/rules/anthropic.json +19 -0
- schemafit/rules/gemini.json +41 -0
- schemafit/rules/openai.json +43 -0
- schemafit/walk.py +120 -0
- schemafit-0.1.0.dist-info/METADATA +154 -0
- schemafit-0.1.0.dist-info/RECORD +18 -0
- schemafit-0.1.0.dist-info/WHEEL +5 -0
- schemafit-0.1.0.dist-info/entry_points.txt +2 -0
- schemafit-0.1.0.dist-info/licenses/LICENSE +21 -0
- schemafit-0.1.0.dist-info/top_level.txt +1 -0
schemafit/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""schemafit — provider-aware structured-output / JSON-Schema CI linter.
|
|
2
|
+
|
|
3
|
+
Statically lint a JSON Schema / tool definition / response_format against each
|
|
4
|
+
LLM provider's documented constraint surface (OpenAI, Anthropic, Gemini) and
|
|
5
|
+
fail CI *before* the schema 400s in production on provider X.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
__version__ = "0.1.0"
|
|
11
|
+
|
|
12
|
+
from .linter import PROVIDERS, has_errors, lint, lint_multi
|
|
13
|
+
from .model import Finding
|
|
14
|
+
from .repair import repair
|
|
15
|
+
|
|
16
|
+
__all__ = ["PROVIDERS", "Finding", "__version__", "has_errors", "lint", "lint_multi", "repair"]
|
schemafit/__main__.py
ADDED
schemafit/cli.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"""schemafit command-line interface.
|
|
2
|
+
|
|
3
|
+
Commands:
|
|
4
|
+
lint <schema.json> --provider openai[,anthropic,gemini] # exit 1 on violations
|
|
5
|
+
repair <schema.json> --provider <p> [--out fixed.json]
|
|
6
|
+
providers
|
|
7
|
+
demo # hermetic proof
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import argparse
|
|
13
|
+
import json
|
|
14
|
+
import sys
|
|
15
|
+
|
|
16
|
+
from . import __version__, report
|
|
17
|
+
from .linter import PROVIDERS, has_errors, lint, lint_multi
|
|
18
|
+
from .repair import repair
|
|
19
|
+
|
|
20
|
+
# A schema that is valid for OpenAI but deliberately trips Anthropic (rejected
|
|
21
|
+
# validation keywords) and Gemini (anyOf). Used by `demo` for a hermetic proof.
|
|
22
|
+
DEMO_BAD_SCHEMA: dict = {
|
|
23
|
+
"type": "object",
|
|
24
|
+
"additionalProperties": False,
|
|
25
|
+
"properties": {
|
|
26
|
+
"name": {"type": "string", "minLength": 1, "maxLength": 50},
|
|
27
|
+
"email": {"type": "string", "format": "email"},
|
|
28
|
+
"age": {"type": "integer", "minimum": 0, "maximum": 120},
|
|
29
|
+
"status": {"anyOf": [{"type": "string"}, {"type": "null"}]},
|
|
30
|
+
},
|
|
31
|
+
"required": ["name", "email", "age", "status"],
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _load_schema(path: str) -> object:
|
|
36
|
+
if path == "-":
|
|
37
|
+
return json.load(sys.stdin)
|
|
38
|
+
with open(path, encoding="utf-8") as fh:
|
|
39
|
+
return json.load(fh)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _parse_providers(spec: str) -> list[str]:
|
|
43
|
+
provs = [p.strip() for p in spec.split(",") if p.strip()]
|
|
44
|
+
if not provs:
|
|
45
|
+
raise SystemExit("error: --provider requires at least one provider")
|
|
46
|
+
for p in provs:
|
|
47
|
+
if p not in PROVIDERS:
|
|
48
|
+
raise SystemExit(f"error: unknown provider {p!r} (choose from {', '.join(PROVIDERS)})")
|
|
49
|
+
return provs
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def cmd_lint(args: argparse.Namespace) -> int:
    """Lint every schema in ``args.schemas`` against the requested providers.

    In human format each schema's report is printed as soon as it is linted;
    in JSON format a single combined document is printed after all schemas
    have been processed.

    Returns:
        0 when no schema failed; 1 when at least one did (error findings, or
        any finding at all when ``args.strict`` is set); 2 when a schema could
        not be read, decoded, or traversed without exhausting recursion.
    """
    providers = _parse_providers(args.provider)
    all_results: dict[str, dict] = {}
    overall_fail = False
    for path in args.schemas:
        try:
            schema = _load_schema(path)
            results = lint_multi(schema, providers)
        except (OSError, json.JSONDecodeError, UnicodeDecodeError) as exc:
            # UnicodeDecodeError: a file that is not valid UTF-8 fails while
            # being read, before the JSON parser could raise JSONDecodeError.
            print(f"error: cannot read schema {path!r}: {exc}", file=sys.stderr)
            return 2
        except RecursionError:
            print(f"error: schema {path!r} is too deeply nested to lint safely", file=sys.stderr)
            return 2
        all_results[path] = results
        if args.strict:
            # Strict mode: any finding, including warnings, fails the run.
            failed = any(findings for findings in results.values())
        else:
            failed = any(has_errors(findings) for findings in results.values())
        overall_fail = overall_fail or failed
        if args.format != "json":
            if len(args.schemas) > 1:
                print(f"== {path} ==")
            print(report.format_human(results))
    if args.format == "json":
        print(report.format_json_multi(all_results))
    return 1 if overall_fail else 0
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def cmd_repair(args: argparse.Namespace) -> int:
    """Repair one schema for ``args.provider`` and emit the result.

    The repaired schema is written to ``args.out`` when given, otherwise to
    stdout. A one-line summary and every manual-required tag go to stderr.

    Returns:
        0 on success; 2 when the schema cannot be read, decoded, or traversed
        without exhausting recursion, or when the output file cannot be
        written.

    Raises:
        SystemExit: if ``args.provider`` is not in ``PROVIDERS``.
    """
    if args.provider not in PROVIDERS:
        raise SystemExit(f"error: unknown provider {args.provider!r}")
    try:
        schema = _load_schema(args.schema)
        fixed, rep = repair(schema, args.provider)
    except (OSError, json.JSONDecodeError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError: a file that is not valid UTF-8 fails while
        # being read, before the JSON parser could raise JSONDecodeError.
        print(f"error: cannot read schema {args.schema!r}: {exc}", file=sys.stderr)
        return 2
    except RecursionError:
        print(
            f"error: schema {args.schema!r} is too deeply nested to repair safely",
            file=sys.stderr,
        )
        return 2
    rendered = json.dumps(fixed, indent=2, sort_keys=True)
    if args.out:
        try:
            with open(args.out, "w", encoding="utf-8") as fh:
                fh.write(rendered + "\n")
        except OSError as exc:
            # Report an unwritable destination the same way as an unreadable
            # input instead of letting a traceback escape.
            print(f"error: cannot write {args.out!r}: {exc}", file=sys.stderr)
            return 2
        print(f"wrote {args.out}", file=sys.stderr)
    else:
        print(rendered)
    print(
        f"repair: auto_fixed={len(rep['auto_fixed'])} "
        f"lossy={len(rep['lossy'])} manual_required={len(rep['manual_required'])}",
        file=sys.stderr,
    )
    for tag in rep["manual_required"]:
        print(f" MANUAL {tag}", file=sys.stderr)
    return 0
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def cmd_providers(_args: argparse.Namespace) -> int:
    """Print each name in ``PROVIDERS`` on its own line and return 0."""
    if PROVIDERS:
        print("\n".join(PROVIDERS))
    return 0
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def cmd_demo(_args: argparse.Namespace) -> int:
    """Run the built-in lint -> repair -> multi-provider demonstration.

    Lints ``DEMO_BAD_SCHEMA`` for anthropic, repairs it, lints the repaired
    copy, then prints a PASS/FAIL matrix across every provider. Returns 0 when
    the original schema had errors and the repaired one has none, otherwise 3.
    """

    def count_errors(findings) -> int:
        # Number of findings whose severity is exactly "error".
        return len([item for item in findings if item.severity == "error"])

    print("== schemafit demo ==")
    initial = lint(DEMO_BAD_SCHEMA, "anthropic")
    initial_failed = has_errors(initial)
    print(
        f"PROVIDER=anthropic INPUT=demo-bad VIOLATIONS={count_errors(initial)} "
        f"EXIT={1 if initial_failed else 0}"
    )
    if initial:
        first = initial[0]
        print(
            f"VIOLATION_PATH={first.json_pointer} "
            f"(keyword: {first.keyword} -> rejected by anthropic)"
        )

    repaired, summary = repair(DEMO_BAD_SCHEMA, "anthropic")
    final = lint(repaired, "anthropic")
    final_failed = has_errors(final)
    print(
        f"--- after `schemafit repair --provider anthropic` --- "
        f"VIOLATIONS={count_errors(final)} EXIT={1 if final_failed else 0} "
        f"auto_fixed={len(summary['auto_fixed'])}"
    )

    matrix = lint_multi(DEMO_BAD_SCHEMA, list(PROVIDERS))
    cells = []
    for name in PROVIDERS:
        verdict = "FAIL" if has_errors(matrix[name]) else "PASS"
        cells.append(f"{name}={verdict}")
    rendered = " ".join(cells)
    print(f"MULTI: {rendered}")
    warning_total = len([item for item in matrix["gemini"] if item.severity == "warning"])
    print(f"NOTE: gemini portability warnings={warning_total} (version-sensitive, non-failing)")

    # The proof holds only if repair turned a failing schema into a passing one.
    if initial_failed and not final_failed:
        print("PROOF_OK")
        return 0
    print("PROOF_FAILED")
    return 3
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the ``schemafit`` argument parser.

    Registers the ``lint``, ``repair``, ``providers`` and ``demo``
    sub-commands. Each sub-parser stores its handler under ``func`` so the
    caller can dispatch with ``args.func(args)``.
    """
    parser = argparse.ArgumentParser(
        prog="schemafit",
        description="Provider-aware structured-output / JSON-Schema CI linter.",
    )
    parser.add_argument("--version", action="version", version=f"schemafit {__version__}")
    # A sub-command is mandatory; its name is stored in ``args.command``.
    sub = parser.add_subparsers(dest="command", required=True)

    # lint: one or more schema paths, a comma-separated provider list.
    p_lint = sub.add_parser("lint", help="lint one or more schemas against one or more providers")
    p_lint.add_argument("schemas", nargs="+", help="schema JSON file(s) ('-' = stdin)")
    p_lint.add_argument("--provider", required=True, help="comma list: openai,anthropic,gemini")
    p_lint.add_argument("--format", choices=("human", "json"), default="human")
    p_lint.add_argument("--strict", action="store_true", help="also fail (exit 1) on warnings")
    p_lint.set_defaults(func=cmd_lint)

    # repair: exactly one schema path and exactly one provider.
    p_rep = sub.add_parser("repair", help="emit a best-effort provider-valid variant")
    p_rep.add_argument("schema", help="path to a JSON schema file, or '-' for stdin")
    p_rep.add_argument("--provider", required=True, help="one of: openai|anthropic|gemini")
    p_rep.add_argument("--out", help="write fixed schema here (default: stdout)")
    p_rep.set_defaults(func=cmd_repair)

    p_prov = sub.add_parser("providers", help="list supported providers")
    p_prov.set_defaults(func=cmd_providers)

    p_demo = sub.add_parser("demo", help="run a hermetic end-to-end proof")
    p_demo.set_defaults(func=cmd_demo)

    return parser
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def main(argv: list[str] | None = None) -> int:
    """Parse ``argv`` and run the selected sub-command.

    ``argv`` is handed unchanged to the parser's ``parse_args``. The return
    value is whatever the sub-command handler stored under ``func`` returns.
    """
    namespace = build_parser().parse_args(argv)
    handler = namespace.func
    return handler(namespace)
|
schemafit/linter.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"""The lint engine: apply a provider's declarative rule pack to a schema."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from importlib.resources import files
|
|
7
|
+
|
|
8
|
+
from .model import Finding
|
|
9
|
+
from .walk import walk
|
|
10
|
+
|
|
11
|
+
PROVIDERS: tuple[str, ...] = ("openai", "anthropic", "gemini")
|
|
12
|
+
|
|
13
|
+
_RULE_CACHE: dict[str, dict] = {}
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def load_rule_pack(provider: str) -> dict:
    """Load a provider's JSON rule pack from packaged data (cached).

    Args:
        provider: one of ``PROVIDERS``.

    Returns:
        The parsed rule pack. The same dict object is returned on every call
        for a given provider, so callers should treat it as read-only.

    Raises:
        ValueError: if ``provider`` is not in ``PROVIDERS``.
    """
    if provider not in PROVIDERS:
        raise ValueError(f"unknown provider: {provider!r} (choose from {', '.join(PROVIDERS)})")
    if provider not in _RULE_CACHE:
        # Chain single-segment joinpath calls: passing several segments to one
        # joinpath call is not accepted by every Traversable implementation on
        # older Python versions (e.g. packages imported from a zip).
        resource = files("schemafit").joinpath("rules").joinpath(f"{provider}.json")
        _RULE_CACHE[provider] = json.loads(resource.read_text(encoding="utf-8"))
    return _RULE_CACHE[provider]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _is_object_schema(node: dict) -> bool:
|
|
27
|
+
t = node.get("type")
|
|
28
|
+
if t == "object":
|
|
29
|
+
return True
|
|
30
|
+
if isinstance(t, list) and "object" in t:
|
|
31
|
+
return True
|
|
32
|
+
# Untyped schema: a 'properties' key implies an object. But if an explicit
|
|
33
|
+
# non-object type is declared, a stray 'properties' key does NOT make it one
|
|
34
|
+
# (e.g. {"type": "array", "properties": {...}} is an array, not an object).
|
|
35
|
+
if t is None:
|
|
36
|
+
return "properties" in node
|
|
37
|
+
return False
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _join(pointer: str, token: str) -> str:
|
|
41
|
+
return f"{pointer}/{token}"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def apply_rule(
    rule: dict, provider: str, nodes: list[tuple[str, dict, frozenset[str]]]
) -> list[Finding]:
    """Apply one rule across every walked node, returning findings.

    Args:
        rule: a rule-pack entry; ``kind`` and ``id`` are required, and the
            kind-specific keys (``keyword``, ``context``, ``value``) are read
            as needed.
        provider: provider name recorded on each finding.
        nodes: ``(pointer, node, edge_context)`` triples from the walker.

    Returns:
        One ``Finding`` per violation, in node order.

    Raises:
        ValueError: if ``rule["kind"]`` is not a known rule kind.
    """
    kind = rule["kind"]
    out: list[Finding] = []

    def mk(node_ptr: str, json_ptr: str, keyword: str) -> Finding:
        # Build a finding, filling optional rule fields with their defaults.
        return Finding(
            provider=provider,
            rule_id=rule["id"],
            kind=kind,
            node_pointer=node_ptr,
            json_pointer=json_ptr,
            keyword=keyword,
            reason=rule.get("reason", ""),
            severity=rule.get("severity", "error"),
            doc_url=rule.get("doc_url", ""),
            auto_repair=rule.get("auto_repair", "manual"),
        )

    if kind == "forbidden_keyword":
        kw = rule["keyword"]
        for ptr, node, _ctx in nodes:
            if kw in node:
                out.append(mk(ptr, _join(ptr, kw), kw))

    elif kind == "forbidden_keyword_in_context":
        kw = rule["keyword"]
        need = rule["context"]
        for ptr, node, ctx in nodes:
            if need in ctx and kw in node:
                out.append(mk(ptr, _join(ptr, kw), kw))

    elif kind == "object_requires":
        kw = rule["keyword"]
        val = rule["value"]
        for ptr, node, _ctx in nodes:
            if _is_object_schema(node) and node.get(kw) != val:
                # If the keyword is absent, point at the offending object node
                # (a "<ptr>/<kw>" pointer would not resolve in the source).
                json_ptr = _join(ptr, kw) if kw in node else ptr
                out.append(mk(ptr, json_ptr, kw))

    elif kind == "object_all_properties_required":
        for ptr, node, _ctx in nodes:
            props = node.get("properties")
            if _is_object_schema(node) and isinstance(props, dict):
                declared = node.get("required")
                # Only a sequence of strings is a meaningful 'required'. A bare
                # string would otherwise be split into characters, and an
                # unhashable entry (e.g. a dict) would raise TypeError.
                if isinstance(declared, (list, tuple)):
                    required = {entry for entry in declared if isinstance(entry, str)}
                else:
                    required = set()
                for name in props:
                    if name not in required:
                        # Point at the property that should be required (resolves).
                        out.append(mk(ptr, _join(_join(ptr, "properties"), name), name))

    elif kind == "forbidden_additional_properties_schema":
        for ptr, node, _ctx in nodes:
            if isinstance(node.get("additionalProperties"), dict):
                out.append(mk(ptr, _join(ptr, "additionalProperties"), "additionalProperties"))

    else:  # defensive: unknown rule kind in a pack
        raise ValueError(f"unknown rule kind: {kind!r} (rule {rule.get('id')!r})")

    return out
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def lint(schema: object, provider: str) -> list[Finding]:
    """Lint ``schema`` against a single provider.

    Every rule in the provider's pack is applied to every walked node. The
    findings are returned ordered by ``(json_pointer, rule_id, keyword)``.
    """
    rules = load_rule_pack(provider).get("rules", [])
    walked = list(walk(schema))
    collected = [hit for rule in rules for hit in apply_rule(rule, provider, walked)]
    return sorted(collected, key=lambda hit: (hit.json_pointer, hit.rule_id, hit.keyword))
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def lint_multi(schema: object, providers: list[str]) -> dict[str, list[Finding]]:
    """Lint ``schema`` once per provider and map each provider to its findings."""
    by_provider: dict[str, list[Finding]] = {}
    for name in providers:
        by_provider[name] = lint(schema, name)
    return by_provider
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def has_errors(findings: list[Finding]) -> bool:
|
|
125
|
+
return any(f.severity == "error" for f in findings)
|
schemafit/model.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""Data model for lint findings."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import asdict, dataclass
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass(frozen=True)
class Finding:
    """A single provider-compatibility violation in a schema.

    Attributes:
        provider: the provider the rule belongs to (openai|anthropic|gemini).
        rule_id: stable identifier of the rule that fired.
        kind: the rule kind that produced the finding.
        node_pointer: JSON Pointer to the subschema the rule applies to.
        json_pointer: JSON Pointer to the precise offending location (keyword).
        keyword: the JSON-Schema keyword / property name involved.
        reason: human-readable explanation.
        severity: "error" (fails CI) or "warning".
        doc_url: primary-source link documenting the constraint.
        auto_repair: repair strategy declared by the rule: "strip",
            "set_false", "fill_required" or "manual" (the default).
    """

    provider: str
    rule_id: str
    kind: str
    node_pointer: str
    json_pointer: str
    keyword: str
    reason: str
    severity: str = "error"
    doc_url: str = ""
    auto_repair: str = "manual"  # strip | set_false | fill_required | manual

    def to_dict(self) -> dict[str, Any]:
        """Return the finding's fields as a plain dict (``dataclasses.asdict``)."""
        return asdict(self)
|
schemafit/py.typed
ADDED
|
File without changes
|
schemafit/repair.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Best-effort, conservative auto-repair toward a provider-valid schema.
|
|
2
|
+
|
|
3
|
+
Repair is deliberately limited to transforms that are *safe* or whose lossiness
|
|
4
|
+
is reported. It never silently changes meaning without flagging it. Transforms
|
|
5
|
+
it cannot perform safely are returned as ``manual_required`` rather than guessed.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import copy
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from .linter import lint
|
|
14
|
+
from .walk import resolve_pointer
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def repair(schema: object, provider: str) -> tuple[Any, dict]:
    """Return ``(fixed_schema, report)``.

    The input is deep-copied and never mutated. Only findings of severity
    ``"error"`` are acted on, each according to its rule's ``auto_repair``
    strategy:

    * ``strip``: delete the offending keyword (always lossy).
    * ``set_false``: set the keyword to ``False`` (lossy when it replaced a
      subschema).
    * ``fill_required``: make every declared property required (lossy).
    * anything else: recorded as ``manual_required``; the schema is untouched.

    report = {auto_fixed: [...], lossy: [...], manual_required: [...]}
    Each entry is a short ``"<rule_id> @ <pointer>"`` string.
    """
    fixed = copy.deepcopy(schema)
    report: dict[str, list[str]] = {"auto_fixed": [], "lossy": [], "manual_required": []}

    # Lint the working copy once; apply fixes by each rule's declared strategy.
    for finding in lint(fixed, provider):
        if finding.severity != "error":
            continue  # warnings don't fail CI; leave them for the author
        node = resolve_pointer(fixed, finding.node_pointer)
        if not isinstance(node, dict):
            # The node was removed/overwritten by an earlier fix in this pass —
            # already handled; do not emit a dangling tag.
            continue
        tag = f"{finding.rule_id} @ {finding.json_pointer}"

        strategy = finding.auto_repair
        if strategy == "strip":
            # Dropping a validation constraint is provider-valid but lossy.
            if finding.keyword in node:
                del node[finding.keyword]
                report["auto_fixed"].append(tag)
                report["lossy"].append(tag)
        elif strategy == "set_false":
            # Overwriting a subschema (open-map) with False discards a constraint.
            if isinstance(node.get(finding.keyword), dict):
                report["lossy"].append(tag)
            node[finding.keyword] = False
            report["auto_fixed"].append(tag)
        elif strategy == "fill_required":
            props = node.get("properties")
            if isinstance(props, dict):
                declared = node.get("required")
                # Keep only string entries of a real sequence: a bare string
                # would otherwise be split into single characters and written
                # back into 'required', and an unhashable entry would raise.
                if isinstance(declared, (list, tuple)):
                    kept = {entry for entry in declared if isinstance(entry, str)}
                else:
                    kept = set()
                node["required"] = sorted(kept | set(props))
                report["auto_fixed"].append(tag)
                report["lossy"].append(tag)  # optional fields made required
            else:
                report["manual_required"].append(tag)
        else:
            # "manual": structural rewrites (anyOf/oneOf removal, dict->object)
            # are unsafe to guess — surface for a human.
            report["manual_required"].append(tag)

    return fixed, report
|
schemafit/report.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Human and JSON reporters for lint results."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
|
|
7
|
+
from .model import Finding
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _status(findings: list[Finding]) -> str:
|
|
11
|
+
return "FAIL" if any(f.severity == "error" for f in findings) else "PASS"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def format_human(results: dict[str, list[Finding]]) -> str:
    """Render per-provider results as a plain-text report.

    Each provider gets a header line with its status and error/warning
    counts, followed by one entry per finding: location and rule id, the
    reason, and the reference URL when the finding has one. An empty
    ``results`` mapping renders as a single placeholder line.
    """
    rendered: list[str] = []
    for provider, findings in results.items():
        error_count = sum(1 for item in findings if item.severity == "error")
        warning_count = sum(1 for item in findings if item.severity != "error")
        header = (
            f"[{provider}] {_status(findings)} — {error_count} error(s), {warning_count} warning(s)"
        )
        rendered.append(header)
        for item in findings:
            rendered.extend(
                (
                    f" {item.severity.upper():<7} {item.json_pointer} ({item.rule_id})",
                    f" {item.reason}",
                )
            )
            if item.doc_url:
                rendered.append(f" ref: {item.doc_url}")
    if rendered:
        return "\n".join(rendered)
    return "(no providers selected)"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _results_payload(results: dict[str, list[Finding]]) -> dict:
    """Convert per-provider findings into a JSON-serialisable mapping.

    Each provider maps to its ``status`` string and a ``findings`` list of
    plain dicts.
    """
    payload: dict = {}
    for provider, findings in results.items():
        entry = {
            "status": _status(findings),
            "findings": [item.to_dict() for item in findings],
        }
        payload[provider] = entry
    return payload
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def format_json(results: dict[str, list[Finding]]) -> str:
    """Serialise one schema's results as indented JSON with sorted keys."""
    payload = _results_payload(results)
    return json.dumps(payload, sort_keys=True, indent=2)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def format_json_multi(all_results: dict[str, dict[str, list[Finding]]]) -> str:
    """Serialise results for several schemas as one JSON document.

    The top-level keys are the keys of ``all_results``; the output is indented
    with sorted keys.
    """
    payload: dict = {}
    for path, results in all_results.items():
        payload[path] = _results_payload(results)
    return json.dumps(payload, sort_keys=True, indent=2)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
{
|
|
2
|
+
"provider": "anthropic",
|
|
3
|
+
"doc": "JSON-Schema validation keywords Anthropic rejects on the STRICT structured-output path (400 Bad Request), keyword set confirmed verbatim in vercel/ai#13355. SCOPE: these target the structured-output / response_format surface (where the AI SDK had to strip them to avoid a 400); general Messages-API tool input_schema is more permissive and may accept (ignore) some of these. Run this pack against schemas you send on the structured-output path.",
|
|
4
|
+
"rules": [
|
|
5
|
+
{"id": "anthropic-no-minLength", "kind": "forbidden_keyword", "keyword": "minLength", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'minLength' validation keyword in structured-output/tool schemas (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"},
|
|
6
|
+
{"id": "anthropic-no-maxLength", "kind": "forbidden_keyword", "keyword": "maxLength", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'maxLength' validation keyword (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"},
|
|
7
|
+
{"id": "anthropic-no-pattern", "kind": "forbidden_keyword", "keyword": "pattern", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'pattern' validation keyword (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"},
|
|
8
|
+
{"id": "anthropic-no-format", "kind": "forbidden_keyword", "keyword": "format", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'format' validation keyword (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"},
|
|
9
|
+
{"id": "anthropic-no-minimum", "kind": "forbidden_keyword", "keyword": "minimum", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'minimum' validation keyword (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"},
|
|
10
|
+
{"id": "anthropic-no-maximum", "kind": "forbidden_keyword", "keyword": "maximum", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'maximum' validation keyword (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"},
|
|
11
|
+
{"id": "anthropic-no-exclusiveMinimum", "kind": "forbidden_keyword", "keyword": "exclusiveMinimum", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'exclusiveMinimum' validation keyword (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"},
|
|
12
|
+
{"id": "anthropic-no-exclusiveMaximum", "kind": "forbidden_keyword", "keyword": "exclusiveMaximum", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'exclusiveMaximum' validation keyword (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"},
|
|
13
|
+
{"id": "anthropic-no-minItems", "kind": "forbidden_keyword", "keyword": "minItems", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'minItems' validation keyword (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"},
|
|
14
|
+
{"id": "anthropic-no-maxItems", "kind": "forbidden_keyword", "keyword": "maxItems", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'maxItems' validation keyword (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"},
|
|
15
|
+
{"id": "anthropic-no-uniqueItems", "kind": "forbidden_keyword", "keyword": "uniqueItems", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'uniqueItems' validation keyword (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"},
|
|
16
|
+
{"id": "anthropic-no-minProperties", "kind": "forbidden_keyword", "keyword": "minProperties", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'minProperties' validation keyword (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"},
|
|
17
|
+
{"id": "anthropic-no-maxProperties", "kind": "forbidden_keyword", "keyword": "maxProperties", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'maxProperties' validation keyword (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"}
|
|
18
|
+
]
|
|
19
|
+
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
{
|
|
2
|
+
"provider": "gemini",
|
|
3
|
+
"doc": "Gemini response_schema portability cautions. NOTE: Gemini's structured-output schema support has evolved fast — Gemini 2.5 added anyOf (Jan 2026) and the API added additionalProperties (Nov 2025); older models (<=2.0) and older python-genai SDK versions rejected them. These are therefore emitted as WARNINGS (version-sensitive portability cautions), not hard errors — use --strict to fail CI on them.",
|
|
4
|
+
"rules": [
|
|
5
|
+
{
|
|
6
|
+
"id": "gemini-anyof-version-sensitive",
|
|
7
|
+
"kind": "forbidden_keyword",
|
|
8
|
+
"keyword": "anyOf",
|
|
9
|
+
"severity": "warning",
|
|
10
|
+
"auto_repair": "manual",
|
|
11
|
+
"reason": "'anyOf' is supported by current Gemini 2.5 (Jan 2026) but raises 'AnyOf is not supported' on older Gemini models / older python-genai. Portability caution for older targets.",
|
|
12
|
+
"doc_url": "https://github.com/googleapis/python-genai/issues/460"
|
|
13
|
+
},
|
|
14
|
+
{
|
|
15
|
+
"id": "gemini-oneof-unsupported",
|
|
16
|
+
"kind": "forbidden_keyword",
|
|
17
|
+
"keyword": "oneOf",
|
|
18
|
+
"severity": "warning",
|
|
19
|
+
"auto_repair": "manual",
|
|
20
|
+
"reason": "Gemini's response schema models alternatives via 'anyOf', not 'oneOf'; 'oneOf' is not a documented response_schema keyword. Prefer anyOf.",
|
|
21
|
+
"doc_url": "https://ai.google.dev/gemini-api/docs/structured-output"
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"id": "gemini-open-dict-version-sensitive",
|
|
25
|
+
"kind": "forbidden_additional_properties_schema",
|
|
26
|
+
"severity": "warning",
|
|
27
|
+
"auto_repair": "manual",
|
|
28
|
+
"reason": "Open-ended dict/map types (additionalProperties as a schema): the Gemini API added additionalProperties support (Nov 2025), but older python-genai rejects it client-side. Portability caution for older SDK versions.",
|
|
29
|
+
"doc_url": "https://github.com/googleapis/python-genai/issues/1815"
|
|
30
|
+
},
|
|
31
|
+
{
|
|
32
|
+
"id": "gemini-ref-recursion-risk",
|
|
33
|
+
"kind": "forbidden_keyword",
|
|
34
|
+
"keyword": "$ref",
|
|
35
|
+
"severity": "warning",
|
|
36
|
+
"auto_repair": "manual",
|
|
37
|
+
"reason": "Gemini may fail on '$ref' / self-referencing schemas (maximum recursion errors); inline or flatten the reference.",
|
|
38
|
+
"doc_url": "https://github.com/googleapis/python-genai/issues/460"
|
|
39
|
+
}
|
|
40
|
+
]
|
|
41
|
+
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
{
|
|
2
|
+
"provider": "openai",
|
|
3
|
+
"doc": "OpenAI strict structured-outputs / response_format constraints",
|
|
4
|
+
"rules": [
|
|
5
|
+
{
|
|
6
|
+
"id": "openai-additional-properties-false",
|
|
7
|
+
"kind": "object_requires",
|
|
8
|
+
"keyword": "additionalProperties",
|
|
9
|
+
"value": false,
|
|
10
|
+
"severity": "error",
|
|
11
|
+
"auto_repair": "set_false",
|
|
12
|
+
"reason": "OpenAI strict structured outputs require every object schema to set additionalProperties:false.",
|
|
13
|
+
"doc_url": "https://platform.openai.com/docs/guides/structured-outputs"
|
|
14
|
+
},
|
|
15
|
+
{
|
|
16
|
+
"id": "openai-all-properties-required",
|
|
17
|
+
"kind": "object_all_properties_required",
|
|
18
|
+
"severity": "error",
|
|
19
|
+
"auto_repair": "fill_required",
|
|
20
|
+
"reason": "OpenAI strict structured outputs require every defined property to appear in 'required' (model optionality via a nullable union instead).",
|
|
21
|
+
"doc_url": "https://platform.openai.com/docs/guides/structured-outputs"
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"id": "openai-no-default",
|
|
25
|
+
"kind": "forbidden_keyword",
|
|
26
|
+
"keyword": "default",
|
|
27
|
+
"severity": "error",
|
|
28
|
+
"auto_repair": "strip",
|
|
29
|
+
"reason": "OpenAI structured outputs reject 'default' within a property definition.",
|
|
30
|
+
"doc_url": "https://github.com/openai/openai-agents-python/issues/474"
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"id": "openai-no-oneof-in-array-items",
|
|
34
|
+
"kind": "forbidden_keyword_in_context",
|
|
35
|
+
"keyword": "oneOf",
|
|
36
|
+
"context": "array_items",
|
|
37
|
+
"severity": "error",
|
|
38
|
+
"auto_repair": "manual",
|
|
39
|
+
"reason": "OpenAI rejects 'oneOf' within the items definition of an array property (use anyOf).",
|
|
40
|
+
"doc_url": "https://github.com/openai/openai-agents-python/issues/474"
|
|
41
|
+
}
|
|
42
|
+
]
|
|
43
|
+
}
|
schemafit/walk.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""Deterministic JSON-Schema traversal with JSON-Pointer tracking.
|
|
2
|
+
|
|
3
|
+
Yields every *object-form* subschema node, with a precise JSON Pointer and an
|
|
4
|
+
*edge context* (how this node relates to its parent, e.g. it is an array's
|
|
5
|
+
``items`` schema). The context describes only the immediate parent edge and is
|
|
6
|
+
intentionally NOT inherited, so a rule like "oneOf is forbidden inside array
|
|
7
|
+
items" matches the items schema itself and not arbitrarily deep descendants.
|
|
8
|
+
|
|
9
|
+
Boolean subschemas (the JSON-Schema ``true``/``false`` forms) are not yielded:
|
|
10
|
+
they carry no keywords for the rule engine to inspect. Provider rules that care
|
|
11
|
+
about the open-map form check the parent node's keyword value directly.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from collections.abc import Iterator
|
|
17
|
+
|
|
18
|
+
# subschema maps: {name -> schema}
|
|
19
|
+
_CHILD_MAP_KEYS = ("properties", "patternProperties", "$defs", "definitions", "dependentSchemas")
|
|
20
|
+
# single subschema values
|
|
21
|
+
_CHILD_SCHEMA_KEYS = (
|
|
22
|
+
"items",
|
|
23
|
+
"additionalItems",
|
|
24
|
+
"additionalProperties",
|
|
25
|
+
"unevaluatedItems",
|
|
26
|
+
"unevaluatedProperties",
|
|
27
|
+
"contains",
|
|
28
|
+
"contentSchema",
|
|
29
|
+
"propertyNames",
|
|
30
|
+
"not",
|
|
31
|
+
"if",
|
|
32
|
+
"then",
|
|
33
|
+
"else",
|
|
34
|
+
)
|
|
35
|
+
# lists of subschemas
|
|
36
|
+
_CHILD_LIST_KEYS = ("allOf", "anyOf", "oneOf", "prefixItems")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _escape(token: str) -> str:
    """Encode *token* as a single JSON Pointer reference token (RFC 6901).

    ``~`` is written as ``~0`` and ``/`` as ``~1``. Both substitutions are
    applied in one pass over the input, so the ``~`` that ``~1`` introduces is
    never escaped a second time.
    """
    substitutions = {ord("~"): "~0", ord("/"): "~1"}
    return token.translate(substitutions)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _unescape(token: str) -> str:
    """Decode a JSON Pointer reference token (RFC 6901); inverse of ``_escape``.

    ``~1`` is turned back into ``/`` before ``~0`` is turned back into ``~``.
    The order matters: decoding ``~0`` first would turn the escaped text
    ``~01`` into ``~1`` and then wrongly into ``/``.
    """
    slashes_restored = token.replace("~1", "/")
    return slashes_restored.replace("~0", "~")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def walk(
    schema: object,
    pointer: str = "#",
    context: frozenset[str] = frozenset(),
) -> Iterator[tuple[str, dict, frozenset[str]]]:
    """Yield ``(json_pointer, node, edge_context)`` for every subschema dict.

    The node itself is yielded first, followed depth-first by its children in
    a fixed order: name->schema maps, single-schema keywords, schema lists,
    and finally draft-07 ``dependencies``. Anything that is not a dict
    (including boolean schemas) yields nothing.

    ``edge_context`` describes only the edge from the immediate parent; it is
    recomputed for each child rather than inherited from ``context``.
    """
    if not isinstance(schema, dict):
        return
    yield pointer, schema, context

    no_edge: frozenset[str] = frozenset()
    array_edge = frozenset({"array_items"})

    # 1) Keywords holding a {name -> schema} mapping.
    for keyword in _CHILD_MAP_KEYS:
        members = schema.get(keyword)
        if not isinstance(members, dict):
            continue
        edge = frozenset({"property"}) if keyword == "properties" else no_edge
        for member_name, member in members.items():
            yield from walk(member, f"{pointer}/{keyword}/{_escape(str(member_name))}", edge)

    # 2) Keywords holding exactly one schema.
    for keyword in _CHILD_SCHEMA_KEYS:
        value = schema.get(keyword)
        if isinstance(value, dict):
            if keyword in ("items", "additionalItems", "unevaluatedItems"):
                edge = array_edge
            elif keyword in ("additionalProperties", "unevaluatedProperties"):
                edge = frozenset({"additional_properties"})
            else:
                edge = no_edge
            yield from walk(value, f"{pointer}/{keyword}", edge)
        elif keyword == "items" and isinstance(value, list):
            # Tuple validation: ``items`` given as an array of schemas.
            for index, member in enumerate(value):
                yield from walk(member, f"{pointer}/items/{index}", array_edge)

    # 3) Keywords holding a list of schemas.
    for keyword in _CHILD_LIST_KEYS:
        members = schema.get(keyword)
        if not isinstance(members, list):
            continue
        edge = array_edge if keyword == "prefixItems" else frozenset({f"{keyword}_member"})
        for index, member in enumerate(members):
            yield from walk(member, f"{pointer}/{keyword}/{index}", edge)

    # 4) Draft-07 "dependencies": name -> (schema | list[str]). Only dict
    #    values are subschemas; list values are property-name lists and are
    #    skipped.
    dependencies = schema.get("dependencies")
    if isinstance(dependencies, dict):
        for dep_name, dep_schema in dependencies.items():
            if isinstance(dep_schema, dict):
                yield from walk(
                    dep_schema,
                    f"{pointer}/dependencies/{_escape(str(dep_name))}",
                    no_edge,
                )
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def resolve_pointer(root: object, pointer: str) -> object | None:
    """Resolve a JSON Pointer (``#`` / ``#/a/b/0``) to its node, or None.

    ``pointer`` is the URI-fragment form: ``"#"`` or ``""`` addresses *root*
    itself, and anything else must start with ``"#/"``. Each reference token
    is unescaped (``~1`` -> ``/``, ``~0`` -> ``~``) and then used as a key
    into a dict or as an index into a list.

    Returns None when the pointer is malformed, when a key or index does not
    exist, or when a token tries to descend into a scalar. A node whose own
    value is ``None`` (JSON ``null``) therefore cannot be told apart from a
    miss by the return value alone.
    """
    if pointer in ("#", ""):
        return root
    if not pointer.startswith("#/"):
        return None

    node: object = root
    for raw in pointer[2:].split("/"):
        token = _unescape(raw)
        if isinstance(node, list):
            # RFC 6901 section 4: an array index is "0" or a run of ASCII
            # digits with no leading zero. Bare int() is more lenient: it
            # accepts "-1" (which would silently select the LAST element),
            # "+1", " 1" and "1_0", so the token is validated first.
            if not (token.isascii() and token.isdigit()):
                return None
            if len(token) > 1 and token.startswith("0"):
                return None
            try:
                node = node[int(token)]
            except (ValueError, IndexError):
                # ValueError: digit string too long for int(); IndexError:
                # index past the end of the array.
                return None
        elif isinstance(node, dict):
            if token not in node:
                return None
            node = node[token]
        else:
            # Scalars have no children to descend into.
            return None
    return node
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: schemafit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Provider-aware structured-output / JSON-Schema CI linter — fail CI before your schema 400s on OpenAI, Anthropic, or Gemini
|
|
5
|
+
Author-email: Dan Mercede <dan@danmercede.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/OrionArchitekton/schemafit
|
|
8
|
+
Project-URL: Issues, https://github.com/OrionArchitekton/schemafit/issues
|
|
9
|
+
Keywords: json-schema,structured-output,openai,anthropic,gemini,llm,ci,linter,tool-calling
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
17
|
+
Requires-Python: >=3.11
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Provides-Extra: dev
|
|
21
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
22
|
+
Requires-Dist: ruff>=0.6; extra == "dev"
|
|
23
|
+
Dynamic: license-file
|
|
24
|
+
|
|
25
|
+
# schemafit
|
|
26
|
+
|
|
27
|
+
**Provider-aware structured-output / JSON-Schema CI linter.** Catch the schema
|
|
28
|
+
incompatibilities that make one provider `400` while another succeeds — *before*
|
|
29
|
+
they hit production, as a fast, offline CI check.
|
|
30
|
+
|
|
31
|
+
A JSON Schema / tool definition / `response_format` that works on OpenAI can
|
|
32
|
+
`400` on Anthropic or Gemini (and vice-versa): nested `oneOf`, a missing
|
|
33
|
+
`additionalProperties: false`, a `default` in a property, Anthropic-rejected
|
|
34
|
+
validation keywords (`minLength`, `format`, `pattern`, …), Gemini's lack of
|
|
35
|
+
`anyOf`/dict support. The API tells you it failed but not *which constraint*
|
|
36
|
+
violated it, so teams hand-port schemas and debug by trial-and-error at runtime.
|
|
37
|
+
|
|
38
|
+
`schemafit` encodes each provider's documented constraint surface as a
|
|
39
|
+
**versioned, declarative rule pack** and lints your schema statically — pointing
|
|
40
|
+
at the exact JSON-Pointer path, the keyword, and why — with a non-zero exit code
|
|
41
|
+
so CI fails the PR instead of prod.
|
|
42
|
+
|
|
43
|
+
> Every rule is grounded in a real, cited provider issue (see
|
|
44
|
+
> [`schemafit/rules/`](schemafit/rules/)). It is **not** a runtime client: it
|
|
45
|
+
> makes no model calls, needs no API key, and has **zero runtime dependencies**.
|
|
46
|
+
|
|
47
|
+
## Why this and not Instructor / BAML / LiteLLM / Vercel AI SDK?
|
|
48
|
+
|
|
49
|
+
Those are excellent **runtime** clients — they normalize, repair, or constrain a
|
|
50
|
+
schema *at call-time*. `schemafit` fills the gap they leave: a **static,
|
|
51
|
+
pre-ship CI lint** that fails the build before the schema ever reaches a
|
|
52
|
+
provider, over the raw schemas you already ship, with no DSL or codegen buy-in.
|
|
53
|
+
|
|
54
|
+
## Install
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
# From source (works today):
|
|
58
|
+
pip install "git+https://github.com/OrionArchitekton/schemafit"
|
|
59
|
+
# or build and run the container:
|
|
60
|
+
docker build -t schemafit . && docker run --rm schemafit demo
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Once the first release is tagged (`v0.1.0`), `pip install schemafit` (PyPI) and
|
|
64
|
+
`docker run --rm ghcr.io/orionarchitekton/schemafit demo` (GHCR) become
|
|
65
|
+
available — both are published by the release workflow on a `v*` tag (PyPI via
|
|
66
|
+
Trusted Publishing; image to GHCR).
|
|
67
|
+
|
|
68
|
+
## Usage
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
# Lint one schema against several providers (exit 1 if any error):
|
|
72
|
+
schemafit lint my-schema.json --provider openai,anthropic,gemini
|
|
73
|
+
|
|
74
|
+
# Machine-readable output for CI annotations:
|
|
75
|
+
schemafit lint my-schema.json --provider anthropic --format json
|
|
76
|
+
|
|
77
|
+
# Also fail on warnings (e.g. Gemini $ref recursion risk):
|
|
78
|
+
schemafit lint my-schema.json --provider gemini --strict
|
|
79
|
+
|
|
80
|
+
# Emit a best-effort provider-valid variant (lossy transforms are flagged):
|
|
81
|
+
schemafit repair my-schema.json --provider anthropic --out fixed.json
|
|
82
|
+
|
|
83
|
+
# List supported providers / run a hermetic end-to-end proof:
|
|
84
|
+
schemafit providers
|
|
85
|
+
schemafit demo
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Example:
|
|
89
|
+
|
|
90
|
+
```
|
|
91
|
+
$ schemafit lint order.json --provider anthropic
|
|
92
|
+
[anthropic] FAIL — 2 error(s), 0 warning(s)
|
|
93
|
+
ERROR #/properties/sku/pattern (anthropic-no-pattern)
|
|
94
|
+
Anthropic rejects the 'pattern' validation keyword (400 Bad Request).
|
|
95
|
+
ref: https://github.com/vercel/ai/issues/13355
|
|
96
|
+
ERROR #/properties/qty/minimum (anthropic-no-minimum)
|
|
97
|
+
Anthropic rejects the 'minimum' validation keyword (400 Bad Request).
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Use in CI
|
|
101
|
+
|
|
102
|
+
GitHub Actions (this repo ships a composite action):
|
|
103
|
+
|
|
104
|
+
```yaml
|
|
105
|
+
- uses: OrionArchitekton/schemafit@v0.1.0
|
|
106
|
+
with:
|
|
107
|
+
schema: schemas/tool.json
|
|
108
|
+
providers: openai,anthropic,gemini
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Or run it directly, or as a pre-commit hook (`.pre-commit-hooks.yaml` is included):
|
|
112
|
+
|
|
113
|
+
```yaml
|
|
114
|
+
- repo: https://github.com/OrionArchitekton/schemafit
|
|
115
|
+
rev: v0.1.0
|
|
116
|
+
hooks:
|
|
117
|
+
- id: schemafit
|
|
118
|
+
args: ["--provider", "openai,anthropic,gemini"]
|
|
119
|
+
files: '^schemas/.*\.json$' # scope to YOUR LLM schemas, not every .json
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
> Scope the hook with `files:` to the directory holding your LLM schemas — the
|
|
123
|
+
> default `types: [json]` would otherwise lint every JSON file in the repo
|
|
124
|
+
> (`package.json`, `tsconfig.json`, lockfiles), which are not LLM schemas.
|
|
125
|
+
|
|
126
|
+
## Supported providers (v0.1)
|
|
127
|
+
|
|
128
|
+
| Provider | Checks (grounded in) |
|
|
129
|
+
|---|---|
|
|
130
|
+
| `openai` | `additionalProperties:false` required; all properties required; no `default`; no `oneOf` in array items ([openai-agents-python#474](https://github.com/openai/openai-agents-python/issues/474), [claude-task-master#1522](https://github.com/eyaltoledano/claude-task-master/issues/1522)) |
|
|
131
|
+
| `anthropic` | 13 rejected validation keywords on the **strict structured-output surface**: `minLength`/`maxLength`/`pattern`/`format`/`minimum`/`maximum`/`exclusiveMinimum`/`exclusiveMaximum`/`minItems`/`maxItems`/`uniqueItems`/`minProperties`/`maxProperties` ([vercel/ai#13355](https://github.com/vercel/ai/issues/13355), [anthropic-sdk-python#1034](https://github.com/anthropics/anthropic-sdk-python/issues/1034)). General Messages-API tool `input_schema` is more permissive — run this pack against schemas you send on the structured-output path. |
|
|
132
|
+
| `gemini` | **Portability warnings** (version-sensitive, non-failing by default): `anyOf` (rejected by ≤2.0 / old SDKs, supported by 2.5), `oneOf`, open dict (`additionalProperties` schema), `$ref` recursion. Gemini's schema support changed fast (`anyOf` Jan 2026, `additionalProperties` Nov 2025), so these *warn* — use `--strict` to gate on them. ([python-genai#460](https://github.com/googleapis/python-genai/issues/460), [docs](https://ai.google.dev/gemini-api/docs/structured-output)) |
|
|
133
|
+
|
|
134
|
+
## Exit codes
|
|
135
|
+
|
|
136
|
+
| code | meaning |
|
|
137
|
+
|---|---|
|
|
138
|
+
| `0` | no errors (warnings allowed unless `--strict`) |
|
|
139
|
+
| `1` | at least one error (CI fail) |
|
|
140
|
+
| `2` | bad input (unreadable / invalid JSON) |
|
|
141
|
+
|
|
142
|
+
## Scope (v0.1) and roadmap
|
|
143
|
+
|
|
144
|
+
In scope now: the `lint` + `repair` core, three provider rule packs, JSON/human
|
|
145
|
+
reporters, Docker image, GitHub Action, pre-commit hook.
|
|
146
|
+
|
|
147
|
+
Deferred (v0.2+): a `--live-verify` mode that calls each provider to confirm,
|
|
148
|
+
an npm/`ajv` port for the JS/TS ecosystem, more providers (Mistral, Cohere,
|
|
149
|
+
Bedrock, Vertex), automatic rule-pack drift detection, SARIF output, and
|
|
150
|
+
source-model (Pydantic/Zod) auto-fix.
|
|
151
|
+
|
|
152
|
+
## License
|
|
153
|
+
|
|
154
|
+
MIT © 2026 Dan Mercede
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
schemafit/__init__.py,sha256=OKm75l--MWFyLsnccCqeksa-8ovPurvAvIKOdQmyKrA,569
|
|
2
|
+
schemafit/__main__.py,sha256=ueatLyYgr_5e0HWnJz5NnQtr1XYm4Trjt-ReXzeimF4,158
|
|
3
|
+
schemafit/cli.py,sha256=h6V1VRKScL7zENWbFiWQhqJotCrnpYjSPLGiVHHVrtc,7086
|
|
4
|
+
schemafit/linter.py,sha256=0H2nlRlOvYqkOidmafIfVQu107S5o1Hxs0qnwiMSoi8,4564
|
|
5
|
+
schemafit/model.py,sha256=3PWvObnayPFd8IQCpHVIQOfIVcvhb-1rZPB8H2AZaWA,1167
|
|
6
|
+
schemafit/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
+
schemafit/repair.py,sha256=weTja-zDlfG7F_Ed8WEb6IYLDEKV1dn9i3S5tnT3z2s,2659
|
|
8
|
+
schemafit/report.py,sha256=eJxoEjv74MpwWtGj8mVk88UV6U6nwdb7sbKr5bEq8ew,1613
|
|
9
|
+
schemafit/walk.py,sha256=Zx6PsC6TMhLTywBvwQxTkOOYfXxPJ_CfpuR-_8m9p4A,4382
|
|
10
|
+
schemafit/rules/anthropic.json,sha256=UgnTjxJSBWMrJfQjzm1LfbwalpPA4_r_Rgdv52DwSGE,4178
|
|
11
|
+
schemafit/rules/gemini.json,sha256=2dqB29YpwmJq35KWGtW9gzGMxkOeof8d32K0RnezPVY,2181
|
|
12
|
+
schemafit/rules/openai.json,sha256=tnrzYKFE0SwuTBWepMUOnYZ4IqUZyhWIEk2r2I5LMAo,1676
|
|
13
|
+
schemafit-0.1.0.dist-info/licenses/LICENSE,sha256=aEsJcRjMZ6XR41a3faplwrgpqBMl1z3fwBQO624QpW8,1068
|
|
14
|
+
schemafit-0.1.0.dist-info/METADATA,sha256=C2MpqKUXbZ6IFA-4UQwThw6iuV0862eXjryOuxqWn6g,7011
|
|
15
|
+
schemafit-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
16
|
+
schemafit-0.1.0.dist-info/entry_points.txt,sha256=WAUOkQAkdGIkE6lZs5q4xymnkJFDdZJVXNTxXUbnDkI,49
|
|
17
|
+
schemafit-0.1.0.dist-info/top_level.txt,sha256=zvvzqzOkWVw8KVruRkvqYC9IUKoXAN7PGiAKlVgVq3w,10
|
|
18
|
+
schemafit-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Dan Mercede
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
schemafit
|