schemafit 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
schemafit/__init__.py ADDED
@@ -0,0 +1,16 @@
1
+ """schemafit — provider-aware structured-output / JSON-Schema CI linter.
2
+
3
+ Statically lint a JSON Schema / tool definition / response_format against each
4
+ LLM provider's documented constraint surface (OpenAI, Anthropic, Gemini) and
5
+ fail CI *before* the schema 400s in production on provider X.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ __version__ = "0.1.0"
11
+
12
+ from .linter import PROVIDERS, has_errors, lint, lint_multi
13
+ from .model import Finding
14
+ from .repair import repair
15
+
16
+ __all__ = ["PROVIDERS", "Finding", "__version__", "has_errors", "lint", "lint_multi", "repair"]
schemafit/__main__.py ADDED
@@ -0,0 +1,10 @@
1
+ """Enable ``python -m schemafit``."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+
7
+ from .cli import main
8
+
9
if __name__ == "__main__":
    # Hand main()'s integer return value to the interpreter as the exit status.
    sys.exit(main())
schemafit/cli.py ADDED
@@ -0,0 +1,187 @@
1
+ """schemafit command-line interface.
2
+
3
+ Commands:
4
+ lint <schema.json> --provider openai[,anthropic,gemini] # exit 1 on violations
5
+ repair <schema.json> --provider <p> [--out fixed.json]
6
+ providers
7
+ demo # hermetic proof
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ import json
14
+ import sys
15
+
16
+ from . import __version__, report
17
+ from .linter import PROVIDERS, has_errors, lint, lint_multi
18
+ from .repair import repair
19
+
20
+ # A schema that is valid for OpenAI but deliberately trips Anthropic (rejected
21
+ # validation keywords) and Gemini (anyOf). Used by `demo` for a hermetic proof.
22
+ DEMO_BAD_SCHEMA: dict = {
23
+ "type": "object",
24
+ "additionalProperties": False,
25
+ "properties": {
26
+ "name": {"type": "string", "minLength": 1, "maxLength": 50},
27
+ "email": {"type": "string", "format": "email"},
28
+ "age": {"type": "integer", "minimum": 0, "maximum": 120},
29
+ "status": {"anyOf": [{"type": "string"}, {"type": "null"}]},
30
+ },
31
+ "required": ["name", "email", "age", "status"],
32
+ }
33
+
34
+
35
def _load_schema(path: str) -> object:
    """Parse and return the JSON document identified by ``path``.

    A ``path`` of ``"-"`` reads the document from standard input; any other
    value is opened as a UTF-8 text file.
    """
    if path != "-":
        with open(path, encoding="utf-8") as handle:
            return json.loads(handle.read())
    return json.loads(sys.stdin.read())
40
+
41
+
42
def _parse_providers(spec: str) -> list[str]:
    """Split a comma-separated ``--provider`` value into provider names.

    Whitespace around each name is trimmed and empty segments are dropped.
    Exits via ``SystemExit`` when no name remains or when a name is not listed
    in ``PROVIDERS``.
    """
    names = []
    for chunk in spec.split(","):
        cleaned = chunk.strip()
        if cleaned:
            names.append(cleaned)
    if len(names) == 0:
        raise SystemExit("error: --provider requires at least one provider")
    # Report the first unrecognised name, in the order the user gave them.
    unknown = next((name for name in names if name not in PROVIDERS), None)
    if unknown is not None:
        raise SystemExit(f"error: unknown provider {unknown!r} (choose from {', '.join(PROVIDERS)})")
    return names
50
+
51
+
52
def cmd_lint(args: argparse.Namespace) -> int:
    """Lint every schema in ``args.schemas`` against the requested providers.

    In human format each schema's report is printed as soon as it is linted
    (with a ``== path ==`` header when several schemas were given); in JSON
    format one combined document is printed after all schemas were linted.

    Returns:
        0 when no schema fails, 1 when at least one schema has error findings
        (or any finding at all under ``--strict``), and 2 when a schema cannot
        be read or decoded, or is too deeply nested.
    """
    providers = _parse_providers(args.provider)
    all_results: dict[str, dict] = {}
    overall_fail = False
    for path in args.schemas:
        try:
            schema = _load_schema(path)
            results = lint_multi(schema, providers)
        except (OSError, UnicodeDecodeError, json.JSONDecodeError) as exc:
            # UnicodeDecodeError is a ValueError, not an OSError: a file that is
            # not valid UTF-8 must take this clean exit-2 path, not a traceback.
            print(f"error: cannot read schema {path!r}: {exc}", file=sys.stderr)
            return 2
        except RecursionError:
            print(f"error: schema {path!r} is too deeply nested to lint safely", file=sys.stderr)
            return 2
        all_results[path] = results
        if args.strict:
            # --strict: any finding, warnings included, fails the schema.
            failed = any(findings for findings in results.values())
        else:
            failed = any(has_errors(findings) for findings in results.values())
        overall_fail = overall_fail or failed
        if args.format != "json":
            if len(args.schemas) > 1:
                print(f"== {path} ==")
            print(report.format_human(results))
    if args.format == "json":
        print(report.format_json_multi(all_results))
    return 1 if overall_fail else 0
79
+
80
+
81
def cmd_repair(args: argparse.Namespace) -> int:
    """Repair ``args.schema`` for ``args.provider`` and emit the fixed schema.

    The fixed schema is written to ``args.out`` when given, otherwise printed
    to stdout. A one-line summary and every ``manual_required`` entry go to
    stderr so stdout stays a clean JSON document.

    Returns:
        0 on success; 2 when the schema cannot be read or decoded, is too
        deeply nested, or the output file cannot be written.

    Raises:
        SystemExit: if ``args.provider`` is not a known provider.
    """
    if args.provider not in PROVIDERS:
        raise SystemExit(f"error: unknown provider {args.provider!r}")
    try:
        schema = _load_schema(args.schema)
        fixed, rep = repair(schema, args.provider)
    except (OSError, UnicodeDecodeError, json.JSONDecodeError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError: a file that is
        # not valid UTF-8 must take this clean exit-2 path, not a traceback.
        print(f"error: cannot read schema {args.schema!r}: {exc}", file=sys.stderr)
        return 2
    except RecursionError:
        print(
            f"error: schema {args.schema!r} is too deeply nested to repair safely",
            file=sys.stderr,
        )
        return 2
    rendered = json.dumps(fixed, indent=2, sort_keys=True)
    if args.out:
        try:
            with open(args.out, "w", encoding="utf-8") as fh:
                fh.write(rendered + "\n")
        except OSError as exc:
            # An unwritable destination is reported like an unreadable input.
            print(f"error: cannot write {args.out!r}: {exc}", file=sys.stderr)
            return 2
        print(f"wrote {args.out}", file=sys.stderr)
    else:
        print(rendered)
    print(
        f"repair: auto_fixed={len(rep['auto_fixed'])} "
        f"lossy={len(rep['lossy'])} manual_required={len(rep['manual_required'])}",
        file=sys.stderr,
    )
    for tag in rep["manual_required"]:
        print(f" MANUAL {tag}", file=sys.stderr)
    return 0
111
+
112
+
113
def cmd_providers(_args: argparse.Namespace) -> int:
    """Print each supported provider name on its own line; always returns 0."""
    for provider_name in PROVIDERS:
        print(provider_name)
    return 0
117
+
118
+
119
def cmd_demo(_args: argparse.Namespace) -> int:
    """Run the built-in lint -> repair -> multi-provider demonstration.

    Lints ``DEMO_BAD_SCHEMA`` for Anthropic, repairs it, lints the result
    again, then prints a PASS/FAIL matrix across all providers.

    Returns:
        0 when the schema had Anthropic errors before repair and none
        afterwards, otherwise 3.
    """
    print("== schemafit demo ==")

    # Stage 1: lint the deliberately incompatible schema as-is.
    initial = lint(DEMO_BAD_SCHEMA, "anthropic")
    initial_failed = has_errors(initial)
    initial_errors = len([f for f in initial if f.severity == "error"])
    initial_exit = 1 if initial_failed else 0
    print(f"PROVIDER=anthropic INPUT=demo-bad VIOLATIONS={initial_errors} EXIT={initial_exit}")
    if initial:
        first = initial[0]
        print(
            f"VIOLATION_PATH={first.json_pointer} "
            f"(keyword: {first.keyword} -> rejected by anthropic)"
        )

    # Stage 2: repair, then lint the repaired copy.
    repaired, summary = repair(DEMO_BAD_SCHEMA, "anthropic")
    final = lint(repaired, "anthropic")
    final_failed = has_errors(final)
    final_errors = len([f for f in final if f.severity == "error"])
    final_exit = 1 if final_failed else 0
    print(
        f"--- after `schemafit repair --provider anthropic` --- "
        f"VIOLATIONS={final_errors} EXIT={final_exit} auto_fixed={len(summary['auto_fixed'])}"
    )

    # Stage 3: the original schema against every provider.
    matrix = lint_multi(DEMO_BAD_SCHEMA, list(PROVIDERS))
    cells = []
    for name in PROVIDERS:
        verdict = "FAIL" if has_errors(matrix[name]) else "PASS"
        cells.append(f"{name}={verdict}")
    rendered = " ".join(cells)
    print(f"MULTI: {rendered}")
    gem_warns = len([f for f in matrix["gemini"] if f.severity == "warning"])
    print(f"NOTE: gemini portability warnings={gem_warns} (version-sensitive, non-failing)")

    proof_holds = initial_failed and not final_failed
    print("PROOF_OK" if proof_holds else "PROOF_FAILED")
    return 0 if proof_holds else 3
152
+
153
+
154
def build_parser() -> argparse.ArgumentParser:
    """Build the ``schemafit`` argument parser and its four subcommands.

    Each subparser stores its handler under ``func`` so the caller can
    dispatch with ``args.func(args)``.
    """
    root = argparse.ArgumentParser(
        prog="schemafit",
        description="Provider-aware structured-output / JSON-Schema CI linter.",
    )
    root.add_argument("--version", action="version", version=f"schemafit {__version__}")
    commands = root.add_subparsers(dest="command", required=True)

    # lint: one or more schemas, one or more providers
    lint_cmd = commands.add_parser(
        "lint", help="lint one or more schemas against one or more providers"
    )
    lint_cmd.add_argument("schemas", nargs="+", help="schema JSON file(s) ('-' = stdin)")
    lint_cmd.add_argument("--provider", required=True, help="comma list: openai,anthropic,gemini")
    lint_cmd.add_argument("--format", choices=("human", "json"), default="human")
    lint_cmd.add_argument("--strict", action="store_true", help="also fail (exit 1) on warnings")
    lint_cmd.set_defaults(func=cmd_lint)

    # repair: exactly one schema, exactly one provider
    repair_cmd = commands.add_parser("repair", help="emit a best-effort provider-valid variant")
    repair_cmd.add_argument("schema", help="path to a JSON schema file, or '-' for stdin")
    repair_cmd.add_argument("--provider", required=True, help="one of: openai|anthropic|gemini")
    repair_cmd.add_argument("--out", help="write fixed schema here (default: stdout)")
    repair_cmd.set_defaults(func=cmd_repair)

    # providers / demo take no arguments of their own
    commands.add_parser("providers", help="list supported providers").set_defaults(
        func=cmd_providers
    )
    commands.add_parser("demo", help="run a hermetic end-to-end proof").set_defaults(
        func=cmd_demo
    )

    return root
182
+
183
+
184
def main(argv: list[str] | None = None) -> int:
    """Parse ``argv`` and run the selected subcommand, returning its exit code.

    ``argv`` is passed straight to ``ArgumentParser.parse_args``, so ``None``
    means "use the process command line".
    """
    namespace = build_parser().parse_args(argv)
    handler = namespace.func
    return handler(namespace)
schemafit/linter.py ADDED
@@ -0,0 +1,125 @@
1
+ """The lint engine: apply a provider's declarative rule pack to a schema."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from importlib.resources import files
7
+
8
+ from .model import Finding
9
+ from .walk import walk
10
+
11
+ PROVIDERS: tuple[str, ...] = ("openai", "anthropic", "gemini")
12
+
13
+ _RULE_CACHE: dict[str, dict] = {}
14
+
15
+
16
def load_rule_pack(provider: str) -> dict:
    """Return the parsed JSON rule pack for ``provider``.

    The pack is read from the packaged ``rules/<provider>.json`` resource on
    first use and served from ``_RULE_CACHE`` afterwards.

    Raises:
        ValueError: if ``provider`` is not listed in ``PROVIDERS``.
    """
    if provider not in PROVIDERS:
        raise ValueError(f"unknown provider: {provider!r} (choose from {', '.join(PROVIDERS)})")
    cached = _RULE_CACHE.get(provider)
    if cached is None and provider not in _RULE_CACHE:
        resource = files("schemafit").joinpath("rules", f"{provider}.json")
        _RULE_CACHE[provider] = json.loads(resource.read_text(encoding="utf-8"))
    return _RULE_CACHE[provider]
24
+
25
+
26
def _is_object_schema(node: dict) -> bool:
    """Report whether ``node`` describes a JSON object.

    An explicit ``type`` decides the answer: ``"object"``, or a type list that
    contains ``"object"``. Only when ``type`` is absent (or null) does the
    presence of ``properties`` imply an object, so a stray ``properties`` key
    on e.g. ``{"type": "array"}`` does not count.
    """
    declared = node.get("type")
    if declared is None:
        return "properties" in node
    if isinstance(declared, list):
        return "object" in declared
    return declared == "object"
38
+
39
+
40
def _join(pointer: str, token: str) -> str:
    """Append ``token`` to ``pointer`` as a single RFC 6901 reference token.

    ``~`` and ``/`` inside the token are escaped as ``~0`` and ``~1``, so a
    property name such as ``"a/b"`` produces a pointer that resolves back to
    that one key instead of being read as two path segments.
    """
    # '~' must be escaped first, otherwise the '~' of a freshly written '~1'
    # would be escaped a second time.
    escaped = str(token).replace("~", "~0").replace("/", "~1")
    return f"{pointer}/{escaped}"
42
+
43
+
44
def apply_rule(
    rule: dict, provider: str, nodes: list[tuple[str, dict, frozenset[str]]]
) -> list[Finding]:
    """Apply one rule across every walked node, returning findings.

    Args:
        rule: one entry of a provider rule pack. ``kind`` selects the check and
            ``id`` labels the findings; ``keyword``, ``value`` and ``context``
            are read by the kinds that need them.
        provider: provider name recorded on every finding.
        nodes: ``(pointer, subschema, edge_context)`` triples to inspect.

    Returns:
        One ``Finding`` per violation, in node order.

    Raises:
        ValueError: if ``rule["kind"]`` is not a known rule kind.
    """
    kind = rule["kind"]
    out: list[Finding] = []

    def mk(node_ptr: str, json_ptr: str, keyword: str) -> Finding:
        # Optional rule fields fall back to the same defaults Finding declares.
        return Finding(
            provider=provider,
            rule_id=rule["id"],
            kind=kind,
            node_pointer=node_ptr,
            json_pointer=json_ptr,
            keyword=keyword,
            reason=rule.get("reason", ""),
            severity=rule.get("severity", "error"),
            doc_url=rule.get("doc_url", ""),
            auto_repair=rule.get("auto_repair", "manual"),
        )

    if kind == "forbidden_keyword":
        kw = rule["keyword"]
        for ptr, node, _ctx in nodes:
            if kw in node:
                out.append(mk(ptr, _join(ptr, kw), kw))

    elif kind == "forbidden_keyword_in_context":
        kw = rule["keyword"]
        need = rule["context"]
        for ptr, node, ctx in nodes:
            if need in ctx and kw in node:
                out.append(mk(ptr, _join(ptr, kw), kw))

    elif kind == "object_requires":
        kw = rule["keyword"]
        val = rule["value"]
        for ptr, node, _ctx in nodes:
            current = node.get(kw)
            # bool is an int subclass, so 0 == False and 1 == True in Python,
            # but JSON 0 is not JSON false: require matching bool-ness too.
            satisfied = current == val and isinstance(current, bool) == isinstance(val, bool)
            if _is_object_schema(node) and not satisfied:
                # If the keyword is absent, point at the offending object node
                # (a "<ptr>/<kw>" pointer would not resolve in the source).
                json_ptr = _join(ptr, kw) if kw in node else ptr
                out.append(mk(ptr, json_ptr, kw))

    elif kind == "object_all_properties_required":
        for ptr, node, _ctx in nodes:
            props = node.get("properties")
            if _is_object_schema(node) and isinstance(props, dict):
                declared = node.get("required")
                # A malformed 'required' (not a list, or holding non-string
                # entries) names no property. Treat it that way instead of
                # raising TypeError or matching names against stray characters.
                required = (
                    {entry for entry in declared if isinstance(entry, str)}
                    if isinstance(declared, list)
                    else set()
                )
                for name in props:
                    if name not in required:
                        # Point at the property that should be required (resolves).
                        out.append(mk(ptr, _join(_join(ptr, "properties"), name), name))

    elif kind == "forbidden_additional_properties_schema":
        for ptr, node, _ctx in nodes:
            if isinstance(node.get("additionalProperties"), dict):
                out.append(mk(ptr, _join(ptr, "additionalProperties"), "additionalProperties"))

    else:  # defensive: unknown rule kind in a pack
        raise ValueError(f"unknown rule kind: {kind!r} (rule {rule.get('id')!r})")

    return out
107
+
108
+
109
def lint(schema: object, provider: str) -> list[Finding]:
    """Lint ``schema`` against one provider's rule pack.

    Every rule of the pack is applied to every subschema yielded by ``walk``.
    The findings are returned ordered by ``(json_pointer, rule_id, keyword)``.
    """
    rules = load_rule_pack(provider).get("rules", [])
    # Walk once and reuse the node list for every rule.
    walked = list(walk(schema))
    collected = [
        finding for rule in rules for finding in apply_rule(rule, provider, walked)
    ]
    return sorted(collected, key=lambda f: (f.json_pointer, f.rule_id, f.keyword))
118
+
119
+
120
def lint_multi(schema: object, providers: list[str]) -> dict[str, list[Finding]]:
    """Lint ``schema`` once per provider and return findings keyed by provider name."""
    by_provider: dict[str, list[Finding]] = {}
    for name in providers:
        by_provider[name] = lint(schema, name)
    return by_provider
122
+
123
+
124
def has_errors(findings: list[Finding]) -> bool:
    """Return True when at least one finding has severity ``"error"``."""
    for finding in findings:
        if finding.severity == "error":
            return True
    return False
schemafit/model.py ADDED
@@ -0,0 +1,37 @@
1
+ """Data model for lint findings."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import asdict, dataclass
6
+ from typing import Any
7
+
8
+
9
@dataclass(frozen=True)
class Finding:
    """One provider-compatibility violation found in a schema.

    Instances are immutable. The first seven fields are required; the last
    three have defaults.

    Attributes:
        provider: provider whose rule fired (openai|anthropic|gemini).
        rule_id: stable identifier of that rule.
        kind: the rule kind (drives auto-repair dispatch).
        node_pointer: JSON Pointer to the subschema the rule was applied to.
        json_pointer: JSON Pointer to the precise offending location (keyword).
        keyword: the JSON-Schema keyword or property name involved.
        reason: human-readable explanation.
        severity: "error" (fails CI) or "warning".
        doc_url: primary-source link documenting the constraint.
        auto_repair: repair strategy declared by the rule
            (strip | set_false | fill_required | manual).
    """

    provider: str
    rule_id: str
    kind: str
    node_pointer: str
    json_pointer: str
    keyword: str
    reason: str
    severity: str = "error"
    doc_url: str = ""
    auto_repair: str = "manual"  # strip | set_false | fill_required | manual

    def to_dict(self) -> dict[str, Any]:
        """Return the finding as a plain ``{field name: value}`` dict."""
        as_mapping = asdict(self)
        return as_mapping
schemafit/py.typed ADDED
File without changes
schemafit/repair.py ADDED
@@ -0,0 +1,63 @@
1
+ """Best-effort, conservative auto-repair toward a provider-valid schema.
2
+
3
+ Repair is deliberately limited to transforms that are *safe* or whose lossiness
4
+ is reported. It never silently changes meaning without flagging it. Transforms
5
+ it cannot perform safely are returned as ``manual_required`` rather than guessed.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import copy
11
+ from typing import Any
12
+
13
+ from .linter import lint
14
+ from .walk import resolve_pointer
15
+
16
+
17
def repair(schema: object, provider: str) -> tuple[Any, dict]:
    """Return ``(fixed_schema, report)``.

    ``schema`` itself is never modified; fixes are applied to a deep copy.

    report = {auto_fixed: [...], lossy: [...], manual_required: [...]}
    Each entry is a short ``"<rule_id> @ <pointer>"`` string. ``lossy`` lists
    the fixes that dropped or tightened a constraint; ``manual_required``
    lists findings left untouched for a human.
    """
    fixed = copy.deepcopy(schema)
    report: dict[str, list[str]] = {"auto_fixed": [], "lossy": [], "manual_required": []}

    # Lint the working copy once; apply fixes by each rule's declared strategy.
    for finding in lint(fixed, provider):
        if finding.severity != "error":
            continue  # warnings don't fail CI; leave them for the author
        node = resolve_pointer(fixed, finding.node_pointer)
        if not isinstance(node, dict):
            # The node was removed/overwritten by an earlier fix in this pass —
            # already handled; do not emit a dangling tag.
            continue
        tag = f"{finding.rule_id} @ {finding.json_pointer}"

        strategy = finding.auto_repair
        if strategy == "strip":
            # Dropping a validation constraint is provider-valid but lossy.
            if finding.keyword in node:
                del node[finding.keyword]
            report["auto_fixed"].append(tag)
            report["lossy"].append(tag)
        elif strategy == "set_false":
            # Overwriting a subschema (open-map) with False discards a constraint.
            if isinstance(node.get(finding.keyword), dict):
                report["lossy"].append(tag)
            node[finding.keyword] = False
            report["auto_fixed"].append(tag)
        elif strategy == "fill_required":
            props = node.get("properties")
            if isinstance(props, dict):
                declared = node.get("required")
                # Keep only well-formed entries. A string-valued 'required'
                # would otherwise be exploded into single characters and
                # written back; a non-iterable one would raise TypeError.
                kept = (
                    {entry for entry in declared if isinstance(entry, str)}
                    if isinstance(declared, list)
                    else set()
                )
                node["required"] = sorted(kept | set(props))
                report["auto_fixed"].append(tag)
                report["lossy"].append(tag)  # optional fields made required
            else:
                report["manual_required"].append(tag)
        else:
            # "manual": structural rewrites (anyOf/oneOf removal, dict->object)
            # are unsafe to guess — surface for a human.
            report["manual_required"].append(tag)

    return fixed, report
schemafit/report.py ADDED
@@ -0,0 +1,48 @@
1
+ """Human and JSON reporters for lint results."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+
7
+ from .model import Finding
8
+
9
+
10
def _status(findings: list[Finding]) -> str:
    """Return ``"FAIL"`` if any finding has severity ``"error"``, else ``"PASS"``."""
    for finding in findings:
        if finding.severity == "error":
            return "FAIL"
    return "PASS"
12
+
13
+
14
def format_human(results: dict[str, list[Finding]]) -> str:
    """Render per-provider findings as a plain-text report.

    Each provider gets a header line with its PASS/FAIL status and its error
    and warning counts, followed by the location, reason and (when present)
    reference link of every finding. An empty ``results`` mapping renders as
    ``(no providers selected)``.
    """
    rendered: list[str] = []
    for provider, findings in results.items():
        error_count = sum(1 for f in findings if f.severity == "error")
        # Anything that is not an error is counted as a warning.
        warning_count = sum(1 for f in findings if f.severity != "error")
        verdict = "FAIL" if error_count else "PASS"
        rendered.append(
            f"[{provider}] {verdict} — {error_count} error(s), {warning_count} warning(s)"
        )
        for f in findings:
            entry = [
                f" {f.severity.upper():<7} {f.json_pointer} ({f.rule_id})",
                f" {f.reason}",
            ]
            if f.doc_url:
                entry.append(f" ref: {f.doc_url}")
            rendered.extend(entry)
    if len(rendered) == 0:
        rendered.append("(no providers selected)")
    return "\n".join(rendered)
30
+
31
+
32
def _results_payload(results: dict[str, list[Finding]]) -> dict:
    """Convert per-provider findings into a JSON-serializable mapping.

    Each provider maps to ``{"status": ..., "findings": [...]}``, with every
    finding converted through ``Finding.to_dict``.
    """
    payload: dict = {}
    for provider, findings in results.items():
        entry = {"status": _status(findings)}
        entry["findings"] = [finding.to_dict() for finding in findings]
        payload[provider] = entry
    return payload
40
+
41
+
42
def format_json(results: dict[str, list[Finding]]) -> str:
    """Serialize one schema's per-provider results as indented, key-sorted JSON."""
    payload = _results_payload(results)
    return json.dumps(payload, sort_keys=True, indent=2)
44
+
45
+
46
def format_json_multi(all_results: dict[str, dict[str, list[Finding]]]) -> str:
    """Serialize results for several schemas as indented, key-sorted JSON.

    The top-level keys are the keys of ``all_results`` (schema paths); each
    value is that schema's per-provider payload.
    """
    by_path: dict = {}
    for path, results in all_results.items():
        by_path[path] = _results_payload(results)
    return json.dumps(by_path, sort_keys=True, indent=2)
@@ -0,0 +1,19 @@
1
+ {
2
+ "provider": "anthropic",
3
+ "doc": "JSON-Schema validation keywords Anthropic rejects on the STRICT structured-output path (400 Bad Request), keyword set confirmed verbatim in vercel/ai#13355. SCOPE: these target the structured-output / response_format surface (where the AI SDK had to strip them to avoid a 400); general Messages-API tool input_schema is more permissive and may accept (ignore) some of these. Run this pack against schemas you send on the structured-output path.",
4
+ "rules": [
5
+ {"id": "anthropic-no-minLength", "kind": "forbidden_keyword", "keyword": "minLength", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'minLength' validation keyword in structured-output/tool schemas (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"},
6
+ {"id": "anthropic-no-maxLength", "kind": "forbidden_keyword", "keyword": "maxLength", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'maxLength' validation keyword (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"},
7
+ {"id": "anthropic-no-pattern", "kind": "forbidden_keyword", "keyword": "pattern", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'pattern' validation keyword (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"},
8
+ {"id": "anthropic-no-format", "kind": "forbidden_keyword", "keyword": "format", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'format' validation keyword (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"},
9
+ {"id": "anthropic-no-minimum", "kind": "forbidden_keyword", "keyword": "minimum", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'minimum' validation keyword (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"},
10
+ {"id": "anthropic-no-maximum", "kind": "forbidden_keyword", "keyword": "maximum", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'maximum' validation keyword (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"},
11
+ {"id": "anthropic-no-exclusiveMinimum", "kind": "forbidden_keyword", "keyword": "exclusiveMinimum", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'exclusiveMinimum' validation keyword (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"},
12
+ {"id": "anthropic-no-exclusiveMaximum", "kind": "forbidden_keyword", "keyword": "exclusiveMaximum", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'exclusiveMaximum' validation keyword (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"},
13
+ {"id": "anthropic-no-minItems", "kind": "forbidden_keyword", "keyword": "minItems", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'minItems' validation keyword (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"},
14
+ {"id": "anthropic-no-maxItems", "kind": "forbidden_keyword", "keyword": "maxItems", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'maxItems' validation keyword (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"},
15
+ {"id": "anthropic-no-uniqueItems", "kind": "forbidden_keyword", "keyword": "uniqueItems", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'uniqueItems' validation keyword (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"},
16
+ {"id": "anthropic-no-minProperties", "kind": "forbidden_keyword", "keyword": "minProperties", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'minProperties' validation keyword (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"},
17
+ {"id": "anthropic-no-maxProperties", "kind": "forbidden_keyword", "keyword": "maxProperties", "severity": "error", "auto_repair": "strip", "reason": "Anthropic rejects the 'maxProperties' validation keyword (400 Bad Request).", "doc_url": "https://github.com/vercel/ai/issues/13355"}
18
+ ]
19
+ }
@@ -0,0 +1,41 @@
1
+ {
2
+ "provider": "gemini",
3
+ "doc": "Gemini response_schema portability cautions. NOTE: Gemini's structured-output schema support has evolved fast — Gemini 2.5 added anyOf (Jan 2026) and the API added additionalProperties (Nov 2025); older models (<=2.0) and older python-genai SDK versions rejected them. These are therefore emitted as WARNINGS (version-sensitive portability cautions), not hard errors — use --strict to fail CI on them.",
4
+ "rules": [
5
+ {
6
+ "id": "gemini-anyof-version-sensitive",
7
+ "kind": "forbidden_keyword",
8
+ "keyword": "anyOf",
9
+ "severity": "warning",
10
+ "auto_repair": "manual",
11
+ "reason": "'anyOf' is supported by current Gemini 2.5 (Jan 2026) but raises 'AnyOf is not supported' on older Gemini models / older python-genai. Portability caution for older targets.",
12
+ "doc_url": "https://github.com/googleapis/python-genai/issues/460"
13
+ },
14
+ {
15
+ "id": "gemini-oneof-unsupported",
16
+ "kind": "forbidden_keyword",
17
+ "keyword": "oneOf",
18
+ "severity": "warning",
19
+ "auto_repair": "manual",
20
+ "reason": "Gemini's response schema models alternatives via 'anyOf', not 'oneOf'; 'oneOf' is not a documented response_schema keyword. Prefer anyOf.",
21
+ "doc_url": "https://ai.google.dev/gemini-api/docs/structured-output"
22
+ },
23
+ {
24
+ "id": "gemini-open-dict-version-sensitive",
25
+ "kind": "forbidden_additional_properties_schema",
26
+ "severity": "warning",
27
+ "auto_repair": "manual",
28
+ "reason": "Open-ended dict/map types (additionalProperties as a schema): the Gemini API added additionalProperties support (Nov 2025), but older python-genai rejects it client-side. Portability caution for older SDK versions.",
29
+ "doc_url": "https://github.com/googleapis/python-genai/issues/1815"
30
+ },
31
+ {
32
+ "id": "gemini-ref-recursion-risk",
33
+ "kind": "forbidden_keyword",
34
+ "keyword": "$ref",
35
+ "severity": "warning",
36
+ "auto_repair": "manual",
37
+ "reason": "Gemini may fail on '$ref' / self-referencing schemas (maximum recursion errors); inline or flatten the reference.",
38
+ "doc_url": "https://github.com/googleapis/python-genai/issues/460"
39
+ }
40
+ ]
41
+ }
@@ -0,0 +1,43 @@
1
+ {
2
+ "provider": "openai",
3
+ "doc": "OpenAI strict structured-outputs / response_format constraints",
4
+ "rules": [
5
+ {
6
+ "id": "openai-additional-properties-false",
7
+ "kind": "object_requires",
8
+ "keyword": "additionalProperties",
9
+ "value": false,
10
+ "severity": "error",
11
+ "auto_repair": "set_false",
12
+ "reason": "OpenAI strict structured outputs require every object schema to set additionalProperties:false.",
13
+ "doc_url": "https://platform.openai.com/docs/guides/structured-outputs"
14
+ },
15
+ {
16
+ "id": "openai-all-properties-required",
17
+ "kind": "object_all_properties_required",
18
+ "severity": "error",
19
+ "auto_repair": "fill_required",
20
+ "reason": "OpenAI strict structured outputs require every defined property to appear in 'required' (model optionality via a nullable union instead).",
21
+ "doc_url": "https://platform.openai.com/docs/guides/structured-outputs"
22
+ },
23
+ {
24
+ "id": "openai-no-default",
25
+ "kind": "forbidden_keyword",
26
+ "keyword": "default",
27
+ "severity": "error",
28
+ "auto_repair": "strip",
29
+ "reason": "OpenAI structured outputs reject 'default' within a property definition.",
30
+ "doc_url": "https://github.com/openai/openai-agents-python/issues/474"
31
+ },
32
+ {
33
+ "id": "openai-no-oneof-in-array-items",
34
+ "kind": "forbidden_keyword_in_context",
35
+ "keyword": "oneOf",
36
+ "context": "array_items",
37
+ "severity": "error",
38
+ "auto_repair": "manual",
39
+ "reason": "OpenAI rejects 'oneOf' within the items definition of an array property (use anyOf).",
40
+ "doc_url": "https://github.com/openai/openai-agents-python/issues/474"
41
+ }
42
+ ]
43
+ }
schemafit/walk.py ADDED
@@ -0,0 +1,120 @@
1
+ """Deterministic JSON-Schema traversal with JSON-Pointer tracking.
2
+
3
+ Yields every *object-form* subschema node, with a precise JSON Pointer and an
4
+ *edge context* (how this node relates to its parent, e.g. it is an array's
5
+ ``items`` schema). The context describes only the immediate parent edge and is
6
+ intentionally NOT inherited, so a rule like "oneOf is forbidden inside array
7
+ items" matches the items schema itself and not arbitrarily deep descendants.
8
+
9
+ Boolean subschemas (the JSON-Schema ``true``/``false`` forms) are not yielded:
10
+ they carry no keywords for the rule engine to inspect. Provider rules that care
11
+ about the open-map form check the parent node's keyword value directly.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from collections.abc import Iterator
17
+
18
+ # subschema maps: {name -> schema}
19
+ _CHILD_MAP_KEYS = ("properties", "patternProperties", "$defs", "definitions", "dependentSchemas")
20
+ # single subschema values
21
+ _CHILD_SCHEMA_KEYS = (
22
+ "items",
23
+ "additionalItems",
24
+ "additionalProperties",
25
+ "unevaluatedItems",
26
+ "unevaluatedProperties",
27
+ "contains",
28
+ "contentSchema",
29
+ "propertyNames",
30
+ "not",
31
+ "if",
32
+ "then",
33
+ "else",
34
+ )
35
+ # lists of subschemas
36
+ _CHILD_LIST_KEYS = ("allOf", "anyOf", "oneOf", "prefixItems")
37
+
38
+
39
def _escape(token: str) -> str:
    """Escape one JSON Pointer reference token (RFC 6901).

    ``~`` becomes ``~0`` and ``/`` becomes ``~1``; every other character is
    kept as-is.
    """
    substitutions = {"~": "~0", "/": "~1"}
    return "".join(substitutions.get(char, char) for char in token)
42
+
43
+
44
def _unescape(token: str) -> str:
    """Decode one JSON Pointer reference token (inverse of ``_escape``).

    ``~1`` becomes ``/`` and ``~0`` becomes ``~``.
    """
    # Split on '~0' first and decode '~1' inside each piece. Rejoining with
    # '~' then restores the tildes without ever turning '~01' into '/'.
    pieces = token.split("~0")
    return "~".join(piece.replace("~1", "/") for piece in pieces)
46
+
47
+
48
def walk(
    schema: object,
    pointer: str = "#",
    context: frozenset[str] = frozenset(),
) -> Iterator[tuple[str, dict, frozenset[str]]]:
    """Yield ``(json_pointer, node, edge_context)`` for every subschema dict.

    The node itself comes first, then its children depth-first in a fixed
    order: named subschema maps, single-subschema keywords, subschema lists,
    and finally draft-07 ``dependencies``. Anything that is not a dict
    (including boolean schemas) yields nothing.
    """
    if not isinstance(schema, dict):
        return
    yield pointer, schema, context

    no_edge: frozenset[str] = frozenset()
    array_edge = frozenset({"array_items"})

    # 1. Maps of name -> subschema.
    for map_key in _CHILD_MAP_KEYS:
        mapping = schema.get(map_key)
        if not isinstance(mapping, dict):
            continue
        edge = frozenset({"property"}) if map_key == "properties" else no_edge
        for child_name, child_schema in mapping.items():
            child_pointer = f"{pointer}/{map_key}/{_escape(str(child_name))}"
            yield from walk(child_schema, child_pointer, edge)

    # 2. Keywords whose value is a single subschema.
    for schema_key in _CHILD_SCHEMA_KEYS:
        value = schema.get(schema_key)
        if isinstance(value, dict):
            if schema_key in ("items", "additionalItems", "unevaluatedItems"):
                edge = array_edge
            elif schema_key in ("additionalProperties", "unevaluatedProperties"):
                edge = frozenset({"additional_properties"})
            else:
                edge = no_edge
            yield from walk(value, f"{pointer}/{schema_key}", edge)
        elif schema_key == "items" and isinstance(value, list):
            # tuple validation: items as an array of schemas
            for index, member in enumerate(value):
                yield from walk(member, f"{pointer}/items/{index}", array_edge)

    # 3. Keywords whose value is a list of subschemas.
    for list_key in _CHILD_LIST_KEYS:
        members = schema.get(list_key)
        if not isinstance(members, list):
            continue
        edge = array_edge if list_key == "prefixItems" else frozenset({f"{list_key}_member"})
        for index, member in enumerate(members):
            yield from walk(member, f"{pointer}/{list_key}/{index}", edge)

    # 4. draft-07 "dependencies": map of name -> (schema | list[str]). Only dict
    # values are subschemas; array values are property-name lists, so skip them.
    dependencies = schema.get("dependencies")
    if isinstance(dependencies, dict):
        for dep_name, dep_schema in dependencies.items():
            if isinstance(dep_schema, dict):
                dep_pointer = f"{pointer}/dependencies/{_escape(str(dep_name))}"
                yield from walk(dep_schema, dep_pointer, no_edge)
98
+
99
+
100
def resolve_pointer(root: object, pointer: str) -> object | None:
    """Resolve a JSON Pointer (``#`` / ``#/a/b/0``) to its node, or None.

    ``#`` and the empty string resolve to ``root`` itself. Any other pointer
    must start with ``#/``. None is returned when the pointer has a different
    form, when a segment names a missing key or an out-of-range index, or when
    the path descends into a value that is neither a list nor a dict.
    """
    if pointer in ("#", ""):
        return root
    if not pointer.startswith("#/"):
        return None
    node: object = root
    for raw in pointer[2:].split("/"):
        token = _unescape(raw)
        if isinstance(node, list):
            # Only plain ASCII digits are array indices. int() alone would also
            # accept "-1" (silently resolving to the LAST element), "+1", " 1"
            # and "1_0", none of which RFC 6901 allows.
            if not (token.isascii() and token.isdigit()):
                return None
            index = int(token)
            if index >= len(node):
                return None
            node = node[index]
        elif isinstance(node, dict):
            if token not in node:
                return None
            node = node[token]
        else:
            return None
    return node
@@ -0,0 +1,154 @@
1
+ Metadata-Version: 2.4
2
+ Name: schemafit
3
+ Version: 0.1.0
4
+ Summary: Provider-aware structured-output / JSON-Schema CI linter — fail CI before your schema 400s on OpenAI, Anthropic, or Gemini
5
+ Author-email: Dan Mercede <dan@danmercede.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/OrionArchitekton/schemafit
8
+ Project-URL: Issues, https://github.com/OrionArchitekton/schemafit/issues
9
+ Keywords: json-schema,structured-output,openai,anthropic,gemini,llm,ci,linter,tool-calling
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Topic :: Software Development :: Quality Assurance
17
+ Requires-Python: >=3.11
18
+ Description-Content-Type: text/markdown
19
+ License-File: LICENSE
20
+ Provides-Extra: dev
21
+ Requires-Dist: pytest>=8.0; extra == "dev"
22
+ Requires-Dist: ruff>=0.6; extra == "dev"
23
+ Dynamic: license-file
24
+
25
+ # schemafit
26
+
27
+ **Provider-aware structured-output / JSON-Schema CI linter.** Catch the schema
28
+ incompatibilities that make one provider `400` while another succeeds — *before*
29
+ they hit production, as a fast, offline CI check.
30
+
31
+ A JSON Schema / tool definition / `response_format` that works on OpenAI can
32
+ `400` on Anthropic or Gemini (and vice-versa): nested `oneOf`, a missing
33
+ `additionalProperties: false`, a `default` in a property, Anthropic-rejected
34
+ validation keywords (`minLength`, `format`, `pattern`, …), Gemini's
35
+ version-dependent `anyOf`/dict support. The API tells you it failed but not *which constraint*
36
+ violated it, so teams hand-port schemas and debug by trial-and-error at runtime.
37
+
38
+ `schemafit` encodes each provider's documented constraint surface as a
39
+ **versioned, declarative rule pack** and lints your schema statically — pointing
40
+ at the exact JSON-Pointer path, the keyword, and why — with a non-zero exit code
41
+ so CI fails the PR instead of prod.
42
+
43
+ > Every rule is grounded in a real, cited provider issue (see
44
+ > [`schemafit/rules/`](schemafit/rules/)). It is **not** a runtime client: it
45
+ > makes no model calls, needs no API key, and has **zero runtime dependencies**.
46
+
47
+ ## Why this and not Instructor / BAML / LiteLLM / Vercel AI SDK?
48
+
49
+ Those are excellent **runtime** clients — they normalize, repair, or constrain a
50
+ schema *at call-time*. `schemafit` fills the gap they leave: a **static,
51
+ pre-ship CI lint** that fails the build before the schema ever reaches a
52
+ provider, over the raw schemas you already ship, with no DSL or codegen buy-in.
53
+
54
+ ## Install
55
+
56
+ ```bash
57
+ # From source (works today):
58
+ pip install "git+https://github.com/OrionArchitekton/schemafit"
59
+ # or build and run the container:
60
+ docker build -t schemafit . && docker run --rm schemafit demo
61
+ ```
62
+
63
+ Once the first release is tagged (`v0.1.0`), `pip install schemafit` (PyPI) and
64
+ `docker run --rm ghcr.io/orionarchitekton/schemafit demo` (GHCR) become
65
+ available — both are published by the release workflow on a `v*` tag (PyPI via
66
+ Trusted Publishing; image to GHCR).
67
+
68
+ ## Usage
69
+
70
+ ```bash
71
+ # Lint one schema against several providers (exit 1 if any error):
72
+ schemafit lint my-schema.json --provider openai,anthropic,gemini
73
+
74
+ # Machine-readable output for CI annotations:
75
+ schemafit lint my-schema.json --provider anthropic --format json
76
+
77
+ # Also fail on warnings (e.g. Gemini $ref recursion risk):
78
+ schemafit lint my-schema.json --provider gemini --strict
79
+
80
+ # Emit a best-effort provider-valid variant (lossy transforms are flagged):
81
+ schemafit repair my-schema.json --provider anthropic --out fixed.json
82
+
83
+ # List supported providers / run a hermetic end-to-end proof:
84
+ schemafit providers
85
+ schemafit demo
86
+ ```
87
+
88
+ Example:
89
+
90
+ ```
91
+ $ schemafit lint order.json --provider anthropic
92
+ [anthropic] FAIL — 2 error(s), 0 warning(s)
93
+ ERROR #/properties/sku/pattern (anthropic-no-pattern)
94
+ Anthropic rejects the 'pattern' validation keyword (400 Bad Request).
95
+ ref: https://github.com/vercel/ai/issues/13355
96
+ ERROR #/properties/qty/minimum (anthropic-no-minimum)
97
+ Anthropic rejects the 'minimum' validation keyword (400 Bad Request).
98
+ ```
99
+
100
+ ## Use in CI
101
+
102
+ GitHub Actions (this repo ships a composite action):
103
+
104
+ ```yaml
105
+ - uses: OrionArchitekton/schemafit@v0.1.0
106
+ with:
107
+ schema: schemas/tool.json
108
+ providers: openai,anthropic,gemini
109
+ ```
110
+
111
+ Or call the CLI directly, or run it as a pre-commit hook (`.pre-commit-hooks.yaml` is included):
112
+
113
+ ```yaml
114
+ - repo: https://github.com/OrionArchitekton/schemafit
115
+ rev: v0.1.0
116
+ hooks:
117
+ - id: schemafit
118
+ args: ["--provider", "openai,anthropic,gemini"]
119
+ files: '^schemas/.*\.json$' # scope to YOUR LLM schemas, not every .json
120
+ ```
121
+
122
+ > Scope the hook with `files:` to the directory holding your LLM schemas — the
123
+ > default `types: [json]` would otherwise lint every JSON file in the repo
124
+ > (`package.json`, `tsconfig.json`, lockfiles), which are not LLM schemas.
125
+
126
+ ## Supported providers (v0.1)
127
+
128
+ | Provider | Checks (grounded in) |
129
+ |---|---|
130
+ | `openai` | `additionalProperties:false` required; every property must be listed in `required`; no `default`; no `oneOf` in array items ([openai-agents-python#474](https://github.com/openai/openai-agents-python/issues/474), [claude-task-master#1522](https://github.com/eyaltoledano/claude-task-master/issues/1522)) |
131
+ | `anthropic` | 13 rejected validation keywords on the **strict structured-output surface**: `minLength`/`maxLength`/`pattern`/`format`/`minimum`/`maximum`/`exclusiveMinimum`/`exclusiveMaximum`/`minItems`/`maxItems`/`uniqueItems`/`minProperties`/`maxProperties` ([vercel/ai#13355](https://github.com/vercel/ai/issues/13355), [anthropic-sdk-python#1034](https://github.com/anthropics/anthropic-sdk-python/issues/1034)). General Messages-API tool `input_schema` is more permissive — run this pack against schemas you send on the structured-output path. |
132
+ | `gemini` | **Portability warnings** (version-sensitive, non-failing by default): `anyOf` (rejected by ≤2.0 / old SDKs, supported by 2.5), `oneOf`, open dict (`additionalProperties` schema), `$ref` recursion. Gemini's schema support changed fast (`anyOf` Jan 2026, `additionalProperties` Nov 2025), so these *warn* — use `--strict` to gate on them. ([python-genai#460](https://github.com/googleapis/python-genai/issues/460), [docs](https://ai.google.dev/gemini-api/docs/structured-output)) |
133
+
134
+ ## Exit codes
135
+
136
+ | code | meaning |
137
+ |---|---|
138
+ | `0` | no errors (warnings allowed unless `--strict`) |
139
+ | `1` | at least one error (CI fail) |
140
+ | `2` | bad input (unreadable / invalid JSON) |
141
+
142
+ ## Scope (v0.1) and roadmap
143
+
144
+ In scope now: the `lint` + `repair` core, three provider rule packs, JSON/human
145
+ reporters, Docker image, GitHub Action, pre-commit hook.
146
+
147
+ Deferred (v0.2+): a `--live-verify` mode that calls each provider to confirm,
148
+ an npm/`ajv` port for the JS/TS ecosystem, more providers (Mistral, Cohere,
149
+ Bedrock, Vertex), automatic rule-pack drift detection, SARIF output, and
150
+ source-model (Pydantic/Zod) auto-fix.
151
+
152
+ ## License
153
+
154
+ MIT © 2026 Dan Mercede
@@ -0,0 +1,18 @@
1
+ schemafit/__init__.py,sha256=OKm75l--MWFyLsnccCqeksa-8ovPurvAvIKOdQmyKrA,569
2
+ schemafit/__main__.py,sha256=ueatLyYgr_5e0HWnJz5NnQtr1XYm4Trjt-ReXzeimF4,158
3
+ schemafit/cli.py,sha256=h6V1VRKScL7zENWbFiWQhqJotCrnpYjSPLGiVHHVrtc,7086
4
+ schemafit/linter.py,sha256=0H2nlRlOvYqkOidmafIfVQu107S5o1Hxs0qnwiMSoi8,4564
5
+ schemafit/model.py,sha256=3PWvObnayPFd8IQCpHVIQOfIVcvhb-1rZPB8H2AZaWA,1167
6
+ schemafit/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ schemafit/repair.py,sha256=weTja-zDlfG7F_Ed8WEb6IYLDEKV1dn9i3S5tnT3z2s,2659
8
+ schemafit/report.py,sha256=eJxoEjv74MpwWtGj8mVk88UV6U6nwdb7sbKr5bEq8ew,1613
9
+ schemafit/walk.py,sha256=Zx6PsC6TMhLTywBvwQxTkOOYfXxPJ_CfpuR-_8m9p4A,4382
10
+ schemafit/rules/anthropic.json,sha256=UgnTjxJSBWMrJfQjzm1LfbwalpPA4_r_Rgdv52DwSGE,4178
11
+ schemafit/rules/gemini.json,sha256=2dqB29YpwmJq35KWGtW9gzGMxkOeof8d32K0RnezPVY,2181
12
+ schemafit/rules/openai.json,sha256=tnrzYKFE0SwuTBWepMUOnYZ4IqUZyhWIEk2r2I5LMAo,1676
13
+ schemafit-0.1.0.dist-info/licenses/LICENSE,sha256=aEsJcRjMZ6XR41a3faplwrgpqBMl1z3fwBQO624QpW8,1068
14
+ schemafit-0.1.0.dist-info/METADATA,sha256=C2MpqKUXbZ6IFA-4UQwThw6iuV0862eXjryOuxqWn6g,7011
15
+ schemafit-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
16
+ schemafit-0.1.0.dist-info/entry_points.txt,sha256=WAUOkQAkdGIkE6lZs5q4xymnkJFDdZJVXNTxXUbnDkI,49
17
+ schemafit-0.1.0.dist-info/top_level.txt,sha256=zvvzqzOkWVw8KVruRkvqYC9IUKoXAN7PGiAKlVgVq3w,10
18
+ schemafit-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ schemafit = schemafit.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Dan Mercede
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ schemafit