schemafit 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- schemafit-0.1.0/LICENSE +21 -0
- schemafit-0.1.0/PKG-INFO +154 -0
- schemafit-0.1.0/README.md +130 -0
- schemafit-0.1.0/pyproject.toml +54 -0
- schemafit-0.1.0/schemafit/__init__.py +16 -0
- schemafit-0.1.0/schemafit/__main__.py +10 -0
- schemafit-0.1.0/schemafit/cli.py +187 -0
- schemafit-0.1.0/schemafit/linter.py +125 -0
- schemafit-0.1.0/schemafit/model.py +37 -0
- schemafit-0.1.0/schemafit/py.typed +0 -0
- schemafit-0.1.0/schemafit/repair.py +63 -0
- schemafit-0.1.0/schemafit/report.py +48 -0
- schemafit-0.1.0/schemafit/rules/anthropic.json +19 -0
- schemafit-0.1.0/schemafit/rules/gemini.json +41 -0
- schemafit-0.1.0/schemafit/rules/openai.json +43 -0
- schemafit-0.1.0/schemafit/walk.py +120 -0
- schemafit-0.1.0/schemafit.egg-info/PKG-INFO +154 -0
- schemafit-0.1.0/schemafit.egg-info/SOURCES.txt +25 -0
- schemafit-0.1.0/schemafit.egg-info/dependency_links.txt +1 -0
- schemafit-0.1.0/schemafit.egg-info/entry_points.txt +2 -0
- schemafit-0.1.0/schemafit.egg-info/requires.txt +4 -0
- schemafit-0.1.0/schemafit.egg-info/top_level.txt +1 -0
- schemafit-0.1.0/setup.cfg +4 -0
- schemafit-0.1.0/tests/test_cli.py +107 -0
- schemafit-0.1.0/tests/test_lint.py +191 -0
- schemafit-0.1.0/tests/test_repair.py +62 -0
- schemafit-0.1.0/tests/test_walk.py +54 -0
schemafit-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Dan Mercede
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
schemafit-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: schemafit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Provider-aware structured-output / JSON-Schema CI linter — fail CI before your schema 400s on OpenAI, Anthropic, or Gemini
|
|
5
|
+
Author-email: Dan Mercede <dan@danmercede.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/OrionArchitekton/schemafit
|
|
8
|
+
Project-URL: Issues, https://github.com/OrionArchitekton/schemafit/issues
|
|
9
|
+
Keywords: json-schema,structured-output,openai,anthropic,gemini,llm,ci,linter,tool-calling
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
17
|
+
Requires-Python: >=3.11
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Provides-Extra: dev
|
|
21
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
22
|
+
Requires-Dist: ruff>=0.6; extra == "dev"
|
|
23
|
+
Dynamic: license-file
|
|
24
|
+
|
|
25
|
+
# schemafit
|
|
26
|
+
|
|
27
|
+
**Provider-aware structured-output / JSON-Schema CI linter.** Catch the schema
|
|
28
|
+
incompatibilities that make one provider `400` while another succeeds — *before*
|
|
29
|
+
they hit production, as a fast, offline CI check.
|
|
30
|
+
|
|
31
|
+
A JSON Schema / tool definition / `response_format` that works on OpenAI can
|
|
32
|
+
`400` on Anthropic or Gemini (and vice-versa): nested `oneOf`, a missing
|
|
33
|
+
`additionalProperties: false`, a `default` in a property, Anthropic-rejected
|
|
34
|
+
validation keywords (`minLength`, `format`, `pattern`, …), Gemini's lack of
|
|
35
|
+
`anyOf`/dict support. The API tells you it failed but not *which constraint*
|
|
36
|
+
violated it, so teams hand-port schemas and debug by trial-and-error at runtime.
|
|
37
|
+
|
|
38
|
+
`schemafit` encodes each provider's documented constraint surface as a
|
|
39
|
+
**versioned, declarative rule pack** and lints your schema statically — pointing
|
|
40
|
+
at the exact JSON-Pointer path, the keyword, and why — with a non-zero exit code
|
|
41
|
+
so CI fails the PR instead of prod.
|
|
42
|
+
|
|
43
|
+
> Every rule is grounded in a real, cited provider issue (see
|
|
44
|
+
> [`schemafit/rules/`](schemafit/rules/)). It is **not** a runtime client: it
|
|
45
|
+
> makes no model calls, needs no API key, and has **zero runtime dependencies**.
|
|
46
|
+
|
|
47
|
+
## Why this and not Instructor / BAML / LiteLLM / Vercel AI SDK?
|
|
48
|
+
|
|
49
|
+
Those are excellent **runtime** clients — they normalize, repair, or constrain a
|
|
50
|
+
schema *at call-time*. `schemafit` fills the gap they leave: a **static,
|
|
51
|
+
pre-ship CI lint** that fails the build before the schema ever reaches a
|
|
52
|
+
provider, over the raw schemas you already ship, with no DSL or codegen buy-in.
|
|
53
|
+
|
|
54
|
+
## Install
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
# From source (works today):
|
|
58
|
+
pip install "git+https://github.com/OrionArchitekton/schemafit"
|
|
59
|
+
# or build and run the container:
|
|
60
|
+
docker build -t schemafit . && docker run --rm schemafit demo
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Once the first release is tagged (`v0.1.0`), `pip install schemafit` (PyPI) and
|
|
64
|
+
`docker run --rm ghcr.io/orionarchitekton/schemafit demo` (GHCR) become
|
|
65
|
+
available — both are published by the release workflow on a `v*` tag (PyPI via
|
|
66
|
+
Trusted Publishing; image to GHCR).
|
|
67
|
+
|
|
68
|
+
## Usage
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
# Lint one schema against several providers (exit 1 if any error):
|
|
72
|
+
schemafit lint my-schema.json --provider openai,anthropic,gemini
|
|
73
|
+
|
|
74
|
+
# Machine-readable output for CI annotations:
|
|
75
|
+
schemafit lint my-schema.json --provider anthropic --format json
|
|
76
|
+
|
|
77
|
+
# Also fail on warnings (e.g. Gemini $ref recursion risk):
|
|
78
|
+
schemafit lint my-schema.json --provider gemini --strict
|
|
79
|
+
|
|
80
|
+
# Emit a best-effort provider-valid variant (lossy transforms are flagged):
|
|
81
|
+
schemafit repair my-schema.json --provider anthropic --out fixed.json
|
|
82
|
+
|
|
83
|
+
# List supported providers / run a hermetic end-to-end proof:
|
|
84
|
+
schemafit providers
|
|
85
|
+
schemafit demo
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Example:
|
|
89
|
+
|
|
90
|
+
```
|
|
91
|
+
$ schemafit lint order.json --provider anthropic
|
|
92
|
+
[anthropic] FAIL — 2 error(s), 0 warning(s)
|
|
93
|
+
ERROR #/properties/sku/pattern (anthropic-no-pattern)
|
|
94
|
+
Anthropic rejects the 'pattern' validation keyword (400 Bad Request).
|
|
95
|
+
ref: https://github.com/vercel/ai/issues/13355
|
|
96
|
+
ERROR #/properties/qty/minimum (anthropic-no-minimum)
|
|
97
|
+
Anthropic rejects the 'minimum' validation keyword (400 Bad Request).
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Use in CI
|
|
101
|
+
|
|
102
|
+
GitHub Actions (this repo ships a composite action):
|
|
103
|
+
|
|
104
|
+
```yaml
|
|
105
|
+
- uses: OrionArchitekton/schemafit@v0.1.0
|
|
106
|
+
with:
|
|
107
|
+
schema: schemas/tool.json
|
|
108
|
+
providers: openai,anthropic,gemini
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Or directly / as a pre-commit hook (`.pre-commit-hooks.yaml` is included):
|
|
112
|
+
|
|
113
|
+
```yaml
|
|
114
|
+
- repo: https://github.com/OrionArchitekton/schemafit
|
|
115
|
+
rev: v0.1.0
|
|
116
|
+
hooks:
|
|
117
|
+
- id: schemafit
|
|
118
|
+
args: ["--provider", "openai,anthropic,gemini"]
|
|
119
|
+
files: '^schemas/.*\.json$' # scope to YOUR LLM schemas, not every .json
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
> Scope the hook with `files:` to the directory holding your LLM schemas — the
|
|
123
|
+
> default `types: [json]` would otherwise lint every JSON file in the repo
|
|
124
|
+
> (`package.json`, `tsconfig.json`, lockfiles), which are not LLM schemas.
|
|
125
|
+
|
|
126
|
+
## Supported providers (v0.1)
|
|
127
|
+
|
|
128
|
+
| Provider | Checks (grounded in) |
|
|
129
|
+
|---|---|
|
|
130
|
+
| `openai` | `additionalProperties:false` required; all properties required; no `default`; no `oneOf` in array items ([openai-agents-python#474](https://github.com/openai/openai-agents-python/issues/474), [claude-task-master#1522](https://github.com/eyaltoledano/claude-task-master/issues/1522)) |
|
|
131
|
+
| `anthropic` | 13 rejected validation keywords on the **strict structured-output surface**: `minLength`/`maxLength`/`pattern`/`format`/`minimum`/`maximum`/`exclusiveMinimum`/`exclusiveMaximum`/`minItems`/`maxItems`/`uniqueItems`/`minProperties`/`maxProperties` ([vercel/ai#13355](https://github.com/vercel/ai/issues/13355), [anthropic-sdk-python#1034](https://github.com/anthropics/anthropic-sdk-python/issues/1034)). General Messages-API tool `input_schema` is more permissive — run this pack against schemas you send on the structured-output path. |
|
|
132
|
+
| `gemini` | **Portability warnings** (version-sensitive, non-failing by default): `anyOf` (rejected by ≤2.0 / old SDKs, supported by 2.5), `oneOf`, open dict (`additionalProperties` schema), `$ref` recursion. Gemini's schema support changed fast (`anyOf` Jan 2026, `additionalProperties` Nov 2025), so these *warn* — use `--strict` to gate on them. ([python-genai#460](https://github.com/googleapis/python-genai/issues/460), [docs](https://ai.google.dev/gemini-api/docs/structured-output)) |
|
|
133
|
+
|
|
134
|
+
## Exit codes
|
|
135
|
+
|
|
136
|
+
| code | meaning |
|
|
137
|
+
|---|---|
|
|
138
|
+
| `0` | no errors (warnings allowed unless `--strict`) |
|
|
139
|
+
| `1` | at least one error (CI fail) |
|
|
140
|
+
| `2` | bad input (unreadable / invalid JSON) |
|
|
141
|
+
|
|
142
|
+
## Scope (v0.1) and roadmap
|
|
143
|
+
|
|
144
|
+
In scope now: the `lint` + `repair` core, three provider rule packs, JSON/human
|
|
145
|
+
reporters, Docker image, GitHub Action, pre-commit hook.
|
|
146
|
+
|
|
147
|
+
Deferred (v0.2+): a `--live-verify` mode that calls each provider to confirm,
|
|
148
|
+
an npm/`ajv` port for the JS/TS ecosystem, more providers (Mistral, Cohere,
|
|
149
|
+
Bedrock, Vertex), automatic rule-pack drift detection, SARIF output, and
|
|
150
|
+
source-model (Pydantic/Zod) auto-fix.
|
|
151
|
+
|
|
152
|
+
## License
|
|
153
|
+
|
|
154
|
+
MIT © 2026 Dan Mercede
|
schemafit-0.1.0/README.md
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# schemafit
|
|
2
|
+
|
|
3
|
+
**Provider-aware structured-output / JSON-Schema CI linter.** Catch the schema
|
|
4
|
+
incompatibilities that make one provider `400` while another succeeds — *before*
|
|
5
|
+
they hit production, as a fast, offline CI check.
|
|
6
|
+
|
|
7
|
+
A JSON Schema / tool definition / `response_format` that works on OpenAI can
|
|
8
|
+
`400` on Anthropic or Gemini (and vice-versa): nested `oneOf`, a missing
|
|
9
|
+
`additionalProperties: false`, a `default` in a property, Anthropic-rejected
|
|
10
|
+
validation keywords (`minLength`, `format`, `pattern`, …), Gemini's lack of
|
|
11
|
+
`anyOf`/dict support. The API tells you it failed but not *which constraint*
|
|
12
|
+
violated it, so teams hand-port schemas and debug by trial-and-error at runtime.
|
|
13
|
+
|
|
14
|
+
`schemafit` encodes each provider's documented constraint surface as a
|
|
15
|
+
**versioned, declarative rule pack** and lints your schema statically — pointing
|
|
16
|
+
at the exact JSON-Pointer path, the keyword, and why — with a non-zero exit code
|
|
17
|
+
so CI fails the PR instead of prod.
|
|
18
|
+
|
|
19
|
+
> Every rule is grounded in a real, cited provider issue (see
|
|
20
|
+
> [`schemafit/rules/`](schemafit/rules/)). It is **not** a runtime client: it
|
|
21
|
+
> makes no model calls, needs no API key, and has **zero runtime dependencies**.
|
|
22
|
+
|
|
23
|
+
## Why this and not Instructor / BAML / LiteLLM / Vercel AI SDK?
|
|
24
|
+
|
|
25
|
+
Those are excellent **runtime** clients — they normalize, repair, or constrain a
|
|
26
|
+
schema *at call-time*. `schemafit` fills the gap they leave: a **static,
|
|
27
|
+
pre-ship CI lint** that fails the build before the schema ever reaches a
|
|
28
|
+
provider, over the raw schemas you already ship, with no DSL or codegen buy-in.
|
|
29
|
+
|
|
30
|
+
## Install
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
# From source (works today):
|
|
34
|
+
pip install "git+https://github.com/OrionArchitekton/schemafit"
|
|
35
|
+
# or build and run the container:
|
|
36
|
+
docker build -t schemafit . && docker run --rm schemafit demo
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Once the first release is tagged (`v0.1.0`), `pip install schemafit` (PyPI) and
|
|
40
|
+
`docker run --rm ghcr.io/orionarchitekton/schemafit demo` (GHCR) become
|
|
41
|
+
available — both are published by the release workflow on a `v*` tag (PyPI via
|
|
42
|
+
Trusted Publishing; image to GHCR).
|
|
43
|
+
|
|
44
|
+
## Usage
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
# Lint one schema against several providers (exit 1 if any error):
|
|
48
|
+
schemafit lint my-schema.json --provider openai,anthropic,gemini
|
|
49
|
+
|
|
50
|
+
# Machine-readable output for CI annotations:
|
|
51
|
+
schemafit lint my-schema.json --provider anthropic --format json
|
|
52
|
+
|
|
53
|
+
# Also fail on warnings (e.g. Gemini $ref recursion risk):
|
|
54
|
+
schemafit lint my-schema.json --provider gemini --strict
|
|
55
|
+
|
|
56
|
+
# Emit a best-effort provider-valid variant (lossy transforms are flagged):
|
|
57
|
+
schemafit repair my-schema.json --provider anthropic --out fixed.json
|
|
58
|
+
|
|
59
|
+
# List supported providers / run a hermetic end-to-end proof:
|
|
60
|
+
schemafit providers
|
|
61
|
+
schemafit demo
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Example:
|
|
65
|
+
|
|
66
|
+
```
|
|
67
|
+
$ schemafit lint order.json --provider anthropic
|
|
68
|
+
[anthropic] FAIL — 2 error(s), 0 warning(s)
|
|
69
|
+
ERROR #/properties/sku/pattern (anthropic-no-pattern)
|
|
70
|
+
Anthropic rejects the 'pattern' validation keyword (400 Bad Request).
|
|
71
|
+
ref: https://github.com/vercel/ai/issues/13355
|
|
72
|
+
ERROR #/properties/qty/minimum (anthropic-no-minimum)
|
|
73
|
+
Anthropic rejects the 'minimum' validation keyword (400 Bad Request).
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Use in CI
|
|
77
|
+
|
|
78
|
+
GitHub Actions (this repo ships a composite action):
|
|
79
|
+
|
|
80
|
+
```yaml
|
|
81
|
+
- uses: OrionArchitekton/schemafit@v0.1.0
|
|
82
|
+
with:
|
|
83
|
+
schema: schemas/tool.json
|
|
84
|
+
providers: openai,anthropic,gemini
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Or directly / as a pre-commit hook (`.pre-commit-hooks.yaml` is included):
|
|
88
|
+
|
|
89
|
+
```yaml
|
|
90
|
+
- repo: https://github.com/OrionArchitekton/schemafit
|
|
91
|
+
rev: v0.1.0
|
|
92
|
+
hooks:
|
|
93
|
+
- id: schemafit
|
|
94
|
+
args: ["--provider", "openai,anthropic,gemini"]
|
|
95
|
+
files: '^schemas/.*\.json$' # scope to YOUR LLM schemas, not every .json
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
> Scope the hook with `files:` to the directory holding your LLM schemas — the
|
|
99
|
+
> default `types: [json]` would otherwise lint every JSON file in the repo
|
|
100
|
+
> (`package.json`, `tsconfig.json`, lockfiles), which are not LLM schemas.
|
|
101
|
+
|
|
102
|
+
## Supported providers (v0.1)
|
|
103
|
+
|
|
104
|
+
| Provider | Checks (grounded in) |
|
|
105
|
+
|---|---|
|
|
106
|
+
| `openai` | `additionalProperties:false` required; all properties required; no `default`; no `oneOf` in array items ([openai-agents-python#474](https://github.com/openai/openai-agents-python/issues/474), [claude-task-master#1522](https://github.com/eyaltoledano/claude-task-master/issues/1522)) |
|
|
107
|
+
| `anthropic` | 13 rejected validation keywords on the **strict structured-output surface**: `minLength`/`maxLength`/`pattern`/`format`/`minimum`/`maximum`/`exclusiveMinimum`/`exclusiveMaximum`/`minItems`/`maxItems`/`uniqueItems`/`minProperties`/`maxProperties` ([vercel/ai#13355](https://github.com/vercel/ai/issues/13355), [anthropic-sdk-python#1034](https://github.com/anthropics/anthropic-sdk-python/issues/1034)). General Messages-API tool `input_schema` is more permissive — run this pack against schemas you send on the structured-output path. |
|
|
108
|
+
| `gemini` | **Portability warnings** (version-sensitive, non-failing by default): `anyOf` (rejected by ≤2.0 / old SDKs, supported by 2.5), `oneOf`, open dict (`additionalProperties` schema), `$ref` recursion. Gemini's schema support changed fast (`anyOf` Jan 2026, `additionalProperties` Nov 2025), so these *warn* — use `--strict` to gate on them. ([python-genai#460](https://github.com/googleapis/python-genai/issues/460), [docs](https://ai.google.dev/gemini-api/docs/structured-output)) |
|
|
109
|
+
|
|
110
|
+
## Exit codes
|
|
111
|
+
|
|
112
|
+
| code | meaning |
|
|
113
|
+
|---|---|
|
|
114
|
+
| `0` | no errors (warnings allowed unless `--strict`) |
|
|
115
|
+
| `1` | at least one error (CI fail) |
|
|
116
|
+
| `2` | bad input (unreadable / invalid JSON) |
|
|
117
|
+
|
|
118
|
+
## Scope (v0.1) and roadmap
|
|
119
|
+
|
|
120
|
+
In scope now: the `lint` + `repair` core, three provider rule packs, JSON/human
|
|
121
|
+
reporters, Docker image, GitHub Action, pre-commit hook.
|
|
122
|
+
|
|
123
|
+
Deferred (v0.2+): a `--live-verify` mode that calls each provider to confirm,
|
|
124
|
+
an npm/`ajv` port for the JS/TS ecosystem, more providers (Mistral, Cohere,
|
|
125
|
+
Bedrock, Vertex), automatic rule-pack drift detection, SARIF output, and
|
|
126
|
+
source-model (Pydantic/Zod) auto-fix.
|
|
127
|
+
|
|
128
|
+
## License
|
|
129
|
+
|
|
130
|
+
MIT © 2026 Dan Mercede
|
schemafit-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "schemafit"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Provider-aware structured-output / JSON-Schema CI linter — fail CI before your schema 400s on OpenAI, Anthropic, or Gemini"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "Dan Mercede", email = "dan@danmercede.com" }]
|
|
13
|
+
keywords = ["json-schema", "structured-output", "openai", "anthropic", "gemini", "llm", "ci", "linter", "tool-calling"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3.11",
|
|
19
|
+
"Programming Language :: Python :: 3.12",
|
|
20
|
+
"Programming Language :: Python :: 3.13",
|
|
21
|
+
"Topic :: Software Development :: Quality Assurance",
|
|
22
|
+
]
|
|
23
|
+
dependencies = []
|
|
24
|
+
|
|
25
|
+
[project.urls]
|
|
26
|
+
Homepage = "https://github.com/OrionArchitekton/schemafit"
|
|
27
|
+
Issues = "https://github.com/OrionArchitekton/schemafit/issues"
|
|
28
|
+
|
|
29
|
+
[project.optional-dependencies]
|
|
30
|
+
dev = ["pytest>=8.0", "ruff>=0.6"]
|
|
31
|
+
|
|
32
|
+
[project.scripts]
|
|
33
|
+
schemafit = "schemafit.cli:main"
|
|
34
|
+
|
|
35
|
+
[tool.setuptools.packages.find]
|
|
36
|
+
where = ["."]
|
|
37
|
+
include = ["schemafit*"]
|
|
38
|
+
|
|
39
|
+
[tool.setuptools.package-data]
|
|
40
|
+
schemafit = ["rules/*.json", "py.typed"]
|
|
41
|
+
|
|
42
|
+
[tool.ruff]
|
|
43
|
+
line-length = 100
|
|
44
|
+
target-version = "py311"
|
|
45
|
+
|
|
46
|
+
[tool.ruff.lint]
|
|
47
|
+
select = ["E", "F", "W", "I", "B", "UP", "S", "RUF"]
|
|
48
|
+
|
|
49
|
+
[tool.ruff.lint.per-file-ignores]
|
|
50
|
+
"tests/**" = ["S"]
|
|
51
|
+
|
|
52
|
+
[tool.pytest.ini_options]
|
|
53
|
+
testpaths = ["tests"]
|
|
54
|
+
python_files = "test_*.py"
|
schemafit-0.1.0/schemafit/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""schemafit — provider-aware structured-output / JSON-Schema CI linter.
|
|
2
|
+
|
|
3
|
+
Statically lint a JSON Schema / tool definition / response_format against each
|
|
4
|
+
LLM provider's documented constraint surface (OpenAI, Anthropic, Gemini) and
|
|
5
|
+
fail CI *before* the schema 400s in production on provider X.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
__version__ = "0.1.0"
|
|
11
|
+
|
|
12
|
+
from .linter import PROVIDERS, has_errors, lint, lint_multi
|
|
13
|
+
from .model import Finding
|
|
14
|
+
from .repair import repair
|
|
15
|
+
|
|
16
|
+
__all__ = ["PROVIDERS", "Finding", "__version__", "has_errors", "lint", "lint_multi", "repair"]
|
schemafit-0.1.0/schemafit/cli.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"""schemafit command-line interface.
|
|
2
|
+
|
|
3
|
+
Commands:
|
|
4
|
+
lint <schema.json> --provider openai[,anthropic,gemini] # exit 1 on violations
|
|
5
|
+
repair <schema.json> --provider <p> [--out fixed.json]
|
|
6
|
+
providers
|
|
7
|
+
demo # hermetic proof
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import argparse
|
|
13
|
+
import json
|
|
14
|
+
import sys
|
|
15
|
+
|
|
16
|
+
from . import __version__, report
|
|
17
|
+
from .linter import PROVIDERS, has_errors, lint, lint_multi
|
|
18
|
+
from .repair import repair
|
|
19
|
+
|
|
20
|
+
# Fixture for the `demo` command's hermetic proof: it is valid for OpenAI, yet
# on purpose carries validation keywords that Anthropic rejects and an `anyOf`
# that trips the Gemini pack.
DEMO_BAD_SCHEMA: dict = {
    "type": "object",
    "additionalProperties": False,
    "properties": {
        # Length bounds, `format` and numeric bounds are the Anthropic triggers.
        "name": {
            "type": "string",
            "minLength": 1,
            "maxLength": 50,
        },
        "email": {
            "type": "string",
            "format": "email",
        },
        "age": {
            "type": "integer",
            "minimum": 0,
            "maximum": 120,
        },
        # Nullable-by-union is the Gemini trigger.
        "status": {
            "anyOf": [
                {"type": "string"},
                {"type": "null"},
            ],
        },
    },
    "required": ["name", "email", "age", "status"],
}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _load_schema(path: str) -> object:
|
|
36
|
+
if path == "-":
|
|
37
|
+
return json.load(sys.stdin)
|
|
38
|
+
with open(path, encoding="utf-8") as fh:
|
|
39
|
+
return json.load(fh)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _parse_providers(spec: str) -> list[str]:
    """Split a comma-separated ``--provider`` value into provider names.

    Whitespace around each entry is stripped and empty entries are dropped.

    Raises:
        SystemExit: if no names remain after splitting, or if a name is not
            in ``PROVIDERS`` (the first such name, in input order, is the one
            reported).
    """
    names = [piece for piece in (raw.strip() for raw in spec.split(",")) if piece]
    if not names:
        raise SystemExit("error: --provider requires at least one provider")
    # Every kept name is a non-empty string, so None is a safe "all known" marker.
    unknown = next((name for name in names if name not in PROVIDERS), None)
    if unknown is not None:
        raise SystemExit(
            f"error: unknown provider {unknown!r} (choose from {', '.join(PROVIDERS)})"
        )
    return names
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def cmd_lint(args: argparse.Namespace) -> int:
    """Lint every schema in ``args.schemas`` against the requested providers.

    In human format each schema's report is printed as soon as it is linted
    (preceded by a ``== path ==`` header when several schemas were given). In
    JSON format a single combined document is printed after all schemas have
    been processed.

    Args:
        args: parsed ``lint`` arguments; reads ``provider``, ``schemas``,
            ``strict`` and ``format``.

    Returns:
        ``0`` when no schema failed, ``1`` when at least one did, and ``2`` as
        soon as a schema cannot be read, decoded or parsed, or is too deeply
        nested. A schema fails when any provider reports an error, or, with
        ``--strict``, when any provider reports any finding at all.

    Raises:
        SystemExit: from ``_parse_providers`` when the provider list is empty
            or names an unknown provider.
    """
    providers = _parse_providers(args.provider)
    all_results: dict[str, dict] = {}
    overall_fail = False
    for path in args.schemas:
        try:
            schema = _load_schema(path)
            results = lint_multi(schema, providers)
        except (OSError, UnicodeDecodeError, json.JSONDecodeError) as exc:
            # UnicodeDecodeError: the input is not valid UTF-8. It is a
            # ValueError, not an OSError, so it must be listed explicitly to
            # get the "bad input" exit code instead of a traceback.
            print(f"error: cannot read schema {path!r}: {exc}", file=sys.stderr)
            return 2
        except RecursionError:
            print(f"error: schema {path!r} is too deeply nested to lint safely", file=sys.stderr)
            return 2
        all_results[path] = results
        if args.strict:
            # Strict mode: any non-empty findings list counts, warnings included.
            failed = any(findings for findings in results.values())
        else:
            failed = any(has_errors(findings) for findings in results.values())
        overall_fail = overall_fail or failed
        if args.format != "json":
            if len(args.schemas) > 1:
                print(f"== {path} ==")
            print(report.format_human(results))
    if args.format == "json":
        print(report.format_json_multi(all_results))
    return 1 if overall_fail else 0
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def cmd_repair(args: argparse.Namespace) -> int:
    """Emit a repaired variant of one schema for a single provider.

    The repaired schema is rendered as indented JSON with sorted keys and
    written to ``args.out`` when given, otherwise to standard output. A
    one-line summary of the repair report, followed by one ``MANUAL`` line per
    entry in its ``manual_required`` list, goes to standard error.

    Args:
        args: parsed ``repair`` arguments; reads ``provider``, ``schema`` and
            ``out``.

    Returns:
        ``0`` on success, ``2`` when the schema cannot be read, decoded or
        parsed, or is too deeply nested.

    Raises:
        SystemExit: when ``args.provider`` is not in ``PROVIDERS``.
    """
    if args.provider not in PROVIDERS:
        raise SystemExit(f"error: unknown provider {args.provider!r}")
    try:
        schema = _load_schema(args.schema)
        fixed, rep = repair(schema, args.provider)
    except (OSError, UnicodeDecodeError, json.JSONDecodeError) as exc:
        # UnicodeDecodeError: the input is not valid UTF-8. It is a ValueError,
        # not an OSError, so it must be listed explicitly to get the "bad
        # input" exit code instead of a traceback.
        print(f"error: cannot read schema {args.schema!r}: {exc}", file=sys.stderr)
        return 2
    except RecursionError:
        print(
            f"error: schema {args.schema!r} is too deeply nested to repair safely",
            file=sys.stderr,
        )
        return 2
    rendered = json.dumps(fixed, indent=2, sort_keys=True)
    if args.out:
        with open(args.out, "w", encoding="utf-8") as fh:
            fh.write(rendered + "\n")
        # Status goes to stderr so stdout stays clean.
        print(f"wrote {args.out}", file=sys.stderr)
    else:
        print(rendered)
    print(
        f"repair: auto_fixed={len(rep['auto_fixed'])} "
        f"lossy={len(rep['lossy'])} manual_required={len(rep['manual_required'])}",
        file=sys.stderr,
    )
    for tag in rep["manual_required"]:
        print(f" MANUAL {tag}", file=sys.stderr)
    return 0
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def cmd_providers(_args: argparse.Namespace) -> int:
    """Print each entry of ``PROVIDERS`` on its own line and return ``0``.

    The parsed arguments are accepted only so the signature matches the other
    sub-command handlers; they are not used.
    """
    for provider_name in PROVIDERS:
        print(provider_name)
    return 0
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def cmd_demo(_args: argparse.Namespace) -> int:
    """Run the built-in proof: lint the demo schema, repair it, lint again.

    Everything operates on the in-module ``DEMO_BAD_SCHEMA``. The Anthropic
    result is printed before and after ``repair``, followed by a PASS/FAIL
    matrix across all ``PROVIDERS`` and a count of Gemini warnings.

    Returns:
        ``0`` when the schema had Anthropic errors before the repair and has
        none after it (``PROOF_OK``), otherwise ``3`` (``PROOF_FAILED``).
    """
    print("== schemafit demo ==")

    # Step 1: the unmodified demo schema against Anthropic.
    before = lint(DEMO_BAD_SCHEMA, "anthropic")
    n_before = len([finding for finding in before if finding.severity == "error"])
    exit_before = int(bool(has_errors(before)))
    print(f"PROVIDER=anthropic INPUT=demo-bad VIOLATIONS={n_before} EXIT={exit_before}")
    if before:
        first = before[0]
        print(
            f"VIOLATION_PATH={first.json_pointer} "
            f"(keyword: {first.keyword} -> rejected by anthropic)"
        )

    # Step 2: repair for Anthropic and lint the repaired schema.
    fixed, rep = repair(DEMO_BAD_SCHEMA, "anthropic")
    after = lint(fixed, "anthropic")
    n_after = len([finding for finding in after if finding.severity == "error"])
    exit_after = int(bool(has_errors(after)))
    print(
        f"--- after `schemafit repair --provider anthropic` --- "
        f"VIOLATIONS={n_after} EXIT={exit_after} auto_fixed={len(rep['auto_fixed'])}"
    )

    # Step 3: the original schema against every provider.
    matrix = lint_multi(DEMO_BAD_SCHEMA, list(PROVIDERS))
    cells = []
    for provider_name in PROVIDERS:
        verdict = "FAIL" if has_errors(matrix[provider_name]) else "PASS"
        cells.append(f"{provider_name}={verdict}")
    rendered = " ".join(cells)
    print(f"MULTI: {rendered}")
    gem_warns = len([finding for finding in matrix["gemini"] if finding.severity == "warning"])
    print(f"NOTE: gemini portability warnings={gem_warns} (version-sensitive, non-failing)")

    proved = has_errors(before) and not has_errors(after)
    if proved:
        print("PROOF_OK")
        return 0
    print("PROOF_FAILED")
    return 3
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def build_parser() -> argparse.ArgumentParser:
    """Construct the ``schemafit`` argument parser.

    A sub-command is required. Each of ``lint``, ``repair``, ``providers`` and
    ``demo`` stores its handler under ``func`` via ``set_defaults``, which is
    what ``main`` dispatches on.
    """
    root = argparse.ArgumentParser(
        prog="schemafit",
        description="Provider-aware structured-output / JSON-Schema CI linter.",
    )
    root.add_argument("--version", action="version", version=f"schemafit {__version__}")
    commands = root.add_subparsers(dest="command", required=True)

    lint_cmd = commands.add_parser(
        "lint", help="lint one or more schemas against one or more providers"
    )
    lint_cmd.add_argument("schemas", nargs="+", help="schema JSON file(s) ('-' = stdin)")
    lint_cmd.add_argument("--provider", required=True, help="comma list: openai,anthropic,gemini")
    lint_cmd.add_argument("--format", choices=("human", "json"), default="human")
    lint_cmd.add_argument("--strict", action="store_true", help="also fail (exit 1) on warnings")
    lint_cmd.set_defaults(func=cmd_lint)

    repair_cmd = commands.add_parser("repair", help="emit a best-effort provider-valid variant")
    repair_cmd.add_argument("schema", help="path to a JSON schema file, or '-' for stdin")
    repair_cmd.add_argument("--provider", required=True, help="one of: openai|anthropic|gemini")
    repair_cmd.add_argument("--out", help="write fixed schema here (default: stdout)")
    repair_cmd.set_defaults(func=cmd_repair)

    # The remaining sub-commands take no arguments of their own.
    argless = (
        ("providers", "list supported providers", cmd_providers),
        ("demo", "run a hermetic end-to-end proof", cmd_demo),
    )
    for command_name, blurb, handler in argless:
        commands.add_parser(command_name, help=blurb).set_defaults(func=handler)

    return root
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def main(argv: list[str] | None = None) -> int:
    """Parse *argv* and run the selected sub-command, returning its exit code.

    *argv* is handed to ``parse_args`` unchanged, so ``None`` makes argparse
    fall back to the process's command-line arguments.
    """
    namespace = build_parser().parse_args(argv)
    handler = namespace.func
    return handler(namespace)
|