schemaglow 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- schemaglow-1.0.0/.github/workflows/ci.yml +43 -0
- schemaglow-1.0.0/.github/workflows/pypi_publish.yml +49 -0
- schemaglow-1.0.0/.gitignore +60 -0
- schemaglow-1.0.0/.pre-commit-config.yaml +47 -0
- schemaglow-1.0.0/CITATION.cff +22 -0
- schemaglow-1.0.0/CONTRIBUTING.md +31 -0
- schemaglow-1.0.0/LICENSE +21 -0
- schemaglow-1.0.0/PKG-INFO +270 -0
- schemaglow-1.0.0/README.md +235 -0
- schemaglow-1.0.0/SECURITY.md +21 -0
- schemaglow-1.0.0/TESTING.md +318 -0
- schemaglow-1.0.0/pyproject.toml +110 -0
- schemaglow-1.0.0/requirements-dev.txt +1 -0
- schemaglow-1.0.0/requirements.txt +6 -0
- schemaglow-1.0.0/src/schemaglow/__init__.py +24 -0
- schemaglow-1.0.0/src/schemaglow/cli.py +282 -0
- schemaglow-1.0.0/src/schemaglow/diffing.py +334 -0
- schemaglow-1.0.0/src/schemaglow/infer.py +348 -0
- schemaglow-1.0.0/src/schemaglow/models.py +119 -0
- schemaglow-1.0.0/src/schemaglow/renderers.py +371 -0
- schemaglow-1.0.0/src/schemaglow/schema_sources.py +701 -0
- schemaglow-1.0.0/src/schemaglow/service.py +240 -0
- schemaglow-1.0.0/tests/conftest.py +10 -0
- schemaglow-1.0.0/tests/fixtures/downloaded/SOURCES.md +26 -0
- schemaglow-1.0.0/tests/fixtures/downloaded/addressbook.proto +56 -0
- schemaglow-1.0.0/tests/fixtures/downloaded/alltypes_plain.parquet +0 -0
- schemaglow-1.0.0/tests/fixtures/downloaded/example.jsonl +5 -0
- schemaglow-1.0.0/tests/fixtures/downloaded/miserables.json +1 -0
- schemaglow-1.0.0/tests/fixtures/downloaded/petstore.yaml +839 -0
- schemaglow-1.0.0/tests/fixtures/downloaded/seattle-weather.csv +1462 -0
- schemaglow-1.0.0/tests/fixtures/downloaded/user.avsc +9 -0
- schemaglow-1.0.0/tests/fixtures/manual/avro/user-baseline.avsc +25 -0
- schemaglow-1.0.0/tests/fixtures/manual/avro/user-candidate.avsc +29 -0
- schemaglow-1.0.0/tests/fixtures/manual/csv/weather-baseline.csv +6 -0
- schemaglow-1.0.0/tests/fixtures/manual/csv/weather-candidate.csv +6 -0
- schemaglow-1.0.0/tests/fixtures/manual/json/miserables-baseline.json +35 -0
- schemaglow-1.0.0/tests/fixtures/manual/json/miserables-candidate.json +36 -0
- schemaglow-1.0.0/tests/fixtures/manual/jsonl/search-baseline.jsonl +3 -0
- schemaglow-1.0.0/tests/fixtures/manual/jsonl/search-candidate.jsonl +3 -0
- schemaglow-1.0.0/tests/fixtures/manual/openapi/petstore-baseline.yaml +851 -0
- schemaglow-1.0.0/tests/fixtures/manual/openapi/petstore-candidate.yaml +854 -0
- schemaglow-1.0.0/tests/fixtures/manual/options/ignore-new.jsonl +2 -0
- schemaglow-1.0.0/tests/fixtures/manual/options/ignore-old.jsonl +2 -0
- schemaglow-1.0.0/tests/fixtures/manual/options/rename-new.csv +3 -0
- schemaglow-1.0.0/tests/fixtures/manual/options/rename-old.csv +3 -0
- schemaglow-1.0.0/tests/fixtures/manual/options/strict-new.csv +3 -0
- schemaglow-1.0.0/tests/fixtures/manual/options/strict-old.csv +3 -0
- schemaglow-1.0.0/tests/fixtures/manual/parquet/alltypes-baseline.parquet +0 -0
- schemaglow-1.0.0/tests/fixtures/manual/parquet/alltypes-candidate.parquet +0 -0
- schemaglow-1.0.0/tests/fixtures/manual/proto/addressbook-baseline.proto +56 -0
- schemaglow-1.0.0/tests/fixtures/manual/proto/addressbook-candidate.proto +57 -0
- schemaglow-1.0.0/tests/fixtures/manual/scan/new/addressbook.proto +57 -0
- schemaglow-1.0.0/tests/fixtures/manual/scan/new/petstore.yaml +854 -0
- schemaglow-1.0.0/tests/fixtures/manual/scan/new/weather.csv +6 -0
- schemaglow-1.0.0/tests/fixtures/manual/scan/old/petstore.yaml +851 -0
- schemaglow-1.0.0/tests/fixtures/manual/scan/old/weather.csv +6 -0
- schemaglow-1.0.0/tests/integration/test_cli.py +154 -0
- schemaglow-1.0.0/tests/integration/test_sample_fixtures.py +142 -0
- schemaglow-1.0.0/tests/unit/test_infer_and_diff.py +146 -0
- schemaglow-1.0.0/tests/unit/test_options_and_edge_cases.py +121 -0
- schemaglow-1.0.0/tests/unit/test_renderers_and_service.py +148 -0
- schemaglow-1.0.0/tests/unit/test_schema_sources.py +176 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: ["main"]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: ["main"]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.11", "3.12"]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
20
|
+
uses: actions/setup-python@v5
|
|
21
|
+
with:
|
|
22
|
+
python-version: ${{ matrix.python-version }}
|
|
23
|
+
cache: "pip"
|
|
24
|
+
|
|
25
|
+
- name: Install dependencies
|
|
26
|
+
run: |
|
|
27
|
+
python -m pip install --upgrade pip
|
|
28
|
+
pip install -e ".[dev]"
|
|
29
|
+
|
|
30
|
+
- name: Check formatting
|
|
31
|
+
run: ruff format --check .
|
|
32
|
+
|
|
33
|
+
- name: Run linter
|
|
34
|
+
run: ruff check .
|
|
35
|
+
|
|
36
|
+
- name: Run type checks
|
|
37
|
+
run: mypy src
|
|
38
|
+
|
|
39
|
+
- name: Run tests
|
|
40
|
+
run: pytest -v
|
|
41
|
+
|
|
42
|
+
- name: Run security audit
|
|
43
|
+
run: pip-audit
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
workflow_dispatch:
|
|
5
|
+
release:
|
|
6
|
+
types: [published]
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
build:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
permissions:
|
|
12
|
+
contents: read
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v4
|
|
15
|
+
|
|
16
|
+
- name: Set up Python
|
|
17
|
+
uses: actions/setup-python@v5
|
|
18
|
+
with:
|
|
19
|
+
python-version: "3.12"
|
|
20
|
+
|
|
21
|
+
- name: Build
|
|
22
|
+
run: |
|
|
23
|
+
python -m pip install --upgrade pip
|
|
24
|
+
pip install build twine
|
|
25
|
+
python -m build
|
|
26
|
+
python -m twine check dist/*
|
|
27
|
+
|
|
28
|
+
- name: Upload build artifacts
|
|
29
|
+
uses: actions/upload-artifact@v4
|
|
30
|
+
with:
|
|
31
|
+
name: dist
|
|
32
|
+
path: dist/*
|
|
33
|
+
|
|
34
|
+
publish:
|
|
35
|
+
runs-on: ubuntu-latest
|
|
36
|
+
needs: build
|
|
37
|
+
permissions:
|
|
38
|
+
id-token: write
|
|
39
|
+
environment:
|
|
40
|
+
name: pypi
|
|
41
|
+
steps:
|
|
42
|
+
- name: Download artifacts
|
|
43
|
+
uses: actions/download-artifact@v4
|
|
44
|
+
with:
|
|
45
|
+
name: dist
|
|
46
|
+
path: dist
|
|
47
|
+
|
|
48
|
+
- name: Publish to PyPI
|
|
49
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
__pycache__/
|
|
2
|
+
*.py[cod]
|
|
3
|
+
*$py.class
|
|
4
|
+
|
|
5
|
+
build/
|
|
6
|
+
dist/
|
|
7
|
+
*.egg-info/
|
|
8
|
+
.eggs/
|
|
9
|
+
*.egg
|
|
10
|
+
pip-wheel-metadata/
|
|
11
|
+
|
|
12
|
+
.venv/
|
|
13
|
+
venv/
|
|
14
|
+
env/
|
|
15
|
+
ENV/
|
|
16
|
+
|
|
17
|
+
.pytest_cache/
|
|
18
|
+
.mypy_cache/
|
|
19
|
+
.ruff_cache/
|
|
20
|
+
.hypothesis/
|
|
21
|
+
.tox/
|
|
22
|
+
.nox/
|
|
23
|
+
.coverage
|
|
24
|
+
.coverage.*
|
|
25
|
+
htmlcov/
|
|
26
|
+
|
|
27
|
+
.pyre/
|
|
28
|
+
.pytype/
|
|
29
|
+
|
|
30
|
+
.python-version
|
|
31
|
+
.poetry/
|
|
32
|
+
poetry.lock
|
|
33
|
+
Pipfile.lock
|
|
34
|
+
pdm.lock
|
|
35
|
+
|
|
36
|
+
.vscode/
|
|
37
|
+
.idea/
|
|
38
|
+
*.swp
|
|
39
|
+
*.swo
|
|
40
|
+
*~
|
|
41
|
+
|
|
42
|
+
.DS_Store
|
|
43
|
+
Thumbs.db
|
|
44
|
+
Desktop.ini
|
|
45
|
+
|
|
46
|
+
.ipynb_checkpoints/
|
|
47
|
+
|
|
48
|
+
*.log
|
|
49
|
+
tmp/
|
|
50
|
+
temp/
|
|
51
|
+
*.tmp
|
|
52
|
+
|
|
53
|
+
.env
|
|
54
|
+
.env.*
|
|
55
|
+
*.pem
|
|
56
|
+
*.key
|
|
57
|
+
*.p12
|
|
58
|
+
release.sh
|
|
59
|
+
requirement.txt
|
|
60
|
+
skills/
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
3
|
+
rev: v6.0.0
|
|
4
|
+
hooks:
|
|
5
|
+
- id: trailing-whitespace
|
|
6
|
+
- id: end-of-file-fixer
|
|
7
|
+
- id: check-yaml
|
|
8
|
+
- id: check-toml
|
|
9
|
+
- id: check-json
|
|
10
|
+
- id: check-merge-conflict
|
|
11
|
+
- id: detect-private-key
|
|
12
|
+
|
|
13
|
+
- repo: https://github.com/asottile/pyupgrade
|
|
14
|
+
rev: v3.21.2
|
|
15
|
+
hooks:
|
|
16
|
+
- id: pyupgrade
|
|
17
|
+
args: [--py311-plus]
|
|
18
|
+
|
|
19
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
20
|
+
rev: v0.15.1
|
|
21
|
+
hooks:
|
|
22
|
+
- id: ruff-check
|
|
23
|
+
args: [--fix]
|
|
24
|
+
- id: ruff-format
|
|
25
|
+
|
|
26
|
+
- repo: https://github.com/pre-commit/mirrors-mypy
|
|
27
|
+
rev: v1.19.1
|
|
28
|
+
hooks:
|
|
29
|
+
- id: mypy
|
|
30
|
+
args: [--config-file=pyproject.toml]
|
|
31
|
+
additional_dependencies: [pydantic, pyarrow, rich, typer]
|
|
32
|
+
|
|
33
|
+
- repo: https://github.com/pypa/pip-audit
|
|
34
|
+
rev: v2.10.0
|
|
35
|
+
hooks:
|
|
36
|
+
- id: pip-audit
|
|
37
|
+
args: [--progress-spinner=off]
|
|
38
|
+
stages: [manual]
|
|
39
|
+
|
|
40
|
+
- repo: local
|
|
41
|
+
hooks:
|
|
42
|
+
- id: pytest
|
|
43
|
+
name: pytest
|
|
44
|
+
entry: python -m pytest -q
|
|
45
|
+
language: system
|
|
46
|
+
pass_filenames: false
|
|
47
|
+
stages: [manual]
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
cff-version: 1.2.0
|
|
2
|
+
message: "If you use this software, please cite it as below."
|
|
3
|
+
authors:
|
|
4
|
+
- family-names: "Adwant"
|
|
5
|
+
given-names: "Goutam"
|
|
6
|
+
alias: "adwantg"
|
|
7
|
+
title: "SchemaGlow: Human-friendly schema diff and contract drift detection for CSV, JSON, JSONL, Parquet, OpenAPI, Avro, and protobuf."
|
|
8
|
+
version: "1.0.0"
|
|
9
|
+
date-released: "2026-03-09"
|
|
10
|
+
url: "https://github.com/adwantg/schemaglow"
|
|
11
|
+
repository-code: "https://github.com/adwantg/schemaglow"
|
|
12
|
+
license: MIT
|
|
13
|
+
keywords:
|
|
14
|
+
- python
|
|
15
|
+
- schema-diff
|
|
16
|
+
- openapi
|
|
17
|
+
- avro
|
|
18
|
+
- protobuf
|
|
19
|
+
- parquet
|
|
20
|
+
- json
|
|
21
|
+
- jsonl
|
|
22
|
+
- csv
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Contributing to SchemaGlow
|
|
2
|
+
|
|
3
|
+
## Developer Quick Start
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
git clone https://github.com/adwantg/schemaglow.git
|
|
7
|
+
cd schemaglow
|
|
8
|
+
|
|
9
|
+
python3 -m venv .venv
|
|
10
|
+
source .venv/bin/activate
|
|
11
|
+
|
|
12
|
+
pip install -e ".[dev]"
|
|
13
|
+
pre-commit install
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Quality Gates
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
ruff format .
|
|
20
|
+
ruff check .
|
|
21
|
+
mypy src
|
|
22
|
+
pytest
|
|
23
|
+
pip-audit
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Pull Request Requirements
|
|
27
|
+
|
|
28
|
+
1. Keep tests, docs, and CLI examples aligned with the implemented behavior.
|
|
29
|
+
2. Do not reduce coverage below 90%.
|
|
30
|
+
3. Add or update tests for every user-visible behavior change.
|
|
31
|
+
4. Update `README.md` whenever commands, flags, report formats, or compatibility rules change.
|
schemaglow-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Goutam Adwant
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: schemaglow
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Human-friendly schema diff and contract drift detection for CSV, JSON, JSONL, Parquet, OpenAPI, Avro, and protobuf.
|
|
5
|
+
Project-URL: Homepage, https://github.com/adwantg/schemaglow
|
|
6
|
+
Project-URL: Repository, https://github.com/adwantg/schemaglow
|
|
7
|
+
Project-URL: Issues, https://github.com/adwantg/schemaglow/issues
|
|
8
|
+
Project-URL: Documentation, https://github.com/adwantg/schemaglow#readme
|
|
9
|
+
Author-email: Goutam Adwant <workwithgoutam@gmail.com>
|
|
10
|
+
License: MIT
|
|
11
|
+
Keywords: avro,cli,csv,data-quality,json,jsonl,openapi,parquet,protobuf,schema-diff
|
|
12
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Requires-Python: >=3.11
|
|
19
|
+
Requires-Dist: jinja2>=3.1.0
|
|
20
|
+
Requires-Dist: pyarrow>=16.1.0
|
|
21
|
+
Requires-Dist: pydantic>=2.8.0
|
|
22
|
+
Requires-Dist: pyyaml>=6.0.0
|
|
23
|
+
Requires-Dist: rich>=13.9.0
|
|
24
|
+
Requires-Dist: typer>=0.16.0
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: build>=1.2.0; extra == 'dev'
|
|
27
|
+
Requires-Dist: mypy>=1.13.0; extra == 'dev'
|
|
28
|
+
Requires-Dist: pip-audit>=2.8.0; extra == 'dev'
|
|
29
|
+
Requires-Dist: pre-commit>=4.0.0; extra == 'dev'
|
|
30
|
+
Requires-Dist: pytest-cov>=6.0.0; extra == 'dev'
|
|
31
|
+
Requires-Dist: pytest>=8.3.0; extra == 'dev'
|
|
32
|
+
Requires-Dist: ruff>=0.8.0; extra == 'dev'
|
|
33
|
+
Requires-Dist: twine>=6.0.0; extra == 'dev'
|
|
34
|
+
Description-Content-Type: text/markdown
|
|
35
|
+
|
|
36
|
+
# SchemaGlow
|
|
37
|
+
|
|
38
|
+
Human-friendly schema diff for CSV, JSON, JSONL, Parquet, OpenAPI, Avro, and protobuf.
|
|
39
|
+
|
|
40
|
+
SchemaGlow compares data files, schema artifacts, directory trees, and saved contract snapshots. It tells you what changed, whether it is safe, and what might break. It is built for pull request review, CI checks, repository-wide drift scans, and baseline contract validation when raw git diffs are not enough.
|
|
41
|
+
|
|
42
|
+
## Why
|
|
43
|
+
|
|
44
|
+
Most nearby tools validate data contracts, inspect file structure, or diff technical schemas in a format-specific way. SchemaGlow focuses on a narrower workflow:
|
|
45
|
+
|
|
46
|
+
- compare two file versions quickly
|
|
47
|
+
- explain changes in plain language
|
|
48
|
+
- classify impact as `SAFE`, `WARNING`, or `BREAKING`
|
|
49
|
+
- export machine-readable and review-friendly reports
|
|
50
|
+
|
|
51
|
+
## Features
|
|
52
|
+
|
|
53
|
+
- Compare `CSV`, `JSON`, `JSONL`, `Parquet`, `OpenAPI`, `Avro`, and `protobuf` sources with one CLI.
|
|
54
|
+
- Infer normalized schema snapshots from both raw data files and schema-definition files.
|
|
55
|
+
- Classify compatibility changes as `SAFE`, `WARNING`, or `BREAKING`.
|
|
56
|
+
- Export diff output as terminal text, JSON, Markdown, or HTML.
|
|
57
|
+
- Save schema snapshots and compare them later without re-reading source files.
|
|
58
|
+
- Scan two directory trees recursively and aggregate drift into one report.
|
|
59
|
+
- Capture baseline contract files and check candidate trees against committed baselines.
|
|
60
|
+
- Detect optional nested expansions, removals, type changes, nullability changes, sample-shape ambiguity, and column-order-only changes.
|
|
61
|
+
- Support ignore rules, strict numeric widening, and rename heuristics with sample overlap.
|
|
62
|
+
|
|
63
|
+
## Installation
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
pip install schemaglow
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
For local development:
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
python3 -m venv .venv
|
|
73
|
+
source .venv/bin/activate
|
|
74
|
+
pip install -e ".[dev]"
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## CLI
|
|
78
|
+
|
|
79
|
+
### `schemaglow diff`
|
|
80
|
+
|
|
81
|
+
Compare two files directly.
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
schemaglow diff old.parquet new.parquet
|
|
85
|
+
schemaglow diff baseline.jsonl candidate.jsonl --format json
|
|
86
|
+
schemaglow diff old.openapi.yaml new.openapi.yaml
|
|
87
|
+
schemaglow diff old.avsc new.avsc
|
|
88
|
+
schemaglow diff old.proto new.proto --report html --report-path proto-report.html
|
|
89
|
+
schemaglow diff old.csv new.csv --report markdown --report-path schema-report.md
|
|
90
|
+
schemaglow diff old.csv new.csv --ignore-fields '(^_loaded_at$|^metadata\.)'
|
|
91
|
+
schemaglow diff old.csv new.csv --strict --rename-heuristics
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Example text output:
|
|
95
|
+
|
|
96
|
+
```text
|
|
97
|
+
SchemaGlow Report
|
|
98
|
+
|
|
99
|
+
BREAKING
|
|
100
|
+
old: old.csv
|
|
101
|
+
new: new.csv
|
|
102
|
+
counts: SAFE=1 WARNING=0 BREAKING=1
|
|
103
|
+
BREAKING
|
|
104
|
+
- removed field: order_total
|
|
105
|
+
SAFE
|
|
106
|
+
+ column order changed only
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### `schemaglow inspect`
|
|
110
|
+
|
|
111
|
+
Infer a snapshot from one file and print its normalized field model.
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
schemaglow inspect data.json
|
|
115
|
+
schemaglow inspect data.parquet --format json
|
|
116
|
+
schemaglow inspect openapi.yaml --format json
|
|
117
|
+
schemaglow inspect schema.proto
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### `schemaglow snapshot`
|
|
121
|
+
|
|
122
|
+
Persist an inferred snapshot to JSON for later comparison.
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
schemaglow snapshot data.jsonl -o snapshots/baseline.schema.json
|
|
126
|
+
schemaglow snapshot schema.avsc -o snapshots/avro.schema.json
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### `schemaglow compare`
|
|
130
|
+
|
|
131
|
+
Compare two saved schema snapshots.
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
schemaglow compare old.schema.json new.schema.json
|
|
135
|
+
schemaglow compare old.schema.json new.schema.json --format json
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### `schemaglow scan`
|
|
139
|
+
|
|
140
|
+
Compare two directory trees recursively and aggregate the results.
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
schemaglow scan datasets/baseline datasets/candidate
|
|
144
|
+
schemaglow scan specs/old specs/new --format json
|
|
145
|
+
schemaglow scan repo-old repo-new --pattern '*.proto' --report markdown --report-path scan.md
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
### `schemaglow baseline capture`
|
|
149
|
+
|
|
150
|
+
Capture a repository-local contract baseline made of saved snapshots.
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
schemaglow baseline capture data/ -o .schemaglow-baseline
|
|
154
|
+
schemaglow baseline capture specs/ -o contracts/api --pattern '*.yaml'
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### `schemaglow baseline check`
|
|
158
|
+
|
|
159
|
+
Compare a candidate tree against committed baseline contract files.
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
schemaglow baseline check .schemaglow-baseline data/
|
|
163
|
+
schemaglow baseline check contracts/api specs/ --format json
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
## Supported Inputs
|
|
167
|
+
|
|
168
|
+
| Format | Typical suffixes | Notes |
|
|
169
|
+
| ---- | ---- | ---- |
|
|
170
|
+
| CSV | `.csv` | Header-driven field discovery with scalar inference |
|
|
171
|
+
| JSON | `.json` | Raw object or array data; OpenAPI JSON is auto-detected |
|
|
172
|
+
| JSONL | `.jsonl` | One JSON object per line |
|
|
173
|
+
| Parquet | `.parquet` | Schema extracted with PyArrow |
|
|
174
|
+
| OpenAPI | `.yaml`, `.yml`, `.json` | Local refs, component schemas, request/response schemas |
|
|
175
|
+
| Avro | `.avsc` | Records, arrays, maps, enums, unions |
|
|
176
|
+
| Protobuf | `.proto` | Messages, enums, repeated fields, and maps |
|
|
177
|
+
|
|
178
|
+
## Compatibility Rules
|
|
179
|
+
|
|
180
|
+
`SAFE`
|
|
181
|
+
|
|
182
|
+
- new nullable or optional top-level field
|
|
183
|
+
- column order changed only
|
|
184
|
+
- numeric widening from `integer` to `number` unless `--strict` is enabled
|
|
185
|
+
- no schema change
|
|
186
|
+
|
|
187
|
+
`WARNING`
|
|
188
|
+
|
|
189
|
+
- new required top-level field
|
|
190
|
+
- nested object shape expanded
|
|
191
|
+
- required to nullable change
|
|
192
|
+
- ambiguous or mixed-type widening
|
|
193
|
+
- sample shape changed while remaining string-typed
|
|
194
|
+
- likely rename detected with `--rename-heuristics`
|
|
195
|
+
|
|
196
|
+
`BREAKING`
|
|
197
|
+
|
|
198
|
+
- field removed
|
|
199
|
+
- nullable to required change
|
|
200
|
+
- incompatible type change such as `string -> integer`
|
|
201
|
+
|
|
202
|
+
## Architecture
|
|
203
|
+
|
|
204
|
+
The package uses a small pipeline that mirrors the product brief.
|
|
205
|
+
|
|
206
|
+
```text
|
|
207
|
+
src/schemaglow/
|
|
208
|
+
├── cli.py # Typer command surface
|
|
209
|
+
├── service.py # File and snapshot orchestration
|
|
210
|
+
├── infer.py # Format detection and schema inference
|
|
211
|
+
├── schema_sources.py # OpenAPI, Avro, and protobuf parsers
|
|
212
|
+
├── diffing.py # Compatibility rules and event generation
|
|
213
|
+
├── renderers.py # Text, JSON, Markdown, and HTML output
|
|
214
|
+
└── models.py # Pydantic models for snapshots and reports
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
Processing flow:
|
|
218
|
+
|
|
219
|
+
1. Detect the input format from suffix and schema-document heuristics.
|
|
220
|
+
2. Infer a normalized field map with type, nullability, order, and sample hints.
|
|
221
|
+
3. Compare old and new field sets against compatibility rules.
|
|
222
|
+
4. Aggregate file-level results for scans and baseline checks when needed.
|
|
223
|
+
5. Render the result for humans or CI consumers.
|
|
224
|
+
|
|
225
|
+
## Tools Used
|
|
226
|
+
|
|
227
|
+
| Tool | Purpose |
|
|
228
|
+
| ---- | ------- |
|
|
229
|
+
| `Python 3.11+` | Runtime and packaging baseline |
|
|
230
|
+
| `Typer` | CLI commands and help output |
|
|
231
|
+
| `Rich` | Terminal rendering |
|
|
232
|
+
| `Pydantic` | Snapshot and report models |
|
|
233
|
+
| `PyArrow` | Parquet schema reading and test fixture creation |
|
|
234
|
+
| `PyYAML` | OpenAPI YAML parsing |
|
|
235
|
+
| `Jinja2` | HTML report templating |
|
|
236
|
+
| `pytest` + `pytest-cov` | Unit and integration tests with coverage |
|
|
237
|
+
| `mypy` | Strict type checking |
|
|
238
|
+
| `ruff` | Linting and formatting |
|
|
239
|
+
| `pip-audit` | Dependency vulnerability checks |
|
|
240
|
+
|
|
241
|
+
## Testing and Verification
|
|
242
|
+
|
|
243
|
+
Local verification commands:
|
|
244
|
+
|
|
245
|
+
```bash
|
|
246
|
+
ruff format --check .
|
|
247
|
+
ruff check .
|
|
248
|
+
mypy src
|
|
249
|
+
pytest
|
|
250
|
+
pip-audit
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
Manual end-to-end commands using committed sample files are documented in `TESTING.md`.
|
|
254
|
+
|
|
255
|
+
The automated test suite covers:
|
|
256
|
+
|
|
257
|
+
- CSV inference and numeric widening behavior
|
|
258
|
+
- JSON and JSONL nested shape, nullability, and sample-shape changes
|
|
259
|
+
- OpenAPI, Avro, and protobuf schema parsing
|
|
260
|
+
- nested diff collapsing and rename heuristics
|
|
261
|
+
- snapshot and baseline round-trips
|
|
262
|
+
- CLI integration for `inspect`, `snapshot`, `compare`, `diff`, `scan`, and `baseline`
|
|
263
|
+
- Parquet and directory report generation
|
|
264
|
+
|
|
265
|
+
## Repository Standards
|
|
266
|
+
|
|
267
|
+
- [CONTRIBUTING.md](./CONTRIBUTING.md)
|
|
268
|
+
- [SECURITY.md](./SECURITY.md)
|
|
269
|
+
- [LICENSE](./LICENSE)
|
|
270
|
+
- [CITATION.cff](./CITATION.cff)
|