schemaglow 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. schemaglow-1.0.0/.github/workflows/ci.yml +43 -0
  2. schemaglow-1.0.0/.github/workflows/pypi_publish.yml +49 -0
  3. schemaglow-1.0.0/.gitignore +60 -0
  4. schemaglow-1.0.0/.pre-commit-config.yaml +47 -0
  5. schemaglow-1.0.0/CITATION.cff +22 -0
  6. schemaglow-1.0.0/CONTRIBUTING.md +31 -0
  7. schemaglow-1.0.0/LICENSE +21 -0
  8. schemaglow-1.0.0/PKG-INFO +270 -0
  9. schemaglow-1.0.0/README.md +235 -0
  10. schemaglow-1.0.0/SECURITY.md +21 -0
  11. schemaglow-1.0.0/TESTING.md +318 -0
  12. schemaglow-1.0.0/pyproject.toml +110 -0
  13. schemaglow-1.0.0/requirements-dev.txt +1 -0
  14. schemaglow-1.0.0/requirements.txt +6 -0
  15. schemaglow-1.0.0/src/schemaglow/__init__.py +24 -0
  16. schemaglow-1.0.0/src/schemaglow/cli.py +282 -0
  17. schemaglow-1.0.0/src/schemaglow/diffing.py +334 -0
  18. schemaglow-1.0.0/src/schemaglow/infer.py +348 -0
  19. schemaglow-1.0.0/src/schemaglow/models.py +119 -0
  20. schemaglow-1.0.0/src/schemaglow/renderers.py +371 -0
  21. schemaglow-1.0.0/src/schemaglow/schema_sources.py +701 -0
  22. schemaglow-1.0.0/src/schemaglow/service.py +240 -0
  23. schemaglow-1.0.0/tests/conftest.py +10 -0
  24. schemaglow-1.0.0/tests/fixtures/downloaded/SOURCES.md +26 -0
  25. schemaglow-1.0.0/tests/fixtures/downloaded/addressbook.proto +56 -0
  26. schemaglow-1.0.0/tests/fixtures/downloaded/alltypes_plain.parquet +0 -0
  27. schemaglow-1.0.0/tests/fixtures/downloaded/example.jsonl +5 -0
  28. schemaglow-1.0.0/tests/fixtures/downloaded/miserables.json +1 -0
  29. schemaglow-1.0.0/tests/fixtures/downloaded/petstore.yaml +839 -0
  30. schemaglow-1.0.0/tests/fixtures/downloaded/seattle-weather.csv +1462 -0
  31. schemaglow-1.0.0/tests/fixtures/downloaded/user.avsc +9 -0
  32. schemaglow-1.0.0/tests/fixtures/manual/avro/user-baseline.avsc +25 -0
  33. schemaglow-1.0.0/tests/fixtures/manual/avro/user-candidate.avsc +29 -0
  34. schemaglow-1.0.0/tests/fixtures/manual/csv/weather-baseline.csv +6 -0
  35. schemaglow-1.0.0/tests/fixtures/manual/csv/weather-candidate.csv +6 -0
  36. schemaglow-1.0.0/tests/fixtures/manual/json/miserables-baseline.json +35 -0
  37. schemaglow-1.0.0/tests/fixtures/manual/json/miserables-candidate.json +36 -0
  38. schemaglow-1.0.0/tests/fixtures/manual/jsonl/search-baseline.jsonl +3 -0
  39. schemaglow-1.0.0/tests/fixtures/manual/jsonl/search-candidate.jsonl +3 -0
  40. schemaglow-1.0.0/tests/fixtures/manual/openapi/petstore-baseline.yaml +851 -0
  41. schemaglow-1.0.0/tests/fixtures/manual/openapi/petstore-candidate.yaml +854 -0
  42. schemaglow-1.0.0/tests/fixtures/manual/options/ignore-new.jsonl +2 -0
  43. schemaglow-1.0.0/tests/fixtures/manual/options/ignore-old.jsonl +2 -0
  44. schemaglow-1.0.0/tests/fixtures/manual/options/rename-new.csv +3 -0
  45. schemaglow-1.0.0/tests/fixtures/manual/options/rename-old.csv +3 -0
  46. schemaglow-1.0.0/tests/fixtures/manual/options/strict-new.csv +3 -0
  47. schemaglow-1.0.0/tests/fixtures/manual/options/strict-old.csv +3 -0
  48. schemaglow-1.0.0/tests/fixtures/manual/parquet/alltypes-baseline.parquet +0 -0
  49. schemaglow-1.0.0/tests/fixtures/manual/parquet/alltypes-candidate.parquet +0 -0
  50. schemaglow-1.0.0/tests/fixtures/manual/proto/addressbook-baseline.proto +56 -0
  51. schemaglow-1.0.0/tests/fixtures/manual/proto/addressbook-candidate.proto +57 -0
  52. schemaglow-1.0.0/tests/fixtures/manual/scan/new/addressbook.proto +57 -0
  53. schemaglow-1.0.0/tests/fixtures/manual/scan/new/petstore.yaml +854 -0
  54. schemaglow-1.0.0/tests/fixtures/manual/scan/new/weather.csv +6 -0
  55. schemaglow-1.0.0/tests/fixtures/manual/scan/old/petstore.yaml +851 -0
  56. schemaglow-1.0.0/tests/fixtures/manual/scan/old/weather.csv +6 -0
  57. schemaglow-1.0.0/tests/integration/test_cli.py +154 -0
  58. schemaglow-1.0.0/tests/integration/test_sample_fixtures.py +142 -0
  59. schemaglow-1.0.0/tests/unit/test_infer_and_diff.py +146 -0
  60. schemaglow-1.0.0/tests/unit/test_options_and_edge_cases.py +121 -0
  61. schemaglow-1.0.0/tests/unit/test_renderers_and_service.py +148 -0
  62. schemaglow-1.0.0/tests/unit/test_schema_sources.py +176 -0
@@ -0,0 +1,43 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: ["main"]
6
+ pull_request:
7
+ branches: ["main"]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.11", "3.12"]
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Set up Python ${{ matrix.python-version }}
20
+ uses: actions/setup-python@v5
21
+ with:
22
+ python-version: ${{ matrix.python-version }}
23
+ cache: "pip"
24
+
25
+ - name: Install dependencies
26
+ run: |
27
+ python -m pip install --upgrade pip
28
+ pip install -e ".[dev]"
29
+
30
+ - name: Check formatting
31
+ run: ruff format --check .
32
+
33
+ - name: Run linter
34
+ run: ruff check .
35
+
36
+ - name: Run type checks
37
+ run: mypy src
38
+
39
+ - name: Run tests
40
+ run: pytest -v
41
+
42
+ - name: Run security audit
43
+ run: pip-audit
@@ -0,0 +1,49 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ workflow_dispatch:
5
+ release:
6
+ types: [published]
7
+
8
+ jobs:
9
+ build:
10
+ runs-on: ubuntu-latest
11
+ permissions:
12
+ contents: read
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+
16
+ - name: Set up Python
17
+ uses: actions/setup-python@v5
18
+ with:
19
+ python-version: "3.12"
20
+
21
+ - name: Build
22
+ run: |
23
+ python -m pip install --upgrade pip
24
+ pip install build twine
25
+ python -m build
26
+ python -m twine check dist/*
27
+
28
+ - name: Upload build artifacts
29
+ uses: actions/upload-artifact@v4
30
+ with:
31
+ name: dist
32
+ path: dist/*
33
+
34
+ publish:
35
+ runs-on: ubuntu-latest
36
+ needs: build
37
+ permissions:
38
+ id-token: write
39
+ environment:
40
+ name: pypi
41
+ steps:
42
+ - name: Download artifacts
43
+ uses: actions/download-artifact@v4
44
+ with:
45
+ name: dist
46
+ path: dist
47
+
48
+ - name: Publish to PyPI
49
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,60 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+
5
+ build/
6
+ dist/
7
+ *.egg-info/
8
+ .eggs/
9
+ *.egg
10
+ pip-wheel-metadata/
11
+
12
+ .venv/
13
+ venv/
14
+ env/
15
+ ENV/
16
+
17
+ .pytest_cache/
18
+ .mypy_cache/
19
+ .ruff_cache/
20
+ .hypothesis/
21
+ .tox/
22
+ .nox/
23
+ .coverage
24
+ .coverage.*
25
+ htmlcov/
26
+
27
+ .pyre/
28
+ .pytype/
29
+
30
+ .python-version
31
+ .poetry/
32
+ poetry.lock
33
+ Pipfile.lock
34
+ pdm.lock
35
+
36
+ .vscode/
37
+ .idea/
38
+ *.swp
39
+ *.swo
40
+ *~
41
+
42
+ .DS_Store
43
+ Thumbs.db
44
+ Desktop.ini
45
+
46
+ .ipynb_checkpoints/
47
+
48
+ *.log
49
+ tmp/
50
+ temp/
51
+ *.tmp
52
+
53
+ .env
54
+ .env.*
55
+ *.pem
56
+ *.key
57
+ *.p12
58
+ release.sh
59
+ requirement.txt
60
+ skills/
@@ -0,0 +1,47 @@
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v6.0.0
4
+ hooks:
5
+ - id: trailing-whitespace
6
+ - id: end-of-file-fixer
7
+ - id: check-yaml
8
+ - id: check-toml
9
+ - id: check-json
10
+ - id: check-merge-conflict
11
+ - id: detect-private-key
12
+
13
+ - repo: https://github.com/asottile/pyupgrade
14
+ rev: v3.21.2
15
+ hooks:
16
+ - id: pyupgrade
17
+ args: [--py311-plus]
18
+
19
+ - repo: https://github.com/astral-sh/ruff-pre-commit
20
+ rev: v0.15.1
21
+ hooks:
22
+ - id: ruff-check
23
+ args: [--fix]
24
+ - id: ruff-format
25
+
26
+ - repo: https://github.com/pre-commit/mirrors-mypy
27
+ rev: v1.19.1
28
+ hooks:
29
+ - id: mypy
30
+ args: [--config-file=pyproject.toml]
31
+ additional_dependencies: [pydantic, pyarrow, rich, typer]
32
+
33
+ - repo: https://github.com/pypa/pip-audit
34
+ rev: v2.10.0
35
+ hooks:
36
+ - id: pip-audit
37
+ args: [--progress-spinner=off]
38
+ stages: [manual]
39
+
40
+ - repo: local
41
+ hooks:
42
+ - id: pytest
43
+ name: pytest
44
+ entry: python -m pytest -q
45
+ language: system
46
+ pass_filenames: false
47
+ stages: [manual]
@@ -0,0 +1,22 @@
1
+ cff-version: 1.2.0
2
+ message: "If you use this software, please cite it as below."
3
+ authors:
4
+ - family-names: "Adwant"
5
+ given-names: "Goutam"
6
+ alias: "adwantg"
7
+ title: "SchemaGlow: Human-friendly schema diff and contract drift detection for CSV, JSON, JSONL, Parquet, OpenAPI, Avro, and protobuf."
8
+ version: "1.0.0"
9
+ date-released: "2026-03-09"
10
+ url: "https://github.com/adwantg/schemaglow"
11
+ repository-code: "https://github.com/adwantg/schemaglow"
12
+ license: MIT
13
+ keywords:
14
+ - python
15
+ - schema-diff
16
+ - openapi
17
+ - avro
18
+ - protobuf
19
+ - parquet
20
+ - json
21
+ - jsonl
22
+ - csv
@@ -0,0 +1,31 @@
1
+ # Contributing to SchemaGlow
2
+
3
+ ## Developer Quick Start
4
+
5
+ ```bash
6
+ git clone https://github.com/adwantg/schemaglow.git
7
+ cd schemaglow
8
+
9
+ python3 -m venv .venv
10
+ source .venv/bin/activate
11
+
12
+ pip install -e ".[dev]"
13
+ pre-commit install
14
+ ```
15
+
16
+ ## Quality Gates
17
+
18
+ ```bash
19
+ ruff format .
20
+ ruff check .
21
+ mypy src
22
+ pytest
23
+ pip-audit
24
+ ```
25
+
26
+ ## Pull Request Requirements
27
+
28
+ 1. Keep tests, docs, and CLI examples aligned with the implemented behavior.
29
+ 2. Do not reduce coverage below 90%.
30
+ 3. Add or update tests for every user-visible behavior change.
31
+ 4. Update `README.md` whenever commands, flags, report formats, or compatibility rules change.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Goutam Adwant
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,270 @@
1
+ Metadata-Version: 2.3
2
+ Name: schemaglow
3
+ Version: 1.0.0
4
+ Summary: Human-friendly schema diff and contract drift detection for CSV, JSON, JSONL, Parquet, OpenAPI, Avro, and protobuf.
5
+ Project-URL: Homepage, https://github.com/adwantg/schemaglow
6
+ Project-URL: Repository, https://github.com/adwantg/schemaglow
7
+ Project-URL: Issues, https://github.com/adwantg/schemaglow/issues
8
+ Project-URL: Documentation, https://github.com/adwantg/schemaglow#readme
9
+ Author-email: Goutam Adwant <workwithgoutam@gmail.com>
10
+ License: MIT
11
+ Keywords: avro,cli,csv,data-quality,json,jsonl,openapi,parquet,protobuf,schema-diff
12
+ Classifier: Development Status :: 5 - Production/Stable
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Requires-Python: >=3.11
19
+ Requires-Dist: jinja2>=3.1.0
20
+ Requires-Dist: pyarrow>=16.1.0
21
+ Requires-Dist: pydantic>=2.8.0
22
+ Requires-Dist: pyyaml>=6.0.0
23
+ Requires-Dist: rich>=13.9.0
24
+ Requires-Dist: typer>=0.16.0
25
+ Provides-Extra: dev
26
+ Requires-Dist: build>=1.2.0; extra == 'dev'
27
+ Requires-Dist: mypy>=1.13.0; extra == 'dev'
28
+ Requires-Dist: pip-audit>=2.8.0; extra == 'dev'
29
+ Requires-Dist: pre-commit>=4.0.0; extra == 'dev'
30
+ Requires-Dist: pytest-cov>=6.0.0; extra == 'dev'
31
+ Requires-Dist: pytest>=8.3.0; extra == 'dev'
32
+ Requires-Dist: ruff>=0.8.0; extra == 'dev'
33
+ Requires-Dist: twine>=6.0.0; extra == 'dev'
34
+ Description-Content-Type: text/markdown
35
+
36
+ # SchemaGlow
37
+
38
+ Human-friendly schema diff for CSV, JSON, JSONL, Parquet, OpenAPI, Avro, and protobuf.
39
+
40
+ SchemaGlow compares data files, schema artifacts, directory trees, and saved contract snapshots. It tells you what changed, whether it is safe, and what might break. It is built for pull request review, CI checks, repository-wide drift scans, and baseline contract validation when raw git diffs are not enough.
41
+
42
+ ## Why
43
+
44
+ Most nearby tools validate data contracts, inspect file structure, or diff technical schemas in a format-specific way. SchemaGlow focuses on a narrower workflow:
45
+
46
+ - compare two file versions quickly
47
+ - explain changes in plain language
48
+ - classify impact as `SAFE`, `WARNING`, or `BREAKING`
49
+ - export machine-readable and review-friendly reports
50
+
51
+ ## Features
52
+
53
+ - Compare `CSV`, `JSON`, `JSONL`, `Parquet`, `OpenAPI`, `Avro`, and `protobuf` sources with one CLI.
54
+ - Infer normalized schema snapshots from both raw data files and schema-definition files.
55
+ - Classify compatibility changes as `SAFE`, `WARNING`, or `BREAKING`.
56
+ - Export diff output as terminal text, JSON, Markdown, or HTML.
57
+ - Save schema snapshots and compare them later without re-reading source files.
58
+ - Scan two directory trees recursively and aggregate drift into one report.
59
+ - Capture baseline contract files and check candidate trees against committed baselines.
60
+ - Detect optional nested expansions, removals, type changes, nullability changes, sample-shape ambiguity, and column-order-only changes.
61
+ - Support ignore rules, a strict mode that stops treating numeric widening as safe, and rename heuristics with sample overlap.
62
+
63
+ ## Installation
64
+
65
+ ```bash
66
+ pip install schemaglow
67
+ ```
68
+
69
+ For local development:
70
+
71
+ ```bash
72
+ python3 -m venv .venv
73
+ source .venv/bin/activate
74
+ pip install -e ".[dev]"
75
+ ```
76
+
77
+ ## CLI
78
+
79
+ ### `schemaglow diff`
80
+
81
+ Compare two files directly.
82
+
83
+ ```bash
84
+ schemaglow diff old.parquet new.parquet
85
+ schemaglow diff baseline.jsonl candidate.jsonl --format json
86
+ schemaglow diff old.openapi.yaml new.openapi.yaml
87
+ schemaglow diff old.avsc new.avsc
88
+ schemaglow diff old.proto new.proto --report html --report-path proto-report.html
89
+ schemaglow diff old.csv new.csv --report markdown --report-path schema-report.md
90
+ schemaglow diff old.csv new.csv --ignore-fields '(^_loaded_at$|^metadata\.)'
91
+ schemaglow diff old.csv new.csv --strict --rename-heuristics
92
+ ```
93
+
94
+ Example text output:
95
+
96
+ ```text
97
+ SchemaGlow Report
98
+
99
+ BREAKING
100
+ old: old.csv
101
+ new: new.csv
102
+ counts: SAFE=1 WARNING=0 BREAKING=1
103
+ BREAKING
104
+ - removed field: order_total
105
+ SAFE
106
+ + column order changed only
107
+ ```
108
+
109
+ ### `schemaglow inspect`
110
+
111
+ Infer a snapshot from one file and print its normalized field model.
112
+
113
+ ```bash
114
+ schemaglow inspect data.json
115
+ schemaglow inspect data.parquet --format json
116
+ schemaglow inspect openapi.yaml --format json
117
+ schemaglow inspect schema.proto
118
+ ```
119
+
120
+ ### `schemaglow snapshot`
121
+
122
+ Persist an inferred snapshot to JSON for later comparison.
123
+
124
+ ```bash
125
+ schemaglow snapshot data.jsonl -o snapshots/baseline.schema.json
126
+ schemaglow snapshot schema.avsc -o snapshots/avro.schema.json
127
+ ```
128
+
129
+ ### `schemaglow compare`
130
+
131
+ Compare two saved schema snapshots.
132
+
133
+ ```bash
134
+ schemaglow compare old.schema.json new.schema.json
135
+ schemaglow compare old.schema.json new.schema.json --format json
136
+ ```
137
+
138
+ ### `schemaglow scan`
139
+
140
+ Compare two directory trees recursively and aggregate the results.
141
+
142
+ ```bash
143
+ schemaglow scan datasets/baseline datasets/candidate
144
+ schemaglow scan specs/old specs/new --format json
145
+ schemaglow scan repo-old repo-new --pattern '*.proto' --report markdown --report-path scan.md
146
+ ```
147
+
148
+ ### `schemaglow baseline capture`
149
+
150
+ Capture a repository-local contract baseline made of saved snapshots.
151
+
152
+ ```bash
153
+ schemaglow baseline capture data/ -o .schemaglow-baseline
154
+ schemaglow baseline capture specs/ -o contracts/api --pattern '*.yaml'
155
+ ```
156
+
157
+ ### `schemaglow baseline check`
158
+
159
+ Compare a candidate tree against committed baseline contract files.
160
+
161
+ ```bash
162
+ schemaglow baseline check .schemaglow-baseline data/
163
+ schemaglow baseline check contracts/api specs/ --format json
164
+ ```
165
+
166
+ ## Supported Inputs
167
+
168
+ | Format | Typical suffixes | Notes |
169
+ | ---- | ---- | ---- |
170
+ | CSV | `.csv` | Header-driven field discovery with scalar inference |
171
+ | JSON | `.json` | Raw object or array data; OpenAPI JSON is auto-detected |
172
+ | JSONL | `.jsonl` | One JSON object per line |
173
+ | Parquet | `.parquet` | Schema extracted with PyArrow |
174
+ | OpenAPI | `.yaml`, `.yml`, `.json` | Local refs, component schemas, request/response schemas |
175
+ | Avro | `.avsc` | Records, arrays, maps, enums, unions |
176
+ | Protobuf | `.proto` | Messages, enums, repeated fields, and maps |
177
+
178
+ ## Compatibility Rules
179
+
180
+ `SAFE`
181
+
182
+ - new nullable or optional top-level field
183
+ - column order changed only
184
+ - numeric widening from `integer` to `number` unless `--strict` is enabled
185
+ - no schema change
186
+
187
+ `WARNING`
188
+
189
+ - new required top-level field
190
+ - nested object shape expanded
191
+ - required to nullable change
192
+ - ambiguous or mixed-type widening
193
+ - sample shape changed while remaining string-typed
194
+ - likely rename detected with `--rename-heuristics`
195
+
196
+ `BREAKING`
197
+
198
+ - field removed
199
+ - nullable to required change
200
+ - incompatible type change such as `string -> integer`
201
+
202
+ ## Architecture
203
+
204
+ The package uses a small pipeline that mirrors the product brief.
205
+
206
+ ```text
207
+ src/schemaglow/
208
+ ├── cli.py # Typer command surface
209
+ ├── service.py # File and snapshot orchestration
210
+ ├── infer.py # Format detection and schema inference
211
+ ├── schema_sources.py # OpenAPI, Avro, and protobuf parsers
212
+ ├── diffing.py # Compatibility rules and event generation
213
+ ├── renderers.py # Text, JSON, Markdown, and HTML output
214
+ └── models.py # Pydantic models for snapshots and reports
215
+ ```
216
+
217
+ Processing flow:
218
+
219
+ 1. Detect the input format from suffix and schema-document heuristics.
220
+ 2. Infer a normalized field map with type, nullability, order, and sample hints.
221
+ 3. Compare old and new field sets against compatibility rules.
222
+ 4. Aggregate file-level results for scans and baseline checks when needed.
223
+ 5. Render the result for humans or CI consumers.
224
+
225
+ ## Tools Used
226
+
227
+ | Tool | Purpose |
228
+ | ---- | ------- |
229
+ | `Python 3.11+` | Runtime and packaging baseline |
230
+ | `Typer` | CLI commands and help output |
231
+ | `Rich` | Terminal rendering |
232
+ | `Pydantic` | Snapshot and report models |
233
+ | `PyArrow` | Parquet schema reading and test fixture creation |
234
+ | `PyYAML` | OpenAPI YAML parsing |
235
+ | `Jinja2` | HTML report templating |
236
+ | `pytest` + `pytest-cov` | Unit and integration tests with coverage |
237
+ | `mypy` | Strict type checking |
238
+ | `ruff` | Linting and formatting |
239
+ | `pip-audit` | Dependency vulnerability checks |
240
+
241
+ ## Testing and Verification
242
+
243
+ Local verification commands:
244
+
245
+ ```bash
246
+ ruff format --check .
247
+ ruff check .
248
+ mypy src
249
+ pytest
250
+ pip-audit
251
+ ```
252
+
253
+ Manual end-to-end commands using committed sample files are documented in `TESTING.md`.
254
+
255
+ The automated test suite covers:
256
+
257
+ - CSV inference and numeric widening behavior
258
+ - JSON and JSONL nested shape, nullability, and sample-shape changes
259
+ - OpenAPI, Avro, and protobuf schema parsing
260
+ - nested diff collapsing and rename heuristics
261
+ - snapshot and baseline round-trips
262
+ - CLI integration for `inspect`, `snapshot`, `compare`, `diff`, `scan`, and `baseline`
263
+ - Parquet and directory report generation
264
+
265
+ ## Repository Standards
266
+
267
+ - [CONTRIBUTING.md](./CONTRIBUTING.md)
268
+ - [SECURITY.md](./SECURITY.md)
269
+ - [LICENSE](./LICENSE)
270
+ - [CITATION.cff](./CITATION.cff)