framelint 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,32 @@
1
+ # Byte-compiled / optimized / cache
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Distribution / packaging
7
+ .Python
8
+ build/
9
+ dist/
10
+ *.egg-info/
11
+ *.egg
12
+ wheels/
13
+
14
+ # Virtual environments
15
+ .venv/
16
+ venv/
17
+ env/
18
+ ENV/
19
+
20
+ # Test / coverage
21
+ .pytest_cache/
22
+ .coverage
23
+ .coverage.*
24
+ coverage.xml
25
+ htmlcov/
26
+ .mypy_cache/
27
+ .ruff_cache/
28
+
29
+ # IDE / OS
30
+ .idea/
31
+ .vscode/
32
+ .DS_Store
@@ -0,0 +1,29 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project are documented here. The format is based on
4
+ [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project
5
+ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
+
7
+ ## [Unreleased]
8
+
9
+ ## [0.1.0] - 2026-06-27
10
+
11
+ ### Added
12
+ - Initial release.
13
+ - Data-quality checks: missingness, duplicate rows, constant/zero-variance
14
+ columns, cardinality (likely-ID and high-cardinality), type consistency
15
+ (numbers-as-strings and mixed types), and numeric outliers (IQR / z-score).
16
+ - Opt-in per-column rules: email, date/datetime, numeric range, regex, and
17
+ allowed-value sets.
18
+ - Schema-drift detection against a saved baseline (added/removed columns, dtype
19
+ changes, null-rate jumps, and distribution shifts).
20
+ - `Report` with `summary()` (rich console), `to_dict`, `to_json`, `to_html`,
21
+ and `to_markdown`, plus a `passed` pass/fail decision.
22
+ - Typer-based CLI (`framelint scan`, `framelint baseline save`) with CI-friendly
23
+ exit codes (0 pass, 1 quality failure, 2 usage error).
24
+ - Configuration via defaults, `pyproject.toml`, standalone TOML, a dict/Config,
25
+ and CLI flags, with a documented precedence order.
26
+ - Full type hints and a `py.typed` marker.
27
+
28
+ [Unreleased]: https://github.com/AnoopIbrampur/framelint/compare/v0.1.0...HEAD
29
+ [0.1.0]: https://github.com/AnoopIbrampur/framelint/releases/tag/v0.1.0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Anoop Ibrampur
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,240 @@
1
+ Metadata-Version: 2.4
2
+ Name: framelint
3
+ Version: 0.1.0
4
+ Summary: A lightweight data-quality profiler and CI gate for tabular data.
5
+ Project-URL: Homepage, https://github.com/AnoopIbrampur/framelint
6
+ Project-URL: Repository, https://github.com/AnoopIbrampur/framelint
7
+ Project-URL: Issues, https://github.com/AnoopIbrampur/framelint/issues
8
+ Project-URL: Changelog, https://github.com/AnoopIbrampur/framelint/blob/main/CHANGELOG.md
9
+ Author-email: Anoop Ibrampur <anoopibrampur@gmail.com>
10
+ License: MIT
11
+ License-File: LICENSE
12
+ Keywords: ci,data-engineering,data-profiling,data-quality,data-validation,dataframe,pandas,schema-drift
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.9
20
+ Classifier: Programming Language :: Python :: 3.10
21
+ Classifier: Programming Language :: Python :: 3.11
22
+ Classifier: Programming Language :: Python :: 3.12
23
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
24
+ Classifier: Topic :: Software Development :: Quality Assurance
25
+ Classifier: Topic :: Utilities
26
+ Classifier: Typing :: Typed
27
+ Requires-Python: >=3.9
28
+ Requires-Dist: pandas>=1.3
29
+ Requires-Dist: rich>=13.0
30
+ Requires-Dist: tomli>=2.0; python_version < '3.11'
31
+ Requires-Dist: typer>=0.9
32
+ Provides-Extra: dev
33
+ Requires-Dist: build>=1.0; extra == 'dev'
34
+ Requires-Dist: mypy>=1.8; extra == 'dev'
35
+ Requires-Dist: pandas-stubs; extra == 'dev'
36
+ Requires-Dist: pre-commit>=3.0; extra == 'dev'
37
+ Requires-Dist: pyarrow>=10.0; extra == 'dev'
38
+ Requires-Dist: pytest-cov>=4.0; extra == 'dev'
39
+ Requires-Dist: pytest>=7.0; extra == 'dev'
40
+ Requires-Dist: ruff>=0.6; extra == 'dev'
41
+ Requires-Dist: twine>=5.0; extra == 'dev'
42
+ Provides-Extra: parquet
43
+ Requires-Dist: pyarrow>=10.0; extra == 'parquet'
44
+ Description-Content-Type: text/markdown
45
+
46
+ # framelint
47
+
48
+ [![PyPI version](https://img.shields.io/pypi/v/framelint.svg)](https://pypi.org/project/framelint/)
49
+ [![Python versions](https://img.shields.io/pypi/pyversions/framelint.svg)](https://pypi.org/project/framelint/)
50
+ [![CI](https://github.com/AnoopIbrampur/framelint/actions/workflows/ci.yml/badge.svg)](https://github.com/AnoopIbrampur/framelint/actions/workflows/ci.yml)
51
+ [![codecov](https://codecov.io/gh/AnoopIbrampur/framelint/branch/main/graph/badge.svg)](https://codecov.io/gh/AnoopIbrampur/framelint)
52
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/AnoopIbrampur/framelint/blob/main/LICENSE)
53
+ [![Typed](https://img.shields.io/badge/typed-yes-brightgreen.svg)](https://peps.python.org/pep-0561/)
54
+
55
+ **A lightweight data-quality profiler and CI gate for tabular data.**
56
+
57
+ `framelint` scans a pandas DataFrame or a CSV/Parquet file and produces a clear
58
+ data-quality report — nulls, duplicates, constant columns, likely-ID columns,
59
+ type inconsistencies, numeric outliers, format violations, and schema drift.
60
+
61
+ Its standout feature: it doubles as a **CI gate**. Point it at your data, set
62
+ thresholds, and it exits non-zero when quality drops — so a bad dataset fails
63
+ the build instead of silently flowing downstream.
64
+
65
+ ---
66
+
67
+ ## Why this exists
68
+
69
+ Data pipelines break quietly. A column starts arriving 40% null, an upstream job
70
+ starts writing numbers as strings, a join silently doubles your rows — and
71
+ nobody notices until a dashboard looks wrong weeks later. `framelint` turns
72
+ those failures into loud, early, automated signals you can drop into CI in one
73
+ line.
74
+
75
+ ## Install
76
+
77
+ ```bash
78
+ pip install framelint
79
+ # Parquet support:
80
+ pip install "framelint[parquet]"
81
+ ```
82
+
83
+ Requires Python 3.9+.
84
+
85
+ ## 30-second quickstart
86
+
87
+ ```python
88
+ import framelint
89
+
90
+ report = framelint.scan("sales.csv") # or pass a DataFrame
91
+ report.summary() # pretty console table
92
+ print(report.passed) # -> True / False
93
+
94
+ report.to_json("report.json") # machine-readable
95
+ report.to_html("report.html") # shareable report
96
+ ```
97
+
98
+ Example console output:
99
+
100
+ ```
101
+ framelint FAILED rows=1000 cols=6 errors=1 warnings=3 info=1
102
+ ┏━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
103
+ ┃ Severity ┃ Check ┃ Column ┃ Message ┃
104
+ ┡━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
105
+ │ error │ missingness │ region │ Column 'region' is 62.0% null. │
106
+ │ warning │ duplicates │ — │ Found 12 duplicate rows (full-row). │
107
+ │ warning │ type_consistency │ price │ Column 'price' holds numbers as ... │
108
+ │ warning │ outliers │ amount │ Column 'amount' has 18 outliers ... │
109
+ │ info │ cardinality │ id │ Column 'id' looks like an identifier. │
110
+ └──────────┴──────────────────┴─────────┴───────────────────────────────────────┘
111
+ ```
112
+
113
+ ## Features
114
+
115
+ - **Missingness** — per-column null counts and rates, with severity thresholds.
116
+ - **Duplicate rows** — full-row or by a subset of key columns.
117
+ - **Constant / zero-variance** and all-null columns.
118
+ - **Cardinality** — likely-identifier and high-cardinality column detection.
119
+ - **Type consistency** — numbers stored as strings, mixed-type columns.
120
+ - **Outliers** — numeric outliers via IQR or z-score (configurable).
121
+ - **Format validation** (opt-in) — email, date/datetime, numeric ranges,
122
+ regex, and allowed-value sets, per column.
123
+ - **Schema drift** — save a baseline, then detect added/removed columns, dtype
124
+ changes, null-rate jumps, and distribution shifts.
125
+ - **Severity levels** — every finding is `info`, `warning`, or `error`.
126
+ - **Pass/fail decision** — based on configurable thresholds, for use in CI.
127
+ - **Outputs** — rich console, `dict`, JSON, HTML, and Markdown.
128
+
129
+ ## CLI
130
+
131
+ ```bash
132
+ # Scan and write reports
133
+ framelint scan sales.csv --html report.html --json report.json
134
+
135
+ # Fail the build if any error-level finding is present
136
+ framelint scan sales.csv --fail-on error
137
+
138
+ # Save a baseline, then scan a new file for drift
139
+ framelint baseline save sales.csv baseline.json
140
+ framelint scan new.csv --baseline baseline.json
141
+ ```
142
+
143
+ Exit codes: **0** = passed, **1** = quality failure, **2** = usage error.
144
+
145
+ ### Use it in CI to gate data quality
146
+
147
+ ```yaml
148
+ # .github/workflows/data-quality.yml
149
+ name: data-quality
150
+ on: [push, pull_request]
151
+ jobs:
152
+ check:
153
+ runs-on: ubuntu-latest
154
+ steps:
155
+ - uses: actions/checkout@v4
156
+ - uses: actions/setup-python@v5
157
+ with: { python-version: "3.12" }
158
+ - run: pip install framelint
159
+ - run: framelint scan data/sales.csv --fail-on error --baseline data/baseline.json
160
+ ```
161
+
162
+ If quality drops below your thresholds, the step exits non-zero and the build
163
+ fails — no extra glue code required.
164
+
165
+ ## Configuration
166
+
167
+ Thresholds and per-column rules can be set, in increasing order of precedence:
168
+
169
+ 1. Built-in defaults
170
+ 2. `[tool.framelint]` in `pyproject.toml`
171
+ 3. A standalone TOML file (`--config rules.toml`)
172
+ 4. A `dict` / `Config` passed to `scan(...)`
173
+ 5. Individual CLI flags (e.g. `--fail-on`, `--outlier-method`)
174
+
175
+ ```toml
176
+ # pyproject.toml (or a standalone --config file, same schema)
177
+ [tool.framelint]
178
+ null_rate_warning = 0.10
179
+ null_rate_error = 0.50
180
+ duplicate_rate_error = 0.05
181
+ outlier_method = "iqr" # or "zscore"
182
+ fail_on = "error"
183
+
184
+ [tool.framelint.columns.email]
185
+ type = "email"
186
+
187
+ [tool.framelint.columns.age]
188
+ min = 0
189
+ max = 120
190
+ ```
191
+
192
+ | Key | Default | Meaning |
193
+ | --- | --- | --- |
194
+ | `null_rate_warning` / `null_rate_error` | 0.10 / 0.50 | Null-rate thresholds |
195
+ | `duplicate_rate_warning` / `duplicate_rate_error` | 0.0 / 0.10 | Duplicate-row thresholds |
196
+ | `duplicate_subset` | *(unset)* | Key columns for duplicate detection (full-row when unset) |
197
+ | `id_cardinality_ratio` | 0.95 | Unique-ratio to flag a likely ID |
198
+ | `high_cardinality_ratio` | 0.50 | Unique-ratio to flag high cardinality |
199
+ | `outlier_method` | `"iqr"` | `iqr` or `zscore` |
200
+ | `iqr_factor` / `zscore_threshold` | 1.5 / 3.0 | Outlier sensitivity |
201
+ | `outlier_rate_warning` / `outlier_rate_error` | 0.01 / 0.10 | Outlier-rate thresholds |
202
+ | `drift_mean_shift` | 3.0 | Mean shift (in baseline std) to flag drift |
203
+ | `drift_null_rate_increase` | 0.10 | Null-rate jump to flag drift |
204
+ | `fail_on` | `"error"` | Severity at/above which `passed` is `False` |
205
+
206
+ Per-column rules (`[tool.framelint.columns.<name>]`): `type` (`email`/`date`/
207
+ `datetime`), `min`, `max`, `regex`, `allowed`.
208
+
209
+ ## Programmatic API
210
+
211
+ ```python
212
+ import framelint
213
+
214
+ # Baseline + drift
215
+ framelint.save_baseline("sales.csv", "baseline.json")
216
+ report = framelint.scan("new.csv", baseline="baseline.json")
217
+
218
+ # Inline configuration
219
+ report = framelint.scan(df, config={"fail_on": "warning", "outlier_method": "zscore"})
220
+
221
+ report.to_dict() # full machine-readable result
222
+ report.to_markdown() # Markdown string
223
+ report.counts_by_severity()
224
+ ```
225
+
226
+ ## Contributing
227
+
228
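+ Contributions are welcome — see [CONTRIBUTING.md](https://github.com/AnoopIbrampur/framelint/blob/main/CONTRIBUTING.md) and the
229
+ [Code of Conduct](https://github.com/AnoopIbrampur/framelint/blob/main/CODE_OF_CONDUCT.md). In short: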
230
+
231
+ ```bash
232
+ pip install -e ".[dev]"
233
+ ruff check . && ruff format --check .
234
+ mypy
235
+ pytest
236
+ ```
237
+
238
+ ## License
239
+
240
+ [MIT](https://github.com/AnoopIbrampur/framelint/blob/main/LICENSE) © Anoop Ibrampur
@@ -0,0 +1,195 @@
1
+ # framelint
2
+
3
+ [![PyPI version](https://img.shields.io/pypi/v/framelint.svg)](https://pypi.org/project/framelint/)
4
+ [![Python versions](https://img.shields.io/pypi/pyversions/framelint.svg)](https://pypi.org/project/framelint/)
5
+ [![CI](https://github.com/AnoopIbrampur/framelint/actions/workflows/ci.yml/badge.svg)](https://github.com/AnoopIbrampur/framelint/actions/workflows/ci.yml)
6
+ [![codecov](https://codecov.io/gh/AnoopIbrampur/framelint/branch/main/graph/badge.svg)](https://codecov.io/gh/AnoopIbrampur/framelint)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/AnoopIbrampur/framelint/blob/main/LICENSE)
8
+ [![Typed](https://img.shields.io/badge/typed-yes-brightgreen.svg)](https://peps.python.org/pep-0561/)
9
+
10
+ **A lightweight data-quality profiler and CI gate for tabular data.**
11
+
12
+ `framelint` scans a pandas DataFrame or a CSV/Parquet file and produces a clear
13
+ data-quality report — nulls, duplicates, constant columns, likely-ID columns,
14
+ type inconsistencies, numeric outliers, format violations, and schema drift.
15
+
16
+ Its standout feature: it doubles as a **CI gate**. Point it at your data, set
17
+ thresholds, and it exits non-zero when quality drops — so a bad dataset fails
18
+ the build instead of silently flowing downstream.
19
+
20
+ ---
21
+
22
+ ## Why this exists
23
+
24
+ Data pipelines break quietly. A column starts arriving 40% null, an upstream job
25
+ starts writing numbers as strings, a join silently doubles your rows — and
26
+ nobody notices until a dashboard looks wrong weeks later. `framelint` turns
27
+ those failures into loud, early, automated signals you can drop into CI in one
28
+ line.
29
+
30
+ ## Install
31
+
32
+ ```bash
33
+ pip install framelint
34
+ # Parquet support:
35
+ pip install "framelint[parquet]"
36
+ ```
37
+
38
+ Requires Python 3.9+.
39
+
40
+ ## 30-second quickstart
41
+
42
+ ```python
43
+ import framelint
44
+
45
+ report = framelint.scan("sales.csv") # or pass a DataFrame
46
+ report.summary() # pretty console table
47
+ print(report.passed) # -> True / False
48
+
49
+ report.to_json("report.json") # machine-readable
50
+ report.to_html("report.html") # shareable report
51
+ ```
52
+
53
+ Example console output:
54
+
55
+ ```
56
+ framelint FAILED rows=1000 cols=6 errors=1 warnings=3 info=1
57
+ ┏━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
58
+ ┃ Severity ┃ Check ┃ Column ┃ Message ┃
59
+ ┡━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
60
+ │ error │ missingness │ region │ Column 'region' is 62.0% null. │
61
+ │ warning │ duplicates │ — │ Found 12 duplicate rows (full-row). │
62
+ │ warning │ type_consistency │ price │ Column 'price' holds numbers as ... │
63
+ │ warning │ outliers │ amount │ Column 'amount' has 18 outliers ... │
64
+ │ info │ cardinality │ id │ Column 'id' looks like an identifier. │
65
+ └──────────┴──────────────────┴─────────┴───────────────────────────────────────┘
66
+ ```
67
+
68
+ ## Features
69
+
70
+ - **Missingness** — per-column null counts and rates, with severity thresholds.
71
+ - **Duplicate rows** — full-row or by a subset of key columns.
72
+ - **Constant / zero-variance** and all-null columns.
73
+ - **Cardinality** — likely-identifier and high-cardinality column detection.
74
+ - **Type consistency** — numbers stored as strings, mixed-type columns.
75
+ - **Outliers** — numeric outliers via IQR or z-score (configurable).
76
+ - **Format validation** (opt-in) — email, date/datetime, numeric ranges,
77
+ regex, and allowed-value sets, per column.
78
+ - **Schema drift** — save a baseline, then detect added/removed columns, dtype
79
+ changes, null-rate jumps, and distribution shifts.
80
+ - **Severity levels** — every finding is `info`, `warning`, or `error`.
81
+ - **Pass/fail decision** — based on configurable thresholds, for use in CI.
82
+ - **Outputs** — rich console, `dict`, JSON, HTML, and Markdown.
83
+
84
+ ## CLI
85
+
86
+ ```bash
87
+ # Scan and write reports
88
+ framelint scan sales.csv --html report.html --json report.json
89
+
90
+ # Fail the build if any error-level finding is present
91
+ framelint scan sales.csv --fail-on error
92
+
93
+ # Save a baseline, then scan a new file for drift
94
+ framelint baseline save sales.csv baseline.json
95
+ framelint scan new.csv --baseline baseline.json
96
+ ```
97
+
98
+ Exit codes: **0** = passed, **1** = quality failure, **2** = usage error.
99
+
100
+ ### Use it in CI to gate data quality
101
+
102
+ ```yaml
103
+ # .github/workflows/data-quality.yml
104
+ name: data-quality
105
+ on: [push, pull_request]
106
+ jobs:
107
+ check:
108
+ runs-on: ubuntu-latest
109
+ steps:
110
+ - uses: actions/checkout@v4
111
+ - uses: actions/setup-python@v5
112
+ with: { python-version: "3.12" }
113
+ - run: pip install framelint
114
+ - run: framelint scan data/sales.csv --fail-on error --baseline data/baseline.json
115
+ ```
116
+
117
+ If quality drops below your thresholds, the step exits non-zero and the build
118
+ fails — no extra glue code required.
119
+
120
+ ## Configuration
121
+
122
+ Thresholds and per-column rules can be set, in increasing order of precedence:
123
+
124
+ 1. Built-in defaults
125
+ 2. `[tool.framelint]` in `pyproject.toml`
126
+ 3. A standalone TOML file (`--config rules.toml`)
127
+ 4. A `dict` / `Config` passed to `scan(...)`
128
+ 5. Individual CLI flags (e.g. `--fail-on`, `--outlier-method`)
129
+
130
+ ```toml
131
+ # pyproject.toml (or a standalone --config file, same schema)
132
+ [tool.framelint]
133
+ null_rate_warning = 0.10
134
+ null_rate_error = 0.50
135
+ duplicate_rate_error = 0.05
136
+ outlier_method = "iqr" # or "zscore"
137
+ fail_on = "error"
138
+
139
+ [tool.framelint.columns.email]
140
+ type = "email"
141
+
142
+ [tool.framelint.columns.age]
143
+ min = 0
144
+ max = 120
145
+ ```
146
+
147
+ | Key | Default | Meaning |
148
+ | --- | --- | --- |
149
+ | `null_rate_warning` / `null_rate_error` | 0.10 / 0.50 | Null-rate thresholds |
150
+ | `duplicate_rate_warning` / `duplicate_rate_error` | 0.0 / 0.10 | Duplicate-row thresholds |
151
+ | `duplicate_subset` | *(unset)* | Key columns for duplicate detection (full-row when unset) |
152
+ | `id_cardinality_ratio` | 0.95 | Unique-ratio to flag a likely ID |
153
+ | `high_cardinality_ratio` | 0.50 | Unique-ratio to flag high cardinality |
154
+ | `outlier_method` | `"iqr"` | `iqr` or `zscore` |
155
+ | `iqr_factor` / `zscore_threshold` | 1.5 / 3.0 | Outlier sensitivity |
156
+ | `outlier_rate_warning` / `outlier_rate_error` | 0.01 / 0.10 | Outlier-rate thresholds |
157
+ | `drift_mean_shift` | 3.0 | Mean shift (in baseline std) to flag drift |
158
+ | `drift_null_rate_increase` | 0.10 | Null-rate jump to flag drift |
159
+ | `fail_on` | `"error"` | Severity at/above which `passed` is `False` |
160
+
161
+ Per-column rules (`[tool.framelint.columns.<name>]`): `type` (`email`/`date`/
162
+ `datetime`), `min`, `max`, `regex`, `allowed`.
163
+
164
+ ## Programmatic API
165
+
166
+ ```python
167
+ import framelint
168
+
169
+ # Baseline + drift
170
+ framelint.save_baseline("sales.csv", "baseline.json")
171
+ report = framelint.scan("new.csv", baseline="baseline.json")
172
+
173
+ # Inline configuration
174
+ report = framelint.scan(df, config={"fail_on": "warning", "outlier_method": "zscore"})
175
+
176
+ report.to_dict() # full machine-readable result
177
+ report.to_markdown() # Markdown string
178
+ report.counts_by_severity()
179
+ ```
180
+
181
+ ## Contributing
182
+
183
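+ Contributions are welcome — see [CONTRIBUTING.md](https://github.com/AnoopIbrampur/framelint/blob/main/CONTRIBUTING.md) and the
184
+ [Code of Conduct](https://github.com/AnoopIbrampur/framelint/blob/main/CODE_OF_CONDUCT.md). In short: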
185
+
186
+ ```bash
187
+ pip install -e ".[dev]"
188
+ ruff check . && ruff format --check .
189
+ mypy
190
+ pytest
191
+ ```
192
+
193
+ ## License
194
+
195
+ [MIT](https://github.com/AnoopIbrampur/framelint/blob/main/LICENSE) © Anoop Ibrampur
@@ -0,0 +1,149 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "framelint"
7
+ dynamic = ["version"]
8
+ description = "A lightweight data-quality profiler and CI gate for tabular data."
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "Anoop Ibrampur", email = "anoopibrampur@gmail.com" }]
13
+ keywords = [
14
+ "data-quality",
15
+ "data-validation",
16
+ "data-profiling",
17
+ "pandas",
18
+ "dataframe",
19
+ "ci",
20
+ "data-engineering",
21
+ "schema-drift",
22
+ ]
23
+ classifiers = [
24
+ "Development Status :: 4 - Beta",
25
+ "Intended Audience :: Developers",
26
+ "Intended Audience :: Science/Research",
27
+ "License :: OSI Approved :: MIT License",
28
+ "Operating System :: OS Independent",
29
+ "Programming Language :: Python :: 3",
30
+ "Programming Language :: Python :: 3.9",
31
+ "Programming Language :: Python :: 3.10",
32
+ "Programming Language :: Python :: 3.11",
33
+ "Programming Language :: Python :: 3.12",
34
+ "Topic :: Scientific/Engineering :: Information Analysis",
35
+ "Topic :: Software Development :: Quality Assurance",
36
+ "Topic :: Utilities",
37
+ "Typing :: Typed",
38
+ ]
39
+ dependencies = [
40
+ "pandas>=1.3",
41
+ "rich>=13.0",
42
+ "typer>=0.9",
43
+ "tomli>=2.0; python_version < '3.11'",
44
+ ]
45
+
46
+ [project.optional-dependencies]
47
+ parquet = ["pyarrow>=10.0"]
48
+ dev = [
49
+ "framelint[parquet]",
50
+ "pytest>=7.0",
51
+ "pytest-cov>=4.0",
52
+ "ruff>=0.6",
53
+ "mypy>=1.8",
54
+ "pandas-stubs",
55
+ "build>=1.0",
56
+ "twine>=5.0",
57
+ "pre-commit>=3.0",
58
+ ]
59
+
60
+ [project.urls]
61
+ Homepage = "https://github.com/AnoopIbrampur/framelint"
62
+ Repository = "https://github.com/AnoopIbrampur/framelint"
63
+ Issues = "https://github.com/AnoopIbrampur/framelint/issues"
64
+ Changelog = "https://github.com/AnoopIbrampur/framelint/blob/main/CHANGELOG.md"
65
+
66
+ [project.scripts]
67
+ framelint = "framelint.cli:main"
68
+
69
+ [tool.hatch.version]
70
+ path = "src/framelint/__init__.py"
71
+
72
+ [tool.hatch.build.targets.wheel]
73
+ packages = ["src/framelint"]
74
+
75
+ [tool.hatch.build.targets.sdist]
76
+ include = ["src/framelint", "tests", "README.md", "LICENSE", "CHANGELOG.md"]
77
+
78
+ # ---------------------------------------------------------------------------
79
+ # Ruff: linter + formatter
80
+ # ---------------------------------------------------------------------------
81
+ [tool.ruff]
82
+ line-length = 100
83
+ target-version = "py39"
84
+ src = ["src", "tests"]
85
+
86
+ [tool.ruff.lint]
87
+ select = [
88
+ "E", # pycodestyle errors
89
+ "W", # pycodestyle warnings
90
+ "F", # pyflakes
91
+ "I", # isort
92
+ "N", # pep8-naming
93
+ "UP", # pyupgrade
94
+ "B", # flake8-bugbear
95
+ "C4", # flake8-comprehensions
96
+ "SIM", # flake8-simplify
97
+ "PTH", # flake8-use-pathlib
98
+ "ARG", # flake8-unused-arguments
99
+ "RUF", # ruff-specific
100
+ "D", # pydocstyle
101
+ ]
102
+ ignore = [
103
+ "D203", # one-blank-line-before-class (conflicts with D211)
104
+ "D213", # multi-line-summary-second-line (conflicts with D212)
105
+ ]
106
+
107
+ [tool.ruff.lint.pydocstyle]
108
+ convention = "google"
109
+
110
+ [tool.ruff.lint.per-file-ignores]
111
+ "tests/**" = ["D", "ARG"]
112
+ "examples/**" = ["D", "ARG"]
113
+ # Typer relies on function-call defaults (typer.Argument/Option) by design, and
114
+ # evaluates annotations at runtime, so `Optional[X]` is required for Python 3.9.
115
+ "src/framelint/cli.py" = ["B008", "UP007", "UP045"]
116
+
117
+ # ---------------------------------------------------------------------------
118
+ # mypy: strict type checking
119
+ # ---------------------------------------------------------------------------
120
+ [tool.mypy]
121
+ strict = true
122
+ warn_unreachable = true
123
+ files = ["src", "tests"]
124
+
125
+ [[tool.mypy.overrides]]
126
+ module = ["tests.*"]
127
+ disallow_untyped_defs = false
128
+ disallow_incomplete_defs = false
129
+ disallow_untyped_calls = false
130
+
131
+ # ---------------------------------------------------------------------------
132
+ # pytest + coverage
133
+ # ---------------------------------------------------------------------------
134
+ [tool.pytest.ini_options]
135
+ minversion = "7.0"
136
+ addopts = "-ra --cov=framelint --cov-report=term-missing --cov-report=xml"
137
+ testpaths = ["tests"]
138
+
139
+ [tool.coverage.run]
140
+ branch = true
141
+ source = ["framelint"]
142
+
143
+ [tool.coverage.report]
144
+ show_missing = true
145
+ exclude_lines = [
146
+ "pragma: no cover",
147
+ "if TYPE_CHECKING:",
148
+ "raise NotImplementedError",
149
+ ]