fairscope 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. fairscope-0.3.0/.gitattributes +10 -0
  2. fairscope-0.3.0/.github/workflows/ci.yml +40 -0
  3. fairscope-0.3.0/.gitignore +55 -0
  4. fairscope-0.3.0/.pre-commit-config.yaml +22 -0
  5. fairscope-0.3.0/CHANGELOG.md +66 -0
  6. fairscope-0.3.0/CITATION.cff +80 -0
  7. fairscope-0.3.0/CONTRIBUTING.md +41 -0
  8. fairscope-0.3.0/LICENSE +21 -0
  9. fairscope-0.3.0/PKG-INFO +202 -0
  10. fairscope-0.3.0/README.md +149 -0
  11. fairscope-0.3.0/docs/DESIGN.md +88 -0
  12. fairscope-0.3.0/docs/api.md +27 -0
  13. fairscope-0.3.0/docs/cpfe.md +57 -0
  14. fairscope-0.3.0/docs/federated.md +58 -0
  15. fairscope-0.3.0/docs/getting-started.md +66 -0
  16. fairscope-0.3.0/docs/healthcare.md +46 -0
  17. fairscope-0.3.0/docs/index.md +38 -0
  18. fairscope-0.3.0/docs/lending.md +53 -0
  19. fairscope-0.3.0/docs/planned-issues.md +57 -0
  20. fairscope-0.3.0/fairscope/__init__.py +55 -0
  21. fairscope-0.3.0/fairscope/core/__init__.py +48 -0
  22. fairscope-0.3.0/fairscope/core/_utils.py +44 -0
  23. fairscope-0.3.0/fairscope/core/bootstrap.py +66 -0
  24. fairscope-0.3.0/fairscope/core/calibration.py +168 -0
  25. fairscope-0.3.0/fairscope/core/correction.py +49 -0
  26. fairscope-0.3.0/fairscope/core/delong.py +151 -0
  27. fairscope-0.3.0/fairscope/core/metrics.py +96 -0
  28. fairscope-0.3.0/fairscope/federated/__init__.py +9 -0
  29. fairscope-0.3.0/fairscope/federated/audit.py +217 -0
  30. fairscope-0.3.0/fairscope/healthcare/__init__.py +5 -0
  31. fairscope-0.3.0/fairscope/healthcare/audit.py +227 -0
  32. fairscope-0.3.0/fairscope/lending/__init__.py +5 -0
  33. fairscope-0.3.0/fairscope/lending/audit.py +160 -0
  34. fairscope-0.3.0/fairscope/nlp/__init__.py +29 -0
  35. fairscope-0.3.0/fairscope/nlp/attribution.py +54 -0
  36. fairscope-0.3.0/fairscope/nlp/cross_platform.py +178 -0
  37. fairscope-0.3.0/fairscope/nlp/metrics.py +87 -0
  38. fairscope-0.3.0/fairscope/nlp/significance.py +51 -0
  39. fairscope-0.3.0/mkdocs.yml +43 -0
  40. fairscope-0.3.0/notebooks/01_healthcare_replication.ipynb +109 -0
  41. fairscope-0.3.0/notebooks/02_nlp_cpfe_demo.ipynb +129 -0
  42. fairscope-0.3.0/notebooks/03_lending_replication.ipynb +160 -0
  43. fairscope-0.3.0/notebooks/04_federated_replication.ipynb +133 -0
  44. fairscope-0.3.0/paper/cpfe_demo.py +66 -0
  45. fairscope-0.3.0/paper/paper.md +210 -0
  46. fairscope-0.3.0/paper/paper.tex +270 -0
  47. fairscope-0.3.0/pyproject.toml +81 -0
  48. fairscope-0.3.0/tests/fixtures/README.md +44 -0
  49. fairscope-0.3.0/tests/fixtures/_generate_healthcare_fixture.py +49 -0
  50. fairscope-0.3.0/tests/fixtures/_generate_lending_fixture.py +71 -0
  51. fairscope-0.3.0/tests/fixtures/healthcare_subsample.csv +1204 -0
  52. fairscope-0.3.0/tests/fixtures/lending_subsample.csv +1505 -0
  53. fairscope-0.3.0/tests/test_bootstrap.py +36 -0
  54. fairscope-0.3.0/tests/test_calibration.py +107 -0
  55. fairscope-0.3.0/tests/test_core_api.py +35 -0
  56. fairscope-0.3.0/tests/test_correction.py +37 -0
  57. fairscope-0.3.0/tests/test_delong.py +93 -0
  58. fairscope-0.3.0/tests/test_dispatch.py +70 -0
  59. fairscope-0.3.0/tests/test_federated.py +111 -0
  60. fairscope-0.3.0/tests/test_healthcare.py +187 -0
  61. fairscope-0.3.0/tests/test_lending.py +144 -0
  62. fairscope-0.3.0/tests/test_metrics.py +100 -0
  63. fairscope-0.3.0/tests/test_nlp_api.py +54 -0
  64. fairscope-0.3.0/tests/test_nlp_attribution.py +40 -0
  65. fairscope-0.3.0/tests/test_nlp_cpfe.py +81 -0
  66. fairscope-0.3.0/tests/test_nlp_metrics.py +114 -0
  67. fairscope-0.3.0/tests/test_nlp_significance.py +41 -0
  68. fairscope-0.3.0/tests/test_utils.py +44 -0
  69. fairscope-0.3.0/tests/test_version.py +16 -0
@@ -0,0 +1,10 @@
1
+ # Normalize line endings: store text as LF in the repo and check out as LF on every platform (eol=lf disables CRLF conversion, including on Windows).
2
+ * text=auto eol=lf
3
+
4
+ # Binary assets and notebooks (the `binary` macro disables EOL normalization and textual diff/merge)
5
+ *.png binary
6
+ *.jpg binary
7
+ *.jpeg binary
8
+ *.gif binary
9
+ *.pdf binary
10
+ *.ipynb binary
@@ -0,0 +1,40 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ fail-fast: false
14
+ matrix:
15
+ python-version: ["3.9", "3.10", "3.11", "3.12"]
16
+
17
+ steps:
18
+ - uses: actions/checkout@v5
19
+
20
+ - name: Set up Python ${{ matrix.python-version }}
21
+ uses: actions/setup-python@v6
22
+ with:
23
+ python-version: ${{ matrix.python-version }}
24
+
25
+ - name: Install (base + dev; NLP extras intentionally excluded to keep CI light)
26
+ run: |
27
+ python -m pip install --upgrade pip
28
+ pip install -e ".[dev]"
29
+
30
+ - name: Ruff (lint + import order)
31
+ run: ruff check .
32
+
33
+ - name: Black (format check)
34
+ run: black --check .
35
+
36
+ - name: Pytest + coverage (overall floor >=70%)
37
+ run: pytest --cov=fairscope --cov-report=term-missing --cov-fail-under=70
38
+
39
+ - name: Execute replication notebooks (nbmake)
40
+ run: pytest --nbmake notebooks/
@@ -0,0 +1,55 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Distribution / packaging
7
+ build/
8
+ dist/
9
+ *.egg-info/
10
+ .eggs/
11
+ wheels/
12
+
13
+ # Virtual environments
14
+ .venv/
15
+ venv/
16
+ env/
17
+ ENV/
18
+
19
+ # Test / coverage
20
+ .pytest_cache/
21
+ .coverage
22
+ .coverage.*
23
+ htmlcov/
24
+ coverage.xml
25
+ .tox/
26
+
27
+ # Type / lint caches
28
+ .mypy_cache/
29
+ .ruff_cache/
30
+
31
+ # Docs build
32
+ site/
33
+
34
+ # LaTeX build artifacts (paper/)
35
+ paper/*.aux
36
+ paper/*.log
37
+ paper/*.out
38
+ paper/*.pdf
39
+
40
+ # Notebooks
41
+ .ipynb_checkpoints/
42
+
43
+ # Datasets (NOT bundled; downloaded on demand by scripts/fetch_data.py)
44
+ data/
45
+ *.csv
46
+ !tests/fixtures/*.csv
47
+
48
+ # IDE / OS
49
+ .idea/
50
+ .vscode/
51
+ .DS_Store
52
+ Thumbs.db
53
+
54
+ # Internal planning docs — never publish
55
+ docs/internal/
@@ -0,0 +1,22 @@
1
+ # Run `pre-commit install` once after cloning. Hooks run on every commit.
2
+ repos:
3
+ - repo: https://github.com/pre-commit/pre-commit-hooks
4
+ rev: v4.6.0
5
+ hooks:
6
+ - id: trailing-whitespace
7
+ - id: end-of-file-fixer
8
+ - id: check-yaml
9
+ - id: check-toml
10
+ - id: check-added-large-files
11
+ args: ["--maxkb=512"]
12
+
13
+ - repo: https://github.com/astral-sh/ruff-pre-commit
14
+ rev: v0.5.7
15
+ hooks:
16
+ - id: ruff
17
+ args: ["--fix"]
18
+
19
+ - repo: https://github.com/psf/black
20
+ rev: 24.8.0
21
+ hooks:
22
+ - id: black
@@ -0,0 +1,66 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project are documented here. The format follows
4
+ [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and the project aims to
5
+ adhere to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
+
7
+ ## [Unreleased]
8
+
9
+ ## [0.3.0] - 2026-06-27
10
+
11
+ ### Added
12
+ - `fairscope.federated`: `FederatedFairnessAudit` + `FederatedReport` — a cross-node
13
+ (federated / multi-site) audit composing `core`: per-node DeLong AUC CIs, ECE, Brier and
14
+ F1; cross-node disparity (max−min AUC gap and Bonferroni-corrected pairwise unpaired
15
+ DeLong tests); optional per-node recalibration (temperature/isotonic) with pre/post ECE;
16
+ per-node AUC forest, reliability curves, and PDF export. Audits per-node predictions only
17
+ — no training and no privacy guarantee. Routed via `FairnessAudit(model, domain="federated", ...)`.
18
+ - `fairscope.lending`: `LendingFairnessAudit` + `LendingReport` — a descriptive annual
19
+ approval-gap analysis (symmetric disparate impact per year, composing `core`) plus an
20
+ optional subgroup CATE via Causal Forest DML (`estimate_cate`, `econml.dml.CausalForestDML`).
21
+ Causal claims are conditional on the DML assumptions. `econml` is the optional
22
+ `fairscope[lending]` extra. Routed via `FairnessAudit(model, domain="lending", ...)`.
23
+ - Documentation pages for the federated and lending modules, and an auto-generated API
24
+ reference for both.
25
+ - Replication notebooks `notebooks/03_lending_replication.ipynb` and
26
+ `notebooks/04_federated_replication.ipynb` (synthetic; executed in CI via `nbmake`).
27
+
28
+ ## [0.2.0] - 2026-06-27
29
+
30
+ ### Added
31
+ - `fairscope.nlp`: the five-axis Cross-Platform Fairness Evaluation (CPFE) protocol —
32
+ `CPFEProtocol` + `CPFEReport` (macro AUC/F1 and ΔAUC%, multiclass ECE, bootstrap
33
+ macro-AUC significance with Bonferroni correction, per-class disparate impact and
34
+ equalized odds), a structured per-axis `deployment_readiness()` diagnostic using P4's
35
+ stated reference bands (with an illustrative, configurable ΔAUC limit), and
36
+ gradient-saliency Jaccard attribution stability (`token_saliency` behind
37
+ `fairscope[nlp]`). Routed via `FairnessAudit(model, domain="nlp", ...)`.
38
+ - Documentation site (MkDocs Material + mkdocstrings) published to GitHub Pages:
39
+ getting-started with a runnable example on the synthetic fixture, CPFE and healthcare
40
+ guides, and an auto-generated API reference. <https://rajveer-code.github.io/fairscope/>
41
+ - Replication notebooks (`notebooks/01_healthcare_replication.ipynb`,
42
+ `notebooks/02_nlp_cpfe_demo.ipynb`) executed in CI via `nbmake`.
43
+
44
+ ## [0.1.0] - 2026-06-26
45
+
46
+ ### Added
47
+ - Phase 0 public skeleton: package layout, MIT `LICENSE`, `pyproject.toml` (hatchling),
48
+ GitHub Actions CI (matrix py3.9–3.12), `pre-commit` config, `CITATION.cff`, README
49
+ skeleton, and the design overview in `docs/DESIGN.md`.
50
+ - `fairscope.core`: DeLong AUC confidence intervals and paired/unpaired tests
51
+ (`delong.py`), a stratified bootstrap AUC test (`bootstrap.py`), Expected/Maximum
52
+ Calibration Error with reliability diagrams plus temperature-scaling and isotonic
53
+ recalibration (`calibration.py`), Bonferroni and Benjamini–Hochberg corrections
54
+ (`correction.py`), and subgroup metrics with symmetric disparate impact and equalized
55
+ odds difference (`metrics.py`). 100% test coverage on `core/`.
56
+ - `fairscope.healthcare`: `HealthcareFairnessAudit` + `HealthcareReport` — a one-call
57
+ clinical fairness audit composing `core/` (per-subgroup DeLong CIs, ECE, Bonferroni-
58
+ corrected pairwise tests, Brier/F1), with report tables, an AUC forest plot,
59
+ reliability-curve plots, multi-page PDF export (matplotlib only), and an optional SHAP
60
+ summary (`fairscope[shap]`). A synthetic, seed-generated golden fixture regression-tests
61
+ the published direction and approximate magnitude (elderly < young AUC, gap ≈ 0.135).
62
+ - Top-level `FairnessAudit(model, domain=...)` dispatcher (healthcare implemented).
63
+
64
+ [0.3.0]: https://github.com/Rajveer-code/fairscope/releases/tag/v0.3.0
65
+ [0.2.0]: https://github.com/Rajveer-code/fairscope/releases/tag/v0.2.0
66
+ [0.1.0]: https://github.com/Rajveer-code/fairscope/releases/tag/v0.1.0
@@ -0,0 +1,80 @@
1
+ cff-version: 1.2.0
2
+ message: "If you use fairscope, please cite it as below."
3
+ title: "fairscope: subgroup-stratified, calibration-aware fairness auditing for ML models"
4
+ abstract: >-
5
+ An open-source Python library that packages peer-reviewed statistical methods for
6
+ subgroup-stratified, calibration-aware fairness auditing — including per-subgroup
7
+ DeLong confidence intervals, per-subgroup Expected Calibration Error, bootstrap
8
+ significance testing, a subgroup-stratified interface to standard recalibration, and a
9
+ five-axis Cross-Platform Fairness Evaluation (CPFE) protocol.
10
+ type: software
11
+ authors:
12
+ - family-names: Pall
13
+ given-names: Rajveer Singh
14
+ orcid: "https://orcid.org/0009-0001-6762-6134"
15
+ version: "0.3.0"
16
+ license: MIT
17
+ repository-code: "https://github.com/Rajveer-code/fairscope"
18
+ keywords:
19
+ - fairness
20
+ - machine learning
21
+ - calibration
22
+ - DeLong
23
+ - subgroup analysis
24
+ - model auditing
25
+ references:
26
+ # Published, final.
27
+ - type: conference-paper
28
+ title: >-
29
+ Comprehensive Evaluation of Machine Learning for Type 2 Diabetes Risk Prediction:
30
+ Large-Scale External Validation and Fairness Analysis
31
+ authors:
32
+ - family-names: Pall
33
+ given-names: Rajveer Singh
34
+ - family-names: Yadav
35
+ given-names: Sameer
36
+ - family-names: Bhalerao
37
+ given-names: Siddharth
38
+ - family-names: Sahu
39
+ given-names: Sourabh
40
+ - family-names: Ahluwalia
41
+ given-names: Ritu
42
+ - family-names: Awadhiya
43
+ given-names: Bhaskar
44
+ year: 2026
45
+ collection-title: "IEEE CIPHER 2026"
46
+ doi: "10.1109/CIPHER70417.2026.11523789"
47
+ # Preprints (no final venue).
48
+ - type: article
49
+ title: "IndiaFinBench"
50
+ authors:
51
+ - family-names: Pall
52
+ given-names: Rajveer Singh
53
+ year: 2026
54
+ url: "https://arxiv.org/abs/2604.19298"
55
+ identifiers:
56
+ - type: other
57
+ value: "arXiv:2604.19298 [cs.CL]"
58
+ notes: "Preprint."
59
+ - type: article
60
+ title: "When the Gate Stays Closed"
61
+ authors:
62
+ - family-names: Pall
63
+ given-names: Rajveer Singh
64
+ year: 2026
65
+ url: "https://ssrn.com/abstract=6742700"
66
+ identifiers:
67
+ - type: other
68
+ value: "SSRN: 6742700"
69
+ notes: "Preprint."
70
+ - type: article
71
+ title: "The Transaction Cost Trap"
72
+ authors:
73
+ - family-names: Pall
74
+ given-names: Rajveer Singh
75
+ year: 2026
76
+ url: "https://ssrn.com/abstract=6422358"
77
+ identifiers:
78
+ - type: other
79
+ value: "SSRN: 6422358"
80
+ notes: "Preprint."
@@ -0,0 +1,41 @@
1
+ # Contributing to fairscope
2
+
3
+ Thanks for your interest. `fairscope` is built incrementally, one module per phase,
4
+ with regression tests as the credibility anchor.
5
+
6
+ ## Development setup
7
+
8
+ ```bash
9
+ git clone https://github.com/Rajveer-code/fairscope
10
+ cd fairscope
11
+ python -m venv .venv && source .venv/bin/activate # Windows: .venv\Scripts\activate
12
+ pip install -e ".[dev]"
13
+ pre-commit install
14
+ pytest
15
+ ```
16
+
17
+ ## Standards
18
+
19
+ - **Every function:** full type hints; NumPy-style docstring with a runnable `Examples`
20
+ block; input validation with clear errors; no silent failures; deterministic given a
21
+ seed (expose `random_state` on anything stochastic).
22
+ - **Tests are mandatory.** Where an authoritative reference value exists (e.g. DeLong
23
+ 1988, `statsmodels` multitest), test against it — not against a paper. Where testing
24
+ against a paper, assert *direction + approximate magnitude* within a stated tolerance
25
+ and document any discrepancy. Committed data fixtures are small subsamples, labeled as
26
+ such in the docstring; they do not reproduce a full published run.
27
+ - **Formatting/linting:** `black` + `ruff` (run automatically by `pre-commit`).
28
+ - **Coverage:** enforced overall package floor ≥70% (`--cov-fail-under=70` in CI); target ≥85% on `core/`.
29
+
30
+ ## Honesty rules (non-negotiable)
31
+
32
+ 1. No invented mathematics — every method ports a published paper and cites it.
33
+ 2. No fabricated user counts, stars, downloads, or adoption claims anywhere.
34
+ 3. If a paper is ambiguous about a parameter, open an issue and ask — do not invent it.
35
+ 4. If a method does not match what a paper actually contains, name the function for what
36
+ it generically does and flag the mismatch.
37
+
38
+ ## Commits
39
+
40
+ Small, logical, self-contained commits — never batch a whole phase into one commit. Each
41
+ commit message should describe a real design decision or fix.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Rajveer Singh Pall
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,202 @@
1
+ Metadata-Version: 2.4
2
+ Name: fairscope
3
+ Version: 0.3.0
4
+ Summary: Subgroup-stratified, calibration-aware fairness auditing for ML models, grounded in peer-reviewed methods.
5
+ Project-URL: Homepage, https://github.com/Rajveer-code/fairscope
6
+ Project-URL: Repository, https://github.com/Rajveer-code/fairscope
7
+ Project-URL: Issues, https://github.com/Rajveer-code/fairscope/issues
8
+ Author: Rajveer Singh Pall
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: auc,calibration,delong,fairness,machine-learning,model-auditing,subgroup-analysis
12
+ Classifier: Development Status :: 2 - Pre-Alpha
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Requires-Python: >=3.9
21
+ Requires-Dist: matplotlib>=3.6
22
+ Requires-Dist: numpy>=1.23
23
+ Requires-Dist: pandas>=1.5
24
+ Requires-Dist: scikit-learn>=1.1
25
+ Requires-Dist: scipy>=1.9
26
+ Provides-Extra: all
27
+ Requires-Dist: captum>=0.6; extra == 'all'
28
+ Requires-Dist: econml>=0.15; extra == 'all'
29
+ Requires-Dist: shap>=0.42; extra == 'all'
30
+ Requires-Dist: torch>=2.0; extra == 'all'
31
+ Requires-Dist: transformers>=4.30; extra == 'all'
32
+ Provides-Extra: dev
33
+ Requires-Dist: black>=24.0; extra == 'dev'
34
+ Requires-Dist: nbmake>=1.5; extra == 'dev'
35
+ Requires-Dist: pre-commit>=3.5; extra == 'dev'
36
+ Requires-Dist: pytest-cov>=4.1; extra == 'dev'
37
+ Requires-Dist: pytest>=7.4; extra == 'dev'
38
+ Requires-Dist: ruff>=0.5; extra == 'dev'
39
+ Requires-Dist: statsmodels>=0.14; extra == 'dev'
40
+ Provides-Extra: docs
41
+ Requires-Dist: mkdocs-material>=9.4; extra == 'docs'
42
+ Requires-Dist: mkdocs>=1.5; extra == 'docs'
43
+ Requires-Dist: mkdocstrings[python]>=0.24; extra == 'docs'
44
+ Provides-Extra: lending
45
+ Requires-Dist: econml>=0.15; extra == 'lending'
46
+ Provides-Extra: nlp
47
+ Requires-Dist: captum>=0.6; extra == 'nlp'
48
+ Requires-Dist: torch>=2.0; extra == 'nlp'
49
+ Requires-Dist: transformers>=4.30; extra == 'nlp'
50
+ Provides-Extra: shap
51
+ Requires-Dist: shap>=0.42; extra == 'shap'
52
+ Description-Content-Type: text/markdown
53
+
54
+ # fairscope
55
+
56
+ [![CI](https://github.com/Rajveer-code/fairscope/actions/workflows/ci.yml/badge.svg)](https://github.com/Rajveer-code/fairscope/actions/workflows/ci.yml)
57
+ [![Python](https://img.shields.io/badge/python-3.9%E2%80%933.12-blue.svg)](https://www.python.org/)
58
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
59
+ [![Docs](https://img.shields.io/badge/docs-live-brightgreen.svg)](https://rajveer-code.github.io/fairscope/)
60
+
61
+ **Subgroup-stratified, calibration-aware fairness auditing for machine-learning models — grounded in peer-reviewed methods.**
62
+
63
+ 📖 **Documentation:** <https://rajveer-code.github.io/fairscope/>
64
+
65
+ `fairscope` packages statistical machinery that mainstream fairness toolkits do not expose as
66
+ first-class, subgroup-stratified functions, and adds one novel protocol on top:
67
+
68
+ - **DeLong confidence intervals** for per-subgroup AUC (fast midrank algorithm).
69
+ - **Per-subgroup Expected/Maximum Calibration Error** with reliability diagrams.
70
+ - **Significance testing** of subgroup performance gaps (paired/unpaired DeLong, stratified
71
+ bootstrap) with **Bonferroni / Benjamini–Hochberg** correction.
72
+ - A subgroup-stratified **interface to standard recalibration** — temperature scaling
73
+ (Guo et al. 2017) and isotonic regression (Zadrozny & Elkan 2002), with pre/post-ECE.
74
+ - A novel five-axis **Cross-Platform Fairness Evaluation (CPFE)** protocol.
75
+ - One-call **domain audits**: `healthcare`, `lending`, `federated`.
76
+
77
+ Only the CPFE protocol is presented as novel. Every other function ports a documented method
78
+ and cites its source; the recalibration methods are standard, and the contribution there is the
79
+ per-subgroup interface and pre/post-ECE reporting.
80
+
81
+ > **Status — v0.3.0.** All five modules (`core`, `healthcare`, `nlp`/CPFE, `federated`,
82
+ > `lending`) are implemented, tested, and released. 100% line coverage on the statistical core;
83
+ > CI green across Python 3.9–3.12. See [`docs/DESIGN.md`](docs/DESIGN.md) for methods and design.
84
+
85
+ ## Install
86
+
87
+ ```bash
88
+ pip install fairscope
89
+ ```
90
+
91
+ Releases are uploaded to PyPI by the maintainer; if a version isn't available there yet,
92
+ install from source or from the [release assets](https://github.com/Rajveer-code/fairscope/releases):
93
+
94
+ ```bash
95
+ git clone https://github.com/Rajveer-code/fairscope
96
+ cd fairscope
97
+ pip install -e ".[dev]"
98
+ pytest
99
+ ```
100
+
101
+ The base install is light (NumPy, SciPy, scikit-learn, pandas, matplotlib). Optional extras:
102
+ `fairscope[nlp]` (torch, transformers, captum), `fairscope[lending]` (econml),
103
+ `fairscope[shap]`, `fairscope[docs]`.
104
+
105
+ ## Quickstart
106
+
107
+ ```python
108
+ from fairscope.healthcare import HealthcareFairnessAudit
109
+
110
+ # y_true : binary outcomes
111
+ # y_score: the model's positive-class probabilities
112
+ # age_group: a protected attribute, aligned row-for-row
113
+ report = HealthcareFairnessAudit.from_scores(
114
+ y_true, y_score, {"age_group": age_group}
115
+ ).run()
116
+
117
+ print(report.summary()) # per-subgroup AUC (DeLong CI), ECE, Brier, F1; flags the largest gap
118
+ report.to_dataframe() # tidy per-subgroup table
119
+ report.plot_auc_forest() # forest plot of per-subgroup AUC with DeLong intervals
120
+ ```
121
+
122
+ Every domain is also reachable through one dispatcher,
123
+ `FairnessAudit(model, domain=...)`, with `domain` in `{"healthcare", "nlp", "federated",
124
+ "lending"}`. A runnable end-to-end example on a committed synthetic fixture is in the
125
+ [getting-started guide](https://rajveer-code.github.io/fairscope/getting-started/) and in
126
+ [`notebooks/`](notebooks/).
127
+
128
+ ## Modules
129
+
130
+ | Module | Purpose | Status |
131
+ |---|---|---|
132
+ | `core/` | DeLong CI, bootstrap-AUC test, ECE/MCE + reliability, multiple-testing correction, subgroup metrics | ✅ shipped |
133
+ | `healthcare/` | one-call clinical fairness audit + report (tables, forest & reliability plots, PDF, optional SHAP) | ✅ shipped |
134
+ | `nlp/` | CPFE five-axis cross-platform protocol (centerpiece) + Captum attribution stability | ✅ shipped |
135
+ | `federated/` | per-node DeLong + cross-node disparity + per-node recalibration | ✅ shipped |
136
+ | `lending/` | annual approval-gap + subgroup CATE (Causal Forest DML) | ✅ shipped |
137
+
138
+ Plotting (forest plots, reliability diagrams) currently lives in the domain reports.
139
+ `lending`'s CATE estimation needs the optional `fairscope[lending]` extra (`econml`). The
140
+ `federated` module audits per-node predictions only — it performs no training and provides no
141
+ privacy guarantee.
142
+
143
+ ## How it differs from AIF360 / Fairlearn
144
+
145
+ `fairscope` is complementary to AIF360 and Fairlearn, not a replacement: those toolkits focus on bias
146
+ *mitigation*; `fairscope` does uncertainty-aware *measurement*. The table below was verified by
147
+ inspecting the installed public APIs of **AIF360 0.6.1** and **Fairlearn 0.14.0** (checked
148
+ 2026-06; re-confirm if versions change).
149
+
150
+ | Capability | AIF360 | Fairlearn | fairscope |
151
+ |---|:---:|:---:|:---:|
152
+ | Per-subgroup AUC confidence interval (DeLong) | no | no\* | yes |
153
+ | Per-subgroup Expected Calibration Error | no | no | yes |
154
+ | Subgroup significance test + multiple-comparison correction | no | no | yes |
155
+ | Subgroup-stratified recalibration (temperature / isotonic) | partial† | no | yes |
156
+ | Cross-platform five-axis protocol (CPFE) | no | no | yes (novel) |
157
+ | Per-node / federated audit | no | no | yes |
158
+ | Bias-mitigation algorithms | yes | yes | out of scope |
159
+
160
+ \* Fairlearn's `MetricFrame` computes per-subgroup AUC *point estimates* (with aggregates such as
161
+ `roc_auc_score_group_min`), but provides no analytic (DeLong) confidence interval.
162
+ † AIF360 ships `CalibratedEqOddsPostprocessing` (calibration-aware equalized-odds
163
+ postprocessing), not a general per-subgroup temperature/isotonic recalibration interface.
164
+
165
+ **Closest related work — `meval`** (Sutariya & Petersen, 2025,
166
+ [arXiv:2512.17409](https://arxiv.org/abs/2512.17409)): a statistical toolbox for stratified,
167
+ fine-grained model-performance analysis that *also* provides subgroup metric uncertainty and
168
+ multiple-comparison corrections (with a medical-imaging focus). `fairscope` overlaps with it on
169
+ uncertainty + significance; what `fairscope` adds is the specific DeLong AUC intervals, the
170
+ per-subgroup calibration **and recalibration** interface, the five-axis cross-platform CPFE
171
+ protocol, and one-call domain audits (healthcare / lending / federated).
172
+
173
+ ## Engineering
174
+
175
+ - **Test-driven**, with regression tests anchored to authoritative reference values where they
176
+ exist (DeLong's worked example; `statsmodels` multiple-testing routines).
177
+ - **100% line coverage** on the statistical core; CI runs pytest + coverage, ruff, and black
178
+ across Python 3.9–3.12, and executes the replication notebooks via `nbmake`.
179
+ - Full type hints, NumPy-style docstrings with runnable examples, and explicit input validation
180
+ (an AUC on a single-class subgroup raises rather than returning a meaningless value).
181
+ - Committed fixtures are **small, synthetic, and labelled as such**; no datasets or trained
182
+ models are bundled.
183
+
184
+ ## Grounded in published research
185
+
186
+ `fairscope` ports methods from the author's peer-reviewed and under-review papers; it invents no
187
+ new mathematics. Each function cites its source. Venues and identifiers, where available, are in
188
+ [`CITATION.cff`](CITATION.cff).
189
+
190
+ - Diabetes risk prediction with external validation + fairness analysis (XGBoost, NHANES→BRFSS) — IEEE CIPHER, 2026.
191
+ - A five-axis Cross-Platform Fairness Evaluation for mental-health NLP — under review.
192
+ - Privacy-preserving federated learning for diabetes risk across heterogeneous nodes — under review.
193
+ - Heterogeneous racial effects in mortgage approval (Causal Forest Double Machine Learning, HMDA) — under review.
194
+ - Racial disparities in mortgage lending (RDD / DiD / decomposition, HMDA) — under review.
195
+
196
+ ## Citation
197
+
198
+ If you use `fairscope`, please cite it via [`CITATION.cff`](CITATION.cff).
199
+
200
+ ## License
201
+
202
+ [MIT](LICENSE) © 2026 Rajveer Singh Pall · ORCID [0009-0001-6762-6134](https://orcid.org/0009-0001-6762-6134)