fairscope 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,54 @@
1
+ """Axis 5 of CPFE: attribution stability via Jaccard overlap of top-K gradient-saliency
2
+ token sets across platforms (CPFE paper). The Jaccard computation is dependency-free; the
3
+ gradient-saliency extraction uses Captum and requires ``pip install fairscope[nlp]``.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+
9
def jaccard_topk(saliency_a, saliency_b, k):
    """Jaccard overlap of the top-k tokens by saliency: ``|topK(A) ∩ topK(B)| / |union|``.

    Parameters
    ----------
    saliency_a, saliency_b : mappings ``token -> saliency score``.
    k : number of highest-scoring tokens kept from each mapping. Must be
        non-negative; ``None`` keeps every token (plain slice semantics).

    Returns
    -------
    float
        The Jaccard index of the two top-k token sets, or 0.0 if both sets are empty.

    Raises
    ------
    ValueError
        If ``k`` is negative. A negative slice bound would silently drop the
        *lowest* ``|k|`` tokens instead of selecting a top-k set.

    Examples
    --------
    >>> jaccard_topk({"a": 0.9, "b": 0.8}, {"a": 0.7, "c": 0.6}, k=2)
    0.3333333333333333
    """
    if k is not None and k < 0:
        raise ValueError(f"k must be non-negative, got {k!r}")

    def _top_tokens(saliency):
        # Stable descending sort: tied scores keep the mapping's insertion order.
        return set(sorted(saliency, key=saliency.get, reverse=True)[:k])

    top_a = _top_tokens(saliency_a)
    top_b = _top_tokens(saliency_b)
    union = top_a | top_b
    return len(top_a & top_b) / len(union) if union else 0.0
23
+
24
+
25
def token_saliency(model, tokenizer, text, target=None):
    """Per-token gradient saliency ``s_i = ‖∂P(y|x)/∂E_i‖₂`` via Captum (optional).

    Public entry point: checks that the optional Captum dependency
    (``pip install fairscope[nlp]``) can be imported, then delegates the actual
    computation to ``_captum_token_saliency``. Returns ``{token: saliency}``.

    Raises
    ------
    ImportError
        If ``captum`` cannot be imported.
    """
    missing_extra = "token_saliency requires the optional dependency: pip install fairscope[nlp]"
    try:
        import captum  # noqa: F401 (captum depends on torch; one import gates the extra)
    except ImportError as exc:
        raise ImportError(missing_extra) from exc
    else:
        return _captum_token_saliency(model, tokenizer, text, target)  # pragma: no cover
35
+
36
+
37
def _captum_token_saliency(model, tokenizer, text, target):  # pragma: no cover - needs nlp extra
    """Compute ``{token: saliency}`` for ``text`` with Captum's ``Saliency``.

    The text is tokenized, embedded with the model's input-embedding layer, and
    the gradient of the softmax output for ``target`` is taken with respect to
    those embeddings. The per-token score is the L2 norm of that gradient over
    the embedding dimension. When ``target`` is ``None`` the model's argmax
    class is used. A token string that occurs several times keeps its maximum
    score.

    NOTE(review): assumes a single input sequence (the batch axis is squeezed)
    and a Hugging Face-style model/tokenizer API -- confirm against callers.
    """
    from captum.attr import Saliency

    embed_layer = model.get_input_embeddings()
    # The tokenizer output is moved to the embedding layer's device so that a
    # model placed on an accelerator does not fail with a device mismatch.
    device = next(embed_layer.parameters()).device
    enc = tokenizer(text, return_tensors="pt")
    input_ids = enc["input_ids"].to(device)
    attention_mask = enc["attention_mask"].to(device)

    # Detach so attribution runs against a leaf tensor; the gradient of the
    # output with respect to the embedding values is unchanged.
    embeddings = embed_layer(input_ids).detach()
    embeddings.requires_grad_(True)

    def forward(emb):
        return model(inputs_embeds=emb, attention_mask=attention_mask).logits.softmax(-1)

    tgt = target if target is not None else int(forward(embeddings).argmax())
    grads = Saliency(forward).attribute(embeddings, target=tgt, abs=False)
    # .cpu() is a no-op on CPU tensors and is required before .numpy() otherwise.
    scores = grads.norm(dim=-1).squeeze(0).detach().cpu().numpy()
    tokens = tokenizer.convert_ids_to_tokens(enc["input_ids"].squeeze(0))
    agg = {}
    for tok, score in zip(tokens, scores):
        agg[tok] = max(agg.get(tok, 0.0), float(score))
    return agg
@@ -0,0 +1,178 @@
1
+ """The five-axis Cross-Platform Fairness Evaluation (CPFE) protocol (Pall & Yadav).
2
+
3
+ Axes 1-4 run on precomputed per-platform outputs (no torch); attribution stability
4
+ (axis 5) is provided separately via ``fairscope.nlp.attribution`` behind the optional
5
+ ``fairscope[nlp]`` extra.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import numpy as np
11
+ import pandas as pd
12
+
13
+ from ..core import bonferroni
14
+ from .metrics import (
15
+ macro_auc,
16
+ macro_f1,
17
+ multiclass_ece,
18
+ per_class_disparate_impact,
19
+ per_class_equalized_odds,
20
+ )
21
+ from .significance import bootstrap_macro_auc_test
22
+
23
+ # Reference bands EXPLICITLY STATED in the CPFE paper (descriptive diagnostic, NOT
24
+ # regulatory standards). P4 declines to set a delta-AUC decision threshold (Section 6.6),
25
+ # so that one is a constructor argument with an illustrative default instead.
26
+ DI_FOUR_FIFTHS = 0.80 # P4 Sec 4.4: DI < 0.80 violates the four-fifths rule
27
+ DI_SEVERE = 0.50 # P4 Sec 4.4: DI < 0.50 is a severe disparity
28
+ ECE_WELL_CALIBRATED = 0.10 # P4 Suppl. Fig. S2: ECE < 0.10 well-calibrated
29
+ ECE_MODERATE = 0.20 # P4 Suppl. Fig. S2: ECE > 0.20 moderate miscalibration
30
+ JACCARD_INSTABILITY = 0.20 # P4 Suppl. Fig. S7: J < 0.20 attribution instability
31
+
32
+
33
class CPFEProtocol:
    """Run the CPFE five-axis evaluation over precomputed per-platform outputs.

    Parameters
    ----------
    platform_data : dict ``{name: {"y_true": array, "probs": (n, n_classes) array}}``.
    reference : the within-platform name (e.g. the training platform).
    n_classes : number of classes.
    n_bins : number of confidence bins forwarded to ``multiclass_ece``.
    alpha : significance level forwarded to ``bonferroni``.
    n_boot : number of bootstrap replicates forwarded to ``bootstrap_macro_auc_test``.
    delta_auc_pct_max : ILLUSTRATIVE macro-AUC-drop screening limit (percent) used by
        ``CPFEReport.deployment_readiness``. NOT a published cutoff: P4 Section 6.6 declines
        to set one (observed drops were 28.6-39.5%); the default echoes that ">30%" magnitude
        and is labelled illustrative everywhere it surfaces.
    random_state : seed forwarded to every ``bootstrap_macro_auc_test`` call. The default
        ``0`` matches the seed that was previously hard-coded, so results are unchanged
        unless a different value is passed.

    Raises
    ------
    ValueError
        If ``reference`` is not a key of ``platform_data``.
    """

    def __init__(
        self,
        platform_data,
        reference,
        n_classes,
        *,
        n_bins=10,
        alpha=0.05,
        n_boot=2000,
        delta_auc_pct_max=30.0,
        random_state=0,
    ):
        if reference not in platform_data:
            raise ValueError(f"reference platform {reference!r} not in platform_data")
        self.platform_data = platform_data
        self.reference = reference
        self.n_classes = n_classes
        self.n_bins = n_bins
        self.alpha = alpha
        self.n_boot = n_boot
        self.delta_auc_pct_max = delta_auc_pct_max
        self.random_state = random_state

    def run(self) -> "CPFEReport":
        """Compute performance, significance and equity results and wrap them in a report.

        Performance (macro AUC, macro F1, ECE, percent AUC change vs the reference) is
        computed for every platform, the reference included. Significance and equity are
        computed for each non-reference platform against the reference; the raw p-values
        are then adjusted together with ``bonferroni``.
        """
        ref = self.platform_data[self.reference]
        ref_auc = macro_auc(ref["y_true"], ref["probs"])
        others = [p for p in self.platform_data if p != self.reference]

        performance = {}
        for name, d in self.platform_data.items():
            # The reference AUC is already known; avoid recomputing it.
            a = ref_auc if name == self.reference else macro_auc(d["y_true"], d["probs"])
            performance[name] = {
                "macro_auc": a,
                "macro_f1": macro_f1(d["y_true"], d["probs"]),
                "ece": multiclass_ece(d["y_true"], d["probs"], self.n_bins),
                "delta_auc_pct": 100.0 * (a - ref_auc) / ref_auc,
            }

        significance, equity, raw_p = {}, {}, []
        for name in others:
            d = self.platform_data[name]
            sig = bootstrap_macro_auc_test(
                ref["y_true"],
                ref["probs"],
                d["y_true"],
                d["probs"],
                n_boot=self.n_boot,
                random_state=self.random_state,
            )
            significance[name] = sig
            raw_p.append(sig["p_value"])
            equity[name] = {
                "disparate_impact": per_class_disparate_impact(
                    ref["probs"], d["probs"], self.n_classes
                ),
                "equalized_odds": per_class_equalized_odds(
                    ref["y_true"], ref["probs"], d["y_true"], d["probs"], self.n_classes
                ),
            }
        if raw_p:
            # One correction across all reference-vs-platform comparisons; `others` and
            # `raw_p` were filled in the same order, so zip realigns them by platform.
            adj = bonferroni(np.array(raw_p), alpha=self.alpha)
            for name, padj, rej in zip(others, adj["adjusted"], adj["reject"]):
                significance[name]["p_adjusted"] = float(padj)
                significance[name]["reject"] = bool(rej)

        return CPFEReport(performance, significance, equity, self.reference, self.delta_auc_pct_max)
111
+
112
+
113
class CPFEReport:
    """Container for CPFE results: per-platform performance, significance and equity.

    Renders the performance table as a DataFrame and produces a per-axis
    deployment-readiness screening diagnostic for every non-reference platform.
    """

    def __init__(self, performance, significance, equity, reference, delta_auc_pct_max):
        self.performance = performance
        self.significance = significance
        self.equity = equity
        self.reference = reference
        self.delta_auc_pct_max = delta_auc_pct_max

    def to_dataframe(self) -> pd.DataFrame:
        """Return one row per platform: a ``platform`` column plus its performance metrics."""
        rows = []
        for platform, metrics in self.performance.items():
            rows.append({"platform": platform, **metrics})
        return pd.DataFrame(rows)

    def deployment_readiness(self):
        """Structured per-axis, per-platform screening DIAGNOSTIC -- NOT a deployment
        decision. Following the CPFE paper (Sections 6.5-6.6), cross-platform degradation
        is an informative diagnostic, not definitive evidence of bias.

        Thresholds: calibration uses P4's stated ECE bands (Suppl. Fig. S2); equity uses
        P4's four-fifths rule; discrimination uses an ILLUSTRATIVE ``delta_auc_pct_max``
        (P4 Section 6.6 declines to set a published cutoff). The reference platform is
        skipped. Returns
        ``{platform: {"ready": bool, "axes": {axis: {pass, value, threshold, source, reason}}}}``.
        """
        limit = self.delta_auc_pct_max
        verdict = {}
        for name, perf in self.performance.items():
            if name == self.reference:
                continue

            # A negative delta means the AUC fell relative to the reference, so the
            # sign is flipped to express it as a positive "drop".
            drop = -perf["delta_auc_pct"]
            ece = perf["ece"]
            di = self.equity.get(name, {}).get("disparate_impact", {})

            # NOTE(review): a NaN DI compares False here and is not counted as a violation.
            violations = sorted([cls for cls in di if di[cls] < DI_FOUR_FIFTHS])
            severe = sorted([cls for cls in di if di[cls] < DI_SEVERE])
            if not violations:
                equity_reason = "no four-fifths violations"
            else:
                equity_reason = (
                    f"four-fifths violations (DI<{DI_FOUR_FIFTHS}) for classes "
                    f"{violations}; severe (<{DI_SEVERE}) {severe}"
                )

            discrimination = {
                "pass": drop <= limit,
                "value": drop,
                "threshold": limit,
                "source": "illustrative (not a published cutoff; P4 Section 6.6)",
                "reason": f"macro-AUC drop {drop:.1f}% vs reference "
                f"(illustrative limit {limit:.0f}%)",
            }
            calibration = {
                "pass": ece < ECE_WELL_CALIBRATED,
                "value": ece,
                "threshold": ECE_WELL_CALIBRATED,
                "source": "P4 Suppl. Fig. S2",
                "reason": f"ECE {ece:.3f} (well-calibrated < {ECE_WELL_CALIBRATED}; "
                f"moderate miscalibration > {ECE_MODERATE})",
            }
            equity_axis = {
                "pass": not violations,
                "value": {"violations": violations, "severe": severe},
                "threshold": DI_FOUR_FIFTHS,
                "source": "P4 four-fifths rule (Sec 4.4)",
                "reason": equity_reason,
            }
            axes = {
                "discrimination": discrimination,
                "calibration": calibration,
                "equity": equity_axis,
            }
            ready = all(axis["pass"] for axis in axes.values())
            verdict[name] = {"ready": ready, "axes": axes}
        return verdict
@@ -0,0 +1,87 @@
1
+ """Multiclass metrics for the CPFE protocol (axes 1, 2, 4 primitives).
2
+
3
+ Pure functions that reuse ``fairscope.core`` where the definition is shared. The
4
+ confidence-accuracy ECE follows the formula in the CPFE paper (Pall & Yadav), and is
5
+ distinct from ``core.expected_calibration_error`` (binary prob-vs-frequency calibration).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import numpy as np
11
+ from sklearn.metrics import f1_score, roc_auc_score
12
+
13
+ from ..core import disparate_impact, equalized_odds_difference
14
+
15
+
16
+ def _check_probs(y_true, probs):
17
+ y = np.asarray(y_true)
18
+ p = np.asarray(probs, dtype=float)
19
+ if p.ndim != 2:
20
+ raise ValueError("probs must be a 2-D array of shape (n_samples, n_classes)")
21
+ if len(y) != p.shape[0]:
22
+ raise ValueError("y_true and probs must have the same number of rows")
23
+ if np.any(np.isnan(p)):
24
+ raise ValueError("probs contain NaN")
25
+ return y, p
26
+
27
+
28
def macro_auc(y_true, probs):
    """Macro one-vs-rest AUC. Requires every class present in ``y_true``.

    Parameters
    ----------
    y_true : class labels, one per row of ``probs``.
    probs : ``(n_samples, n_classes)`` class probabilities.

    Returns
    -------
    float
        The macro-averaged one-vs-rest ROC AUC.

    Notes
    -----
    With exactly two probability columns scikit-learn treats the problem as binary
    and rejects a 2-D score array, so the second column is passed as the
    positive-class score. NOTE(review): this assumes column 1 belongs to the
    greater label and the two columns are complementary -- confirm for callers
    that pass unnormalized scores.
    """
    y, p = _check_probs(y_true, probs)
    if p.shape[1] == 2:
        # Binary case: roc_auc_score expects 1-D scores of the positive class.
        return float(roc_auc_score(y, p[:, 1]))
    return float(roc_auc_score(y, p, multi_class="ovr", average="macro"))
32
+
33
+
34
def macro_f1(y_true, probs):
    """Macro-averaged F1 score of the argmax (top-1) predictions.

    ``probs`` is validated by ``_check_probs``; the predicted class of each row
    is the index of its largest probability.
    """
    labels, scores = _check_probs(y_true, probs)
    predicted = scores.argmax(axis=1)
    score = f1_score(labels, predicted, average="macro")
    return float(score)
38
+
39
+
40
def multiclass_ece(y_true, probs, n_bins=10):
    """Confidence-accuracy Expected Calibration Error (Guo et al. 2017; CPFE paper):
    ``ECE = sum_m (|B_m|/n) * |acc(B_m) - conf(B_m)|`` with ``conf = max prob`` and
    ``acc = top-1 correct``.

    Parameters
    ----------
    y_true : class labels, one per row of ``probs``.
    probs : ``(n_samples, n_classes)`` class probabilities.
    n_bins : number of equal-width confidence bins on ``[0, 1]``; must be >= 1.

    Returns
    -------
    float
        The ECE; empty bins contribute nothing.

    Raises
    ------
    ValueError
        If ``n_bins`` is smaller than 1 (previously ``n_bins=0`` silently returned
        0.0, which reads as perfect calibration), or if the inputs fail
        ``_check_probs``.
    """
    if n_bins < 1:
        raise ValueError(f"n_bins must be >= 1, got {n_bins!r}")
    y, p = _check_probs(y_true, probs)
    conf = p.max(axis=1)
    correct = (p.argmax(axis=1) == y).astype(float)
    edges = np.linspace(0.0, 1.0, n_bins + 1)
    # Only interior edges are used, so indices run 0..n_bins-1 and a confidence
    # of exactly 1.0 lands in the last bin.
    idx = np.digitize(conf, edges[1:-1])
    n = len(y)
    ece = 0.0
    for b in range(n_bins):
        m = idx == b
        if np.any(m):
            ece += (m.sum() / n) * abs(correct[m].mean() - conf[m].mean())
    return float(ece)
56
+
57
+
58
def per_class_disparate_impact(probs_a, probs_b, n_classes):
    """Per-class disparate impact between two platforms.

    For each class ``c`` the argmax predictions of both platforms are pooled,
    binarized as ``pred == c``, and passed to ``core.disparate_impact`` with the
    platform (``"a"`` / ``"b"``) as the two-group label. Returns ``{c: DI}`` for
    ``c`` in ``range(n_classes)``.
    """
    pred_a = np.asarray(probs_a).argmax(axis=1)
    pred_b = np.asarray(probs_b).argmax(axis=1)
    platform = np.array(["a"] * len(pred_a) + ["b"] * len(pred_b))
    pooled_pred = np.concatenate([pred_a, pred_b])
    return {
        c: disparate_impact((pooled_pred == c).astype(int), platform, "a", "b")
        for c in range(n_classes)
    }
69
+
70
+
71
def per_class_equalized_odds(y_a, probs_a, y_b, probs_b, n_classes):
    """Per-class equalized-odds difference between two platforms.

    For each class ``c`` the labels and argmax predictions of both platforms are
    binarized (``== c``), pooled, and passed to ``core.equalized_odds_difference``
    with the platform (``"a"`` / ``"b"``) as the group label. When that call raises
    ``ValueError`` the class is reported as ``None``. Returns ``{c: value_or_None}``.
    """
    pred_a = np.asarray(probs_a).argmax(axis=1)
    pred_b = np.asarray(probs_b).argmax(axis=1)
    true_a = np.asarray(y_a)
    true_b = np.asarray(y_b)
    platform = np.array(["a"] * len(true_a) + ["b"] * len(true_b))

    def _pooled_indicator(first, second, cls):
        # Binarize each platform separately, then stack A before B.
        return np.concatenate([(first == cls).astype(int), (second == cls).astype(int)])

    result = {}
    for c in range(n_classes):
        is_true = _pooled_indicator(true_a, true_b, c)
        is_pred = _pooled_indicator(pred_a, pred_b, c)
        try:
            value = equalized_odds_difference(is_true, is_pred, platform, "a", "b")
        except ValueError:
            value = None  # a platform has no examples of class c
        result[c] = value
    return result
@@ -0,0 +1,51 @@
1
+ """Axis 3 of CPFE: unpaired bootstrap comparison of macro one-vs-rest AUC across two
2
+ platforms (independent test sets), as in the CPFE paper (stratified bootstrap standard
3
+ errors, B = 2000 by default, combined for a normal-approximation z-test).
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import numpy as np
9
+ from scipy import stats
10
+
11
+ from .metrics import macro_auc
12
+
13
+
14
def _bootstrap_se(y, probs, n_boot, rng):
    """Class-stratified bootstrap standard error of the macro AUC.

    Each replicate resamples, with replacement, as many rows from every class as
    that class has, so every class stays represented. Returns the sample standard
    deviation (``ddof=1``) of the ``n_boot`` replicate AUCs.
    """
    labels = np.asarray(y)
    scores = np.asarray(probs, dtype=float)
    strata = [np.flatnonzero(labels == cls) for cls in np.unique(labels)]
    replicates = np.empty(n_boot)
    for b in range(n_boot):
        draws = [rng.choice(members, size=members.size, replace=True) for members in strata]
        rows = np.concatenate(draws)
        replicates[b] = macro_auc(labels[rows], scores[rows])
    spread = replicates.std(ddof=1)
    return float(spread)
24
+
25
+
26
def bootstrap_macro_auc_test(y_a, probs_a, y_b, probs_b, n_boot=2000, random_state=None):
    """Unpaired z-test for a macro-AUC difference between two platforms.

    The standard error of each platform's macro AUC comes from a class-stratified
    bootstrap (platform A is resampled first, then B, from one shared generator).
    The two errors are combined in quadrature, and the AUC difference is referred
    to a standard normal for a two-sided p-value. If the combined standard error
    is not positive, ``z`` is set to 0.0.

    Returns a dict with keys ``auc_a``, ``auc_b``, ``delta``, ``se``, ``z``,
    ``p_value`` and ``n_boot``.
    """
    rng = np.random.default_rng(random_state)
    scores_a = np.asarray(probs_a, dtype=float)
    scores_b = np.asarray(probs_b, dtype=float)

    auc_a = macro_auc(y_a, scores_a)
    auc_b = macro_auc(y_b, scores_b)
    se_a = _bootstrap_se(y_a, scores_a, n_boot, rng)
    se_b = _bootstrap_se(y_b, scores_b, n_boot, rng)

    delta = auc_a - auc_b
    se = float(np.sqrt(se_a**2 + se_b**2))
    if se > 0:
        z = delta / se
    else:
        z = 0.0
    p_value = float(2.0 * stats.norm.sf(abs(z)))

    result = {
        "auc_a": auc_a,
        "auc_b": auc_b,
        "delta": delta,
        "se": se,
        "z": float(z),
        "p_value": p_value,
        "n_boot": n_boot,
    }
    return result
@@ -0,0 +1,202 @@
1
+ Metadata-Version: 2.4
2
+ Name: fairscope
3
+ Version: 0.3.0
4
+ Summary: Subgroup-stratified, calibration-aware fairness auditing for ML models, grounded in peer-reviewed methods.
5
+ Project-URL: Homepage, https://github.com/Rajveer-code/fairscope
6
+ Project-URL: Repository, https://github.com/Rajveer-code/fairscope
7
+ Project-URL: Issues, https://github.com/Rajveer-code/fairscope/issues
8
+ Author: Rajveer Singh Pall
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: auc,calibration,delong,fairness,machine-learning,model-auditing,subgroup-analysis
12
+ Classifier: Development Status :: 2 - Pre-Alpha
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Requires-Python: >=3.9
21
+ Requires-Dist: matplotlib>=3.6
22
+ Requires-Dist: numpy>=1.23
23
+ Requires-Dist: pandas>=1.5
24
+ Requires-Dist: scikit-learn>=1.1
25
+ Requires-Dist: scipy>=1.9
26
+ Provides-Extra: all
27
+ Requires-Dist: captum>=0.6; extra == 'all'
28
+ Requires-Dist: econml>=0.15; extra == 'all'
29
+ Requires-Dist: shap>=0.42; extra == 'all'
30
+ Requires-Dist: torch>=2.0; extra == 'all'
31
+ Requires-Dist: transformers>=4.30; extra == 'all'
32
+ Provides-Extra: dev
33
+ Requires-Dist: black>=24.0; extra == 'dev'
34
+ Requires-Dist: nbmake>=1.5; extra == 'dev'
35
+ Requires-Dist: pre-commit>=3.5; extra == 'dev'
36
+ Requires-Dist: pytest-cov>=4.1; extra == 'dev'
37
+ Requires-Dist: pytest>=7.4; extra == 'dev'
38
+ Requires-Dist: ruff>=0.5; extra == 'dev'
39
+ Requires-Dist: statsmodels>=0.14; extra == 'dev'
40
+ Provides-Extra: docs
41
+ Requires-Dist: mkdocs-material>=9.4; extra == 'docs'
42
+ Requires-Dist: mkdocs>=1.5; extra == 'docs'
43
+ Requires-Dist: mkdocstrings[python]>=0.24; extra == 'docs'
44
+ Provides-Extra: lending
45
+ Requires-Dist: econml>=0.15; extra == 'lending'
46
+ Provides-Extra: nlp
47
+ Requires-Dist: captum>=0.6; extra == 'nlp'
48
+ Requires-Dist: torch>=2.0; extra == 'nlp'
49
+ Requires-Dist: transformers>=4.30; extra == 'nlp'
50
+ Provides-Extra: shap
51
+ Requires-Dist: shap>=0.42; extra == 'shap'
52
+ Description-Content-Type: text/markdown
53
+
54
+ # fairscope
55
+
56
+ [![CI](https://github.com/Rajveer-code/fairscope/actions/workflows/ci.yml/badge.svg)](https://github.com/Rajveer-code/fairscope/actions/workflows/ci.yml)
57
+ [![Python](https://img.shields.io/badge/python-3.9%E2%80%933.12-blue.svg)](https://www.python.org/)
58
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
59
+ [![Docs](https://img.shields.io/badge/docs-live-brightgreen.svg)](https://rajveer-code.github.io/fairscope/)
60
+
61
+ **Subgroup-stratified, calibration-aware fairness auditing for machine-learning models — grounded in peer-reviewed methods.**
62
+
63
+ 📖 **Documentation:** <https://rajveer-code.github.io/fairscope/>
64
+
65
+ `fairscope` packages statistical machinery that mainstream fairness toolkits do not expose as
66
+ first-class, subgroup-stratified functions, and adds one novel protocol on top:
67
+
68
+ - **DeLong confidence intervals** for per-subgroup AUC (fast midrank algorithm).
69
+ - **Per-subgroup Expected/Maximum Calibration Error** with reliability diagrams.
70
+ - **Significance testing** of subgroup performance gaps (paired/unpaired DeLong, stratified
71
+ bootstrap) with **Bonferroni / Benjamini–Hochberg** correction.
72
+ - A subgroup-stratified **interface to standard recalibration** — temperature scaling
73
+ (Guo et al. 2017) and isotonic regression (Zadrozny & Elkan 2002), with pre/post-ECE.
74
+ - A novel five-axis **Cross-Platform Fairness Evaluation (CPFE)** protocol.
75
+ - One-call **domain audits**: `healthcare`, `lending`, `federated`.
76
+
77
+ Only the CPFE protocol is presented as novel. Every other function ports a documented method
78
+ and cites its source; the recalibration methods are standard, and the contribution there is the
79
+ per-subgroup interface and pre/post-ECE reporting.
80
+
81
+ > **Status — v0.3.0.** All five modules (`core`, `healthcare`, `nlp`/CPFE, `federated`,
82
+ > `lending`) are implemented, tested, and released. 100% line coverage on the statistical core;
83
+ > CI green across Python 3.9–3.12. See [`docs/DESIGN.md`](docs/DESIGN.md) for methods and design.
84
+
85
+ ## Install
86
+
87
+ ```bash
88
+ pip install fairscope
89
+ ```
90
+
91
+ Releases are uploaded to PyPI by the maintainer; if a version isn't available there yet,
92
+ install from source or from the [release assets](https://github.com/Rajveer-code/fairscope/releases):
93
+
94
+ ```bash
95
+ git clone https://github.com/Rajveer-code/fairscope
96
+ cd fairscope
97
+ pip install -e ".[dev]"
98
+ pytest
99
+ ```
100
+
101
+ The base install is light (NumPy, SciPy, scikit-learn, pandas, matplotlib). Optional extras:
102
+ `fairscope[nlp]` (torch, transformers, captum), `fairscope[lending]` (econml),
103
+ `fairscope[shap]`, `fairscope[docs]`.
104
+
105
+ ## Quickstart
106
+
107
+ ```python
108
+ from fairscope.healthcare import HealthcareFairnessAudit
109
+
110
+ # y_true : binary outcomes
111
+ # y_score: the model's positive-class probabilities
112
+ # age_group: a protected attribute, aligned row-for-row
113
+ report = HealthcareFairnessAudit.from_scores(
114
+ y_true, y_score, {"age_group": age_group}
115
+ ).run()
116
+
117
+ print(report.summary()) # per-subgroup AUC (DeLong CI), ECE, Brier, F1; flags the largest gap
118
+ report.to_dataframe() # tidy per-subgroup table
119
+ report.plot_auc_forest() # forest plot of per-subgroup AUC with DeLong intervals
120
+ ```
121
+
122
+ Every domain is also reachable through one dispatcher,
123
+ `FairnessAudit(model, domain=...)`, with `domain` in `{"healthcare", "nlp", "federated",
124
+ "lending"}`. A runnable end-to-end example on a committed synthetic fixture is in the
125
+ [getting-started guide](https://rajveer-code.github.io/fairscope/getting-started/) and in
126
+ [`notebooks/`](notebooks/).
127
+
128
+ ## Modules
129
+
130
+ | Module | Purpose | Status |
131
+ |---|---|---|
132
+ | `core/` | DeLong CI, bootstrap-AUC test, ECE/MCE + reliability, multiple-testing correction, subgroup metrics | ✅ shipped |
133
+ | `healthcare/` | one-call clinical fairness audit + report (tables, forest & reliability plots, PDF, optional SHAP) | ✅ shipped |
134
+ | `nlp/` | CPFE five-axis cross-platform protocol (centerpiece) + Captum attribution stability | ✅ shipped |
135
+ | `federated/` | per-node DeLong + cross-node disparity + per-node recalibration | ✅ shipped |
136
+ | `lending/` | annual approval-gap + subgroup CATE (Causal Forest DML) | ✅ shipped |
137
+
138
+ Plotting (forest plots, reliability diagrams) currently lives in the domain reports.
139
+ `lending`'s CATE estimation needs the optional `fairscope[lending]` extra (`econml`). The
140
+ `federated` module audits per-node predictions only — it performs no training and provides no
141
+ privacy guarantee.
142
+
143
+ ## How it differs from AIF360 / Fairlearn
144
+
145
+ `fairscope` is complementary to AIF360 and Fairlearn, not a replacement: those toolkits do bias
146
+ *mitigation*; `fairscope` does uncertainty-aware *measurement*. The table below was verified by
147
+ inspecting the installed public APIs of **AIF360 0.6.1** and **Fairlearn 0.14.0** (checked
148
+ 2026-06; re-confirm if versions change).
149
+
150
+ | Capability | AIF360 | Fairlearn | fairscope |
151
+ |---|:---:|:---:|:---:|
152
+ | Per-subgroup AUC confidence interval (DeLong) | no | no\* | yes |
153
+ | Per-subgroup Expected Calibration Error | no | no | yes |
154
+ | Subgroup significance test + multiple-comparison correction | no | no | yes |
155
+ | Subgroup-stratified recalibration (temperature / isotonic) | partial† | no | yes |
156
+ | Cross-platform five-axis protocol (CPFE) | no | no | yes (novel) |
157
+ | Per-node / federated audit | no | no | yes |
158
+ | Bias-mitigation algorithms | yes | yes | out of scope |
159
+
160
+ \* Fairlearn's `MetricFrame` computes per-subgroup AUC *point estimates* (e.g.
161
+ `roc_auc_score_group_min`), but provides no analytic (DeLong) confidence interval.
162
+ † AIF360 ships `CalibratedEqOddsPostprocessing` (calibration-aware equalized-odds
163
+ postprocessing), not a general per-subgroup temperature/isotonic recalibration interface.
164
+
165
+ **Closest related work — `meval`** (Sutariya & Petersen, 2025,
166
+ [arXiv:2512.17409](https://arxiv.org/abs/2512.17409)): a statistical toolbox for stratified,
167
+ fine-grained model-performance analysis that *also* provides subgroup metric uncertainty and
168
+ multiple-comparison corrections (with a medical-imaging focus). `fairscope` overlaps with it on
169
+ uncertainty + significance; what `fairscope` adds is the specific DeLong AUC intervals, the
170
+ per-subgroup calibration **and recalibration** interface, the five-axis cross-platform CPFE
171
+ protocol, and one-call domain audits (healthcare / lending / federated).
172
+
173
+ ## Engineering
174
+
175
+ - **Test-driven**, with regression tests anchored to authoritative reference values where they
176
+ exist (DeLong's worked example; `statsmodels` multiple-testing routines).
177
+ - **100% line coverage** on the statistical core; CI runs pytest + coverage, ruff, and black
178
+ across Python 3.9–3.12, and executes the replication notebooks via `nbmake`.
179
+ - Full type hints, NumPy-style docstrings with runnable examples, and explicit input validation
180
+ (an AUC on a single-class subgroup raises rather than returning a meaningless value).
181
+ - Committed fixtures are **small, synthetic, and labelled as such**; no datasets or trained
182
+ models are bundled.
183
+
184
+ ## Grounded in published research
185
+
186
+ `fairscope` ports methods from the author's peer-reviewed and under-review papers; it invents no
187
+ new mathematics. Each function cites its source. Full venues and identifiers are in
188
+ [`CITATION.cff`](CITATION.cff).
189
+
190
+ - Diabetes risk prediction with external validation + fairness analysis (XGBoost, NHANES→BRFSS) — IEEE CIPHER, 2026.
191
+ - A five-axis Cross-Platform Fairness Evaluation for mental-health NLP — under review.
192
+ - Privacy-preserving federated learning for diabetes risk across heterogeneous nodes — under review.
193
+ - Heterogeneous racial effects in mortgage approval (Causal Forest Double Machine Learning, HMDA) — under review.
194
+ - Racial disparities in mortgage lending (RDD / DiD / decomposition, HMDA) — under review.
195
+
196
+ ## Citation
197
+
198
+ If you use `fairscope`, please cite it via [`CITATION.cff`](CITATION.cff).
199
+
200
+ ## License
201
+
202
+ [MIT](LICENSE) © 2026 Rajveer Singh Pall · ORCID [0009-0001-6762-6134](https://orcid.org/0009-0001-6762-6134)
@@ -0,0 +1,23 @@
1
+ fairscope/__init__.py,sha256=aZb7kP8vyFmMZqXe7MdhXI5TuszHSXJNK01-m9KFJSI,1979
2
+ fairscope/core/__init__.py,sha256=BqMc2kIC0Kjm2qvm7lqKJ7cLKNLDcdXeEQ53hN0t9Dw,1420
3
+ fairscope/core/_utils.py,sha256=K7NaqaWXfoZRtIc0xvDH5q9zRVTSVi1Q1SWq4yLAbTE,1368
4
+ fairscope/core/bootstrap.py,sha256=tETKVrGRNlkktGza0ITnVLh6vJIBDAh95OqwZ5fK2eE,2244
5
+ fairscope/core/calibration.py,sha256=FyFyVjGZ0EzjKDvcO_B9Y3aSXGwA9bG6nHn4s-Touw4,6188
6
+ fairscope/core/correction.py,sha256=3t3lIfThiP9nTIRiFgPJh2tNS3AAHMxMVMvcrPCHxGo,1443
7
+ fairscope/core/delong.py,sha256=9ZNh_98Zre4oHYYQaUxT5ydIrrg-2SIE0wJkNH81Ed0,5044
8
+ fairscope/core/metrics.py,sha256=NOIlTpzsTuVYbCPPOsfG2NdmtNn1DAaO9ZyqVlNKS_4,3655
9
+ fairscope/federated/__init__.py,sha256=3ieUZDgobc2OKjBPvnKittDuxRc2ZSDwL7SR1hSkFxM,336
10
+ fairscope/federated/audit.py,sha256=_MYnn_I9dBvVqZMHjxaJhHMktNWUS69XK0pVozeeCd0,8235
11
+ fairscope/healthcare/__init__.py,sha256=rmFIfUt_Qao0kZ0b2i7pNSCSD0RKIkxT3n1oN3CnwKc,179
12
+ fairscope/healthcare/audit.py,sha256=q84h7mpfgwwfczbuOWpBwFTZzJsq7tXWSsetqtCw5dc,9180
13
+ fairscope/lending/__init__.py,sha256=nS1UJjpO12mDduDrz5pwPsSDeFZMx_GLkHM2wBwL7Uc,195
14
+ fairscope/lending/audit.py,sha256=QEHHB3tdSG2ULVAVFOuKzFlMX0v-1MWuWyzJQBtLDGc,5989
15
+ fairscope/nlp/__init__.py,sha256=OmFoKumKjdCOjsvKOaSgCUyWuF-JEILmA0tC3Bj9i50,798
16
+ fairscope/nlp/attribution.py,sha256=7cGLZ2b5lbezvFr14B81uVpHf_2Nno_4YUpqSk6Ky6I,2292
17
+ fairscope/nlp/cross_platform.py,sha256=SaBgu69GLhKbBaZCq9S8tvXZphCORpbrvTvAxz0Xdcc,7507
18
+ fairscope/nlp/metrics.py,sha256=E5BJgdp6ooQbSsbxRgqLCyeHK0BOQsSw2GlkifLH794,3405
19
+ fairscope/nlp/significance.py,sha256=TXf2LgyLMvtan3xaOCbA6j27aHH6ejxeAGIbJ6Pd6oE,1859
20
+ fairscope-0.3.0.dist-info/METADATA,sha256=MYgXmLdl_jYdYbaQZKrroRQ3CwfHDusld4HIBIBH8-Q,10074
21
+ fairscope-0.3.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
22
+ fairscope-0.3.0.dist-info/licenses/LICENSE,sha256=33pRQQTdA8whKltAeAn7UzoxgIS-y3hQWQ-ZcxB0Pss,1075
23
+ fairscope-0.3.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Rajveer Singh Pall
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.