fairscope 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,217 @@
1
+ """One-call cross-node fairness audit. Composes fairscope.core; invents no statistics.
2
+
3
+ Per node: DeLong AUC confidence interval -> Expected Calibration Error -> Brier/F1.
4
+ Across nodes: max-min AUC gap and Bonferroni-corrected pairwise (unpaired) DeLong tests.
5
+ An optional per-node recalibration step reports pre/post ECE. Mirrors the cross-node
6
+ evaluation in the privacy-preserving federated-learning study.
7
+
8
+ IMPORTANT: this module audits per-node PREDICTIONS only. It does not train models, perform
9
+ secure aggregation, or provide any privacy guarantee.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from itertools import combinations
15
+
16
+ import numpy as np
17
+ import pandas as pd
18
+ from sklearn.metrics import brier_score_loss, f1_score
19
+
20
+ from ..core import (
21
+ bonferroni,
22
+ delong_auc_ci,
23
+ delong_unpaired_test,
24
+ expected_calibration_error,
25
+ isotonic_recalibrate,
26
+ temperature_scale,
27
+ )
28
+
29
+
30
class FederatedFairnessAudit:
    """Cross-node fairness audit over per-node predictions.

    Parameters
    ----------
    node_data : dict ``{node_name: (y_true, y_score)}``; ``y_score`` holds the
        positive-class probability for that node's evaluation samples.
    n_bins : int, number of bins used for the Expected Calibration Error.
    alpha : float, significance level passed to the DeLong CI and to the
        Bonferroni correction.
    """

    def __init__(self, node_data, *, n_bins=10, alpha=0.05):
        # Normalise once up front: labels as an array, scores as a float array.
        prepared = {}
        for name, (labels, scores) in node_data.items():
            prepared[name] = (np.asarray(labels), np.asarray(scores, dtype=float))
        self.node_data = prepared
        self.n_bins = n_bins
        self.alpha = alpha

    def run(self) -> FederatedReport:
        """Compute per-node metrics and pairwise tests; return a ``FederatedReport``.

        Raises
        ------
        ValueError
            If any node's labels contain fewer than two distinct classes.
        """
        per_node = {}
        for name, (labels, scores) in self.node_data.items():
            if len(np.unique(labels)) < 2:
                raise ValueError(f"node {name!r} has a single class; AUC undefined")
            interval = delong_auc_ci(labels, scores, alpha=self.alpha)
            calibration_error = expected_calibration_error(labels, scores, n_bins=self.n_bins)
            brier = brier_score_loss(labels, scores)
            # F1 is computed on hard predictions thresholded at 0.5.
            hard_predictions = (scores >= 0.5).astype(int)
            f1 = f1_score(labels, hard_predictions, zero_division=0)
            per_node[name] = {
                "ci": interval,
                "ece": float(calibration_error),
                "n": int(len(labels)),
                "brier": float(brier),
                "f1": float(f1),
            }

        # Every unordered pair of nodes, in sorted-name order.
        pairs = list(combinations(sorted(per_node), 2))
        pvals = []
        for first, second in pairs:
            y_first, s_first = self.node_data[first]
            y_second, s_second = self.node_data[second]
            outcome = delong_unpaired_test(y_first, s_first, y_second, s_second)
            pvals.append(outcome["p_value"])

        # With fewer than two nodes there is nothing to correct.
        if pvals:
            corrected = bonferroni(np.array(pvals), alpha=self.alpha)
        else:
            corrected = None

        return FederatedReport(
            per_node,
            pairs,
            pvals,
            corrected,
            node_data=self.node_data,
            alpha=self.alpha,
            n_bins=self.n_bins,
        )
80
+
81
+
82
class FederatedReport:
    """Hold cross-node audit results and render them as tables, plots, and a PDF.

    Parameters
    ----------
    per_node : dict ``{node: {"ci", "ece", "n", "brier", "f1"}}`` where ``"ci"`` is a
        mapping with ``"auc"``, ``"ci_lower"`` and ``"ci_upper"`` entries.
    pairs : list of ``(node_a, node_b)`` tuples, aligned with ``pvals``.
    pvals : list of raw pairwise p-values.
    corrected : multiple-comparison result exposing ``"adjusted"`` and ``"reject"``
        sequences aligned with ``pairs``, or ``None`` when there were no pairs.
    node_data : dict ``{node: (y_true, y_score)}``, kept for recalibration and
        reliability curves.
    alpha : float, significance level the intervals and correction were computed at.
    n_bins : int, bin count for ECE and the reliability diagram.
    """

    def __init__(self, per_node, pairs, pvals, corrected, *, node_data, alpha, n_bins):
        self.per_node = per_node
        self.pairs = pairs
        self.pvals = pvals
        self.corrected = corrected
        self._node_data = node_data
        self.alpha = alpha
        self.n_bins = n_bins

    def to_dataframe(self) -> pd.DataFrame:
        """Return one row per node: n, AUC with CI bounds, ECE, Brier, and F1."""
        rows = []
        for node, r in self.per_node.items():
            ci = r["ci"]
            rows.append(
                {
                    "node": node,
                    "n": r["n"],
                    "auc": ci["auc"],
                    "ci_lower": ci["ci_lower"],
                    "ci_upper": ci["ci_upper"],
                    "ece": r["ece"],
                    "brier": r["brier"],
                    "f1": r["f1"],
                }
            )
        return pd.DataFrame(rows)

    def disparity(self) -> dict:
        """Cross-node AUC disparity summary.

        Returns a dict with ``max_auc_gap`` (highest minus lowest AUC), the ``best``
        and ``worst`` node names, and ``worst_pair`` as ``(worst, best)``.
        """
        aucs = {n: r["ci"]["auc"] for n, r in self.per_node.items()}
        hi = max(aucs, key=aucs.get)
        lo = min(aucs, key=aucs.get)
        return {
            "max_auc_gap": aucs[hi] - aucs[lo],
            "best": hi,
            "worst": lo,
            "worst_pair": (lo, hi),
        }

    def recalibrate(self, method="temperature") -> dict:
        """Recalibrate each node on its own (y, score) and report pre/post ECE.

        ``method`` is 'temperature' (Guo et al., 2017) or 'isotonic' (Zadrozny &
        Elkan, 2002) — both standard methods from ``fairscope.core``. Temperature
        scaling operates on logits, so the per-node probabilities are converted to
        logits first. Returns ``{node: {"ece_pre": float, "ece_post": float}}``.

        NOTE: this fits and evaluates on the same per-node data (an in-sample
        diagnostic). For a deployment estimate, recalibrate on a held-out split.

        Raises
        ------
        ValueError
            If ``method`` is neither ``'temperature'`` nor ``'isotonic'``.
        """
        if method not in ("temperature", "isotonic"):
            raise ValueError(f"unknown method: {method!r}; use 'temperature' or 'isotonic'")
        out = {}
        for node, (y, s) in self._node_data.items():
            pre = expected_calibration_error(y, s, n_bins=self.n_bins)
            if method == "temperature":
                # Clip away exact 0/1 so the logit transform stays finite.
                p = np.clip(s, 1e-7, 1 - 1e-7)
                logits = np.log(p / (1 - p))
                _, s_cal = temperature_scale(logits, y)
            else:  # isotonic
                _, s_cal = isotonic_recalibrate(s, y)
            post = expected_calibration_error(y, s_cal, n_bins=self.n_bins)
            out[node] = {"ece_pre": float(pre), "ece_post": float(post)}
        return out

    def _ci_label(self) -> str:
        """Axis label stating the confidence level implied by ``alpha``.

        The level is ``100 * (1 - alpha)`` percent, so the label stays truthful
        when the audit was run with an ``alpha`` other than 0.05.
        """
        return f"AUC ({100 * (1 - self.alpha):g}% DeLong CI)"

    def plot_auc_forest(self):
        """Forest plot of per-node AUC with DeLong CIs at the ``1 - alpha`` level.

        Returns a Figure.
        """
        import matplotlib.pyplot as plt

        df = self.to_dataframe()
        ys = np.arange(len(df))
        # errorbar expects distances below/above the point, not absolute bounds.
        xerr = np.vstack([df.auc - df.ci_lower, df.ci_upper - df.auc])
        fig, ax = plt.subplots(figsize=(6, 0.5 * len(df) + 1.5))
        ax.errorbar(df.auc, ys, xerr=xerr, fmt="o", capsize=3)
        ax.set_yticks(ys)
        ax.set_yticklabels(df.node)
        ax.set_xlabel(self._ci_label())
        # Reference line at AUC = 0.5.
        ax.axvline(0.5, color="gray", linestyle="--", linewidth=1)
        ax.set_title("Per-node discrimination")
        fig.tight_layout()
        return fig

    def plot_calibration(self):
        """Per-node reliability curves drawn with ``core.reliability_diagram``
        (federated retains each node's (y, score), so these are true curves).
        Returns a Figure."""
        from ..core import reliability_diagram

        ys, ss, labels = [], [], []
        for node, (y, s) in self._node_data.items():
            ys.append(np.asarray(y))
            ss.append(np.asarray(s))
            # One group label per sample so the nodes can be pooled into one call.
            labels.append(np.full(len(y), node))
        return reliability_diagram(
            np.concatenate(ys),
            np.concatenate(ss),
            groups=np.concatenate(labels),
            n_bins=self.n_bins,
        )

    def to_pdf(self, path):
        """Write a multi-page PDF: summary, per-node AUC forest, per-node calibration.
        Uses matplotlib only (no extra dependency)."""
        import matplotlib.pyplot as plt
        from matplotlib.backends.backend_pdf import PdfPages

        with PdfPages(path) as pdf:
            # Page 1: the text summary rendered onto a letter-sized blank axis.
            fig0, ax = plt.subplots(figsize=(8.5, 11))
            ax.axis("off")
            ax.text(0.02, 0.98, self.summary(), family="monospace", va="top", fontsize=8)
            pdf.savefig(fig0)
            plt.close(fig0)

            forest = self.plot_auc_forest()
            pdf.savefig(forest)
            plt.close(forest)

            cal = self.plot_calibration()
            cal.axes[0].set_title("Per-node calibration")
            pdf.savefig(cal)
            plt.close(cal)

    def summary(self) -> str:
        """Return the per-node table, the AUC gap line, and any significant pairs."""
        lines = [self.to_dataframe().to_string(index=False)]
        d = self.disparity()
        lines.append(f"cross-node AUC gap: {d['best']} vs {d['worst']} = {d['max_auc_gap']:.3f}")
        if self.corrected is not None:
            for (a, b), padj, rej in zip(
                self.pairs, self.corrected["adjusted"], self.corrected["reject"]
            ):
                # Only pairs that survive the correction are listed.
                if rej:
                    lines.append(f" {a} vs {b}: Bonferroni-adjusted p={padj:.4g} (significant)")
        return "\n".join(lines)
@@ -0,0 +1,5 @@
1
+ """Clinical fairness auditing built on fairscope.core."""
2
+
3
+ from .audit import HealthcareFairnessAudit, HealthcareReport
4
+
5
+ __all__ = ["HealthcareFairnessAudit", "HealthcareReport"]
@@ -0,0 +1,227 @@
1
+ """One-call clinical fairness audit. Composes fairscope.core; invents no statistics.
2
+
3
+ Pipeline per protected attribute: per-subgroup DeLong AUC CIs -> per-subgroup ECE ->
4
+ per-subgroup Brier/F1 -> Bonferroni-corrected pairwise (unpaired) DeLong tests across the
5
+ attribute's subgroups. Mirrors the analysis in the diabetes paper (IEEE CIPHER 2026).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from itertools import combinations
11
+
12
+ import numpy as np
13
+ import pandas as pd
14
+
15
+ from ..core import (
16
+ bonferroni,
17
+ delong_by_group,
18
+ delong_unpaired_test,
19
+ ece_by_group,
20
+ subgroup_metrics,
21
+ )
22
+
23
+
24
class HealthcareFairnessAudit:
    """Subgroup fairness audit for a fitted classifier or for precomputed scores.

    Parameters
    ----------
    model : object exposing ``predict_proba`` (column 1 is read as the positive-class
        probability), or ``None`` when built through :meth:`from_scores`.
    X_test, y_test : test features and binary labels.
    protected_attr : dict ``{attribute_name: 1-D array of subgroup labels per sample}``.
    n_bins : int, ECE bin count.
    alpha : float, significance level for the DeLong CIs and Bonferroni correction.
    """

    def __init__(self, model, X_test, y_test, protected_attr, *, n_bins=10, alpha=0.05):
        self.model = model
        self.X_test = X_test
        self.y_test = np.asarray(y_test)
        self.protected_attr = protected_attr
        self.n_bins = n_bins
        self.alpha = alpha
        # Populated only by from_scores(); None means "ask the model".
        self._scores = None

    @classmethod
    def from_scores(cls, y_true, y_score, protected_attr, *, n_bins=10, alpha=0.05):
        """Create an audit from precomputed positive-class probabilities (no model)."""
        audit = cls(None, None, y_true, protected_attr, n_bins=n_bins, alpha=alpha)
        audit._scores = np.asarray(y_score, dtype=float)
        return audit

    def _get_scores(self):
        """Return the stored scores, else column 1 of ``model.predict_proba(X_test)``.

        Raises ``ValueError`` when no scores are stored and the model is missing or
        has no ``predict_proba``.
        """
        if self._scores is not None:
            return self._scores
        usable = self.model is not None and hasattr(self.model, "predict_proba")
        if not usable:
            raise ValueError(
                "model must implement predict_proba(); or use "
                "HealthcareFairnessAudit.from_scores(...)"
            )
        probabilities = np.asarray(self.model.predict_proba(self.X_test))
        return probabilities[:, 1]

    def run(self) -> HealthcareReport:
        """Run the audit and return a :class:`HealthcareReport`.

        A ``ValueError`` raised by the per-group core computations is re-raised with
        the protected attribute's name prepended, so the failing attribute is clear.
        """
        labels = self.y_test
        scores = self._get_scores()
        results = {}
        for attr_name, raw_groups in self.protected_attr.items():
            groups = np.asarray(raw_groups)
            try:
                cis = delong_by_group(labels, scores, groups, alpha=self.alpha)
                eces = ece_by_group(labels, scores, groups, n_bins=self.n_bins)
                mets = subgroup_metrics(labels, scores, groups)
            except ValueError as exc:
                raise ValueError(f"protected attribute {attr_name!r}: {exc}") from exc

            levels = sorted(np.unique(groups).tolist())
            pairs = []
            pvals = []
            deltas = []
            # Unpaired DeLong test for every unordered pair of subgroups.
            for left, right in combinations(levels, 2):
                in_left = groups == left
                in_right = groups == right
                outcome = delong_unpaired_test(
                    labels[in_left], scores[in_left], labels[in_right], scores[in_right]
                )
                pairs.append((left, right))
                pvals.append(outcome["p_value"])
                deltas.append(outcome["delta"])

            # A single subgroup yields no pairs and therefore nothing to correct.
            if pvals:
                corrected = bonferroni(np.array(pvals), alpha=self.alpha)
            else:
                corrected = None
            p_adjusted = corrected["adjusted"].tolist() if corrected else []
            reject = corrected["reject"].tolist() if corrected else []

            results[attr_name] = {
                "groups": levels,
                "ci": cis,
                "ece": eces,
                "metrics": mets,
                "pairs": pairs,
                "delta": deltas,
                "p_value": pvals,
                "p_adjusted": p_adjusted,
                "reject": reject,
            }
        return HealthcareReport(
            results, labels, scores, self.protected_attr, alpha=self.alpha, n_bins=self.n_bins
        )

    def shap_summary(self, max_samples=200):
        """Mean absolute SHAP value per feature (optional). Requires
        ``pip install fairscope[shap]`` and a model (not ``from_scores``). Returns a dict
        ``{feature_index: mean_abs_shap}``."""
        # Probe for the optional dependency first so the error message is actionable.
        try:
            import shap  # noqa: F401
        except ImportError as exc:
            raise ImportError(
                "SHAP summary requires the optional dependency: pip install fairscope[shap]"
            ) from exc
        return _shap_mean_abs(self.model, self.X_test, max_samples)  # pragma: no cover
113
+
114
+
115
class HealthcareReport:
    """Hold audit results plus the raw (y, score, groups) needed for reliability
    curves; renders tables, a text summary, plots, and a PDF.

    Parameters
    ----------
    results : dict ``{attribute: {...}}`` with, per attribute, ``"groups"``, ``"ci"``,
        ``"ece"``, ``"metrics"``, ``"pairs"``, ``"p_adjusted"`` and ``"reject"`` entries
        (the keys read by the methods below).
    y_true, y_score : labels and positive-class scores for all samples.
    protected_attr : dict ``{attribute: per-sample subgroup labels}``.
    alpha : float, significance level the intervals and correction were computed at.
    n_bins : int, bin count for the reliability diagram.
    """

    def __init__(self, results, y_true, y_score, protected_attr, *, alpha=0.05, n_bins=10):
        self.results = results
        self.y_true = np.asarray(y_true)
        self.y_score = np.asarray(y_score)
        self.protected_attr = protected_attr
        self.alpha = alpha
        self.n_bins = n_bins

    def to_dataframe(self) -> pd.DataFrame:
        """Return one row per (attribute, subgroup) with n, AUC + CI, ECE, Brier, F1."""
        rows = []
        for attr, r in self.results.items():
            for g in r["groups"]:
                ci = r["ci"][g]
                m = r["metrics"][g]
                rows.append(
                    {
                        "attribute": attr,
                        "group": g,
                        "n": m["n"],
                        "auc": ci["auc"],
                        "ci_lower": ci["ci_lower"],
                        "ci_upper": ci["ci_upper"],
                        "ece": r["ece"][g],
                        "brier": m["brier"],
                        "f1": m["f1"],
                    }
                )
        return pd.DataFrame(rows)

    def summary(self) -> str:
        """Return a human-readable summary string (no print side effect)."""
        df = self.to_dataframe()
        lines = [df.to_string(index=False)]
        for attr, r in self.results.items():
            aucs = {g: r["ci"][g]["auc"] for g in r["groups"]}
            hi, lo = max(aucs, key=aucs.get), min(aucs, key=aucs.get)
            lines.append(
                f"[{attr}] largest AUC gap: {hi} {aucs[hi]:.3f} vs "
                f"{lo} {aucs[lo]:.3f} (delta={aucs[hi] - aucs[lo]:.3f})"
            )
            # Only pairs that survive the correction are listed.
            for (a, b), padj, rej in zip(r["pairs"], r["p_adjusted"], r["reject"]):
                if rej:
                    lines.append(f" {a} vs {b}: Bonferroni-adjusted p={padj:.4g} (significant)")
        return "\n".join(lines)

    def _ci_label(self) -> str:
        """Axis label stating the confidence level implied by ``alpha``.

        The level is ``100 * (1 - alpha)`` percent, so the label stays truthful
        when the audit was run with an ``alpha`` other than 0.05.
        """
        return f"AUC ({100 * (1 - self.alpha):g}% DeLong CI)"

    def plot_auc_forest(self, attribute=None):
        """Forest plot of per-subgroup AUC with DeLong CIs at the ``1 - alpha`` level.

        ``attribute`` restricts the plot to one protected attribute; ``None`` plots
        every attribute. Returns a Figure.
        """
        import matplotlib.pyplot as plt

        df = self.to_dataframe()
        if attribute is not None:
            df = df[df.attribute == attribute]
        labels = [f"{a}:{g}" for a, g in zip(df.attribute, df.group)]
        ys = np.arange(len(df))
        # errorbar expects distances below/above the point, not absolute bounds.
        xerr = np.vstack([df.auc - df.ci_lower, df.ci_upper - df.auc])
        fig, ax = plt.subplots(figsize=(6, 0.5 * len(df) + 1.5))
        ax.errorbar(df.auc, ys, xerr=xerr, fmt="o", capsize=3)
        ax.set_yticks(ys)
        ax.set_yticklabels(labels)
        ax.set_xlabel(self._ci_label())
        # Reference line at AUC = 0.5.
        ax.axvline(0.5, color="gray", linestyle="--", linewidth=1)
        ax.set_title("Per-subgroup discrimination")
        fig.tight_layout()
        return fig

    def plot_calibration(self, attribute=None):
        """Reliability curves per subgroup for one attribute, drawn with
        ``core.reliability_diagram``. When ``attribute`` is ``None`` the first key of
        ``protected_attr`` is used. Returns a Figure."""
        from ..core import reliability_diagram

        attr = attribute if attribute is not None else next(iter(self.protected_attr))
        groups = np.asarray(self.protected_attr[attr])
        return reliability_diagram(self.y_true, self.y_score, groups=groups, n_bins=self.n_bins)

    def to_pdf(self, path):
        """Write a multi-page PDF: summary, AUC forest, and one calibration page per
        attribute. Uses matplotlib only (no extra dependency)."""
        import matplotlib.pyplot as plt
        from matplotlib.backends.backend_pdf import PdfPages

        with PdfPages(path) as pdf:
            # Page 1: the text summary rendered onto a letter-sized blank axis.
            fig0, ax = plt.subplots(figsize=(8.5, 11))
            ax.axis("off")
            ax.text(0.02, 0.98, self.summary(), family="monospace", va="top", fontsize=8)
            pdf.savefig(fig0)
            plt.close(fig0)

            forest = self.plot_auc_forest()
            pdf.savefig(forest)
            plt.close(forest)

            for attr in self.protected_attr:
                cal = self.plot_calibration(attr)
                cal.axes[0].set_title(f"Calibration: {attr}")
                pdf.savefig(cal)
                plt.close(cal)
215
+
216
+
217
+ def _shap_mean_abs(model, X_test, max_samples): # pragma: no cover - needs optional shap
218
+ import shap
219
+
220
+ if model is None:
221
+ raise ValueError("shap_summary requires a model (not from_scores).")
222
+ X = np.asarray(X_test)[:max_samples]
223
+ explainer = shap.Explainer(model.predict_proba, X)
224
+ values = explainer(X).values
225
+ vals = np.abs(values).mean(axis=0)
226
+ vals = vals[:, 1] if vals.ndim == 2 else vals
227
+ return {i: float(v) for i, v in enumerate(np.ravel(vals))}
@@ -0,0 +1,5 @@
1
+ """Mortgage-lending fairness auditing built on fairscope.core (+ optional econml)."""
2
+
3
+ from .audit import LendingFairnessAudit, LendingReport
4
+
5
+ __all__ = ["LendingFairnessAudit", "LendingReport"]
@@ -0,0 +1,160 @@
1
+ """One-call lending fairness audit.
2
+
3
+ Annual approval-gap analysis composes fairscope.core and is purely descriptive (no causal
4
+ claim). Subgroup CATE estimation (Task 2) wraps econml under stated DML assumptions.
5
+ Mirrors the analyses in P1 (Causal Forest DML, HMDA) and P2 (descriptive disparities).
6
+ Ships no HMDA data and no model.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+
14
+ from ..core import disparate_impact
15
+
16
+
17
class LendingFairnessAudit:
    """Annual subgroup-disparity audit of mortgage-approval outcomes.

    Parameters
    ----------
    approved : 1-D binary array (1 = approved).
    group : 1-D array of protected-group labels, aligned with ``approved``.
    year : 1-D array of years, aligned with ``approved``.
    reference : the group label every other group is compared against. Must be
        present in each year.
    alpha : significance level (reserved for the CATE step).
    """

    def __init__(self, *, approved, group, year, reference, alpha=0.05):
        self.approved, self.group, self.year = (
            np.asarray(column) for column in (approved, group, year)
        )
        self.reference = reference
        self.alpha = alpha

    @classmethod
    def from_outcomes(cls, approved, group, year, *, reference, alpha=0.05):
        """Positional-argument constructor for precomputed approval outcomes."""
        return cls(approved=approved, group=group, year=year, reference=reference, alpha=alpha)

    def estimate_cate(
        self,
        X,
        *,
        treatment=None,
        outcome=None,
        model_y=None,
        model_t=None,
        n_estimators=500,
        random_state=0,
    ):
        """Per-subgroup conditional average treatment effect (CATE) of the protected
        attribute on approval, via Causal Forest DML (``econml.dml.CausalForestDML``,
        as in P1).

        The CAUSAL CLAIM IS CONDITIONAL on the DML assumptions (unconfoundedness given
        the supplied features ``X``, and overlap). This estimates an effect under those
        assumptions; it does not, on its own, prove discrimination.

        Parameters
        ----------
        X : array (n, k) of heterogeneity features.
        treatment : binary array; defaults to ``group != reference``.
        outcome : binary array; defaults to the ``approved`` outcomes.
        model_y, model_t : nuisance estimators; default to random forests.

        Returns
        -------
        dict
            Keys ``ate``, ``effect``, and ``effect_interval``.

        Notes
        -----
        Requires the optional dependency ``econml`` (``pip install fairscope[lending]``).
        """
        # Probe for the optional dependency first so the error message is actionable.
        try:
            import econml.dml  # noqa: F401
        except ImportError as exc:  # optional dependency
            message = "Subgroup CATE requires the optional dependency: pip install fairscope[lending]"
            raise ImportError(message) from exc
        return _causal_forest_cate(  # pragma: no cover - exercised only with econml installed
            self, X, treatment, outcome, model_y, model_t, n_estimators, random_state
        )

    def run(self) -> LendingReport:
        """Build the per-year, per-group approval table and wrap it in a report.

        Each row holds the year, group, group size, approval rate, and the disparate
        impact of that group against ``reference`` within the same year.
        """
        records = []
        for yr in sorted(np.unique(self.year).tolist()):
            in_year = self.year == yr
            outcomes = self.approved[in_year]
            labels = self.group[in_year]
            for g in sorted(np.unique(labels).tolist()):
                members = labels == g
                approval_rate = float(outcomes[members].mean())
                ratio = float(disparate_impact(outcomes, labels, g, self.reference))
                records.append(
                    dict(
                        year=yr,
                        group=g,
                        n=int(members.sum()),
                        approval_rate=approval_rate,
                        disparate_impact=ratio,
                    )
                )
        table = pd.DataFrame(records)
        return LendingReport(table, reference=self.reference, alpha=self.alpha)
107
+
108
+
109
class LendingReport:
    """Hold the annual approval-gap table (CATE results are returned separately).

    Parameters
    ----------
    df : DataFrame with ``year``, ``group``, ``n``, ``approval_rate`` and
        ``disparate_impact`` columns; may be empty.
    reference : the reference group label; its rows are excluded when looking for
        the largest gap.
    alpha : float, significance level carried along from the audit.
    """

    def __init__(self, df, *, reference, alpha=0.05):
        self._df = df
        self.reference = reference
        self.alpha = alpha

    def to_dataframe(self) -> pd.DataFrame:
        """Return a copy of the table so callers cannot mutate the report."""
        return self._df.copy()

    def summary(self) -> str:
        """Return the table as text, plus the non-reference row with the lowest
        disparate impact when such a row exists.

        An empty table (for example from an audit over zero samples) has no
        ``group`` column at all, so it is returned as-is instead of being filtered.
        """
        lines = [self._df.to_string(index=False)]
        if self._df.empty:
            return "\n".join(lines)
        # Bracket access: attribute access would break on a missing column or on a
        # column name that collides with a DataFrame/Series attribute.
        nonref = self._df[self._df["group"] != self.reference]
        if not nonref.empty:
            worst = nonref.nsmallest(1, "disparate_impact").iloc[0]
            lines.append(
                f"largest gap: {worst['group']} in {int(worst['year'])} "
                f"DI={worst['disparate_impact']:.3f} (approval {worst['approval_rate']:.3f})"
            )
        return "\n".join(lines)
130
+
131
+
132
+ def _causal_forest_cate( # pragma: no cover - exercised only with econml installed
133
+ audit, X, treatment, outcome, model_y, model_t, n_estimators, random_state
134
+ ):
135
+ """Causal Forest DML CATE. Isolated so the optional-econml path is excluded from
136
+ coverage in CI (econml is not installed there); the importorskip tests exercise it
137
+ locally and for any contributor who installs ``fairscope[lending]``."""
138
+ from econml.dml import CausalForestDML
139
+ from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
140
+
141
+ if treatment is None:
142
+ t = (audit.group != audit.reference).astype(int)
143
+ else:
144
+ t = np.asarray(treatment)
145
+ y = audit.approved if outcome is None else np.asarray(outcome)
146
+ x = np.asarray(X)
147
+ est = CausalForestDML(
148
+ model_y=model_y or RandomForestRegressor(random_state=random_state),
149
+ model_t=model_t or RandomForestClassifier(random_state=random_state),
150
+ discrete_treatment=True,
151
+ n_estimators=n_estimators,
152
+ random_state=random_state,
153
+ )
154
+ est.fit(y, t, X=x)
155
+ lo, hi = est.effect_interval(x, alpha=audit.alpha)
156
+ return {
157
+ "ate": float(est.ate(x)),
158
+ "effect": est.effect(x),
159
+ "effect_interval": (lo, hi),
160
+ }
@@ -0,0 +1,29 @@
1
+ """Cross-Platform Fairness Evaluation (CPFE) for NLP, built on fairscope.core.
2
+
3
+ Public API: the five-axis ``CPFEProtocol``/``CPFEReport``, the multiclass metric and
4
+ significance primitives (axes 1-4), and the attribution-stability functions (axis 5).
5
+ """
6
+
7
+ from .attribution import jaccard_topk, token_saliency
8
+ from .cross_platform import CPFEProtocol, CPFEReport
9
+ from .metrics import (
10
+ macro_auc,
11
+ macro_f1,
12
+ multiclass_ece,
13
+ per_class_disparate_impact,
14
+ per_class_equalized_odds,
15
+ )
16
+ from .significance import bootstrap_macro_auc_test
17
+
18
+ __all__ = [
19
+ "CPFEProtocol",
20
+ "CPFEReport",
21
+ "macro_auc",
22
+ "macro_f1",
23
+ "multiclass_ece",
24
+ "per_class_disparate_impact",
25
+ "per_class_equalized_odds",
26
+ "bootstrap_macro_auc_test",
27
+ "jaccard_topk",
28
+ "token_saliency",
29
+ ]