hapc 2.0.2__tar.gz → 2.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hapc-2.0.2/python/hapc.egg-info → hapc-2.3.0}/PKG-INFO +1 -1
- {hapc-2.0.2 → hapc-2.3.0}/pyproject.toml +1 -1
- {hapc-2.0.2 → hapc-2.3.0}/python/hapc/__init__.py +1 -1
- {hapc-2.0.2 → hapc-2.3.0}/python/hapc/ate.py +55 -21
- {hapc-2.0.2 → hapc-2.3.0}/python/hapc/cv.py +12 -5
- {hapc-2.0.2 → hapc-2.3.0}/python/hapc/single.py +128 -26
- {hapc-2.0.2 → hapc-2.3.0/python/hapc.egg-info}/PKG-INFO +1 -1
- {hapc-2.0.2 → hapc-2.3.0}/src/bindings.cpp +5 -0
- {hapc-2.0.2 → hapc-2.3.0}/src/hapc_core.hpp +8 -1
- {hapc-2.0.2 → hapc-2.3.0}/src/pcghal_cv_classi_cpp.cpp +111 -41
- {hapc-2.0.2 → hapc-2.3.0}/src/r_bindings.cpp +23 -11
- {hapc-2.0.2 → hapc-2.3.0}/tests/test_ate_hapc_diagnostics_example.py +12 -8
- {hapc-2.0.2 → hapc-2.3.0}/CMakeLists.txt +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/LICENSE +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/MANIFEST.in +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/README.md +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/python/hapc/core.py +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/python/hapc.egg-info/SOURCES.txt +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/python/hapc.egg-info/dependency_links.txt +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/python/hapc.egg-info/not-zip-safe +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/python/hapc.egg-info/requires.txt +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/python/hapc.egg-info/top_level.txt +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/setup.cfg +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/setup.py +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/src/cross_kernel.cpp +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/src/cv_classi.cpp +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/src/cv_fast_pchal.cpp +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/src/cv_fast_pchal_python.cpp +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/src/fast_pchal.cpp +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/src/logistic_call.cpp +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/src/mkernel.cpp +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/src/pcghal_call.cpp +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/src/pcghal_classi_call.cpp +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/src/pcghal_cv.cpp +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/src/pcghal_cv_cpp.cpp +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/src/pchal_design.cpp +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/src/ridge_wrappers.cpp +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/src/single_pcghal_cpp.cpp +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/src/single_pchar.cpp +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/tests/test_api.py +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/tests/test_ate.py +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/tests/test_core.py +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/tests/test_logistic_regression.py +0 -0
- {hapc-2.0.2 → hapc-2.3.0}/tests/test_r_vs_python_alpha.py +0 -0
|
@@ -16,9 +16,9 @@ Provides :func:`ate_hapc`, a high-level convenience wrapper that:
|
|
|
16
16
|
which ``|mean(EIF)| ≤ σ / (√n · log n)``. This is the **undersmoothed**
|
|
17
17
|
outcome model. If no λ in the grid meets the threshold, the smallest λ
|
|
18
18
|
is used.
|
|
19
|
-
5. Returns
|
|
20
|
-
``(1 - alpha)`` Wald confidence interval
|
|
21
|
-
that
|
|
19
|
+
5. Returns a **doubly robust** ATE point estimate at the undersmoothed outcome
|
|
20
|
+
model and a ``(1 - alpha)`` Wald confidence interval from the EIF evaluated
|
|
21
|
+
at that estimate (see Notes).
|
|
22
22
|
|
|
23
23
|
The function does not implement sample splitting / cross-fitting:
|
|
24
24
|
nuisances are fit on the full sample and the EIF is evaluated on the same
|
|
@@ -47,8 +47,9 @@ class ATEResult(NamedTuple):
|
|
|
47
47
|
Attributes
|
|
48
48
|
----------
|
|
49
49
|
estimate : float
|
|
50
|
-
|
|
51
|
-
``mean(
|
|
50
|
+
Doubly robust (AIPW-style) ATE at the undersmoothed outcome model:
|
|
51
|
+
``mean(A/π̂·(Y-μ̂₁)+μ̂₁ - (1-A)/(1-π̂)·(Y-μ̂₀) - μ̂₀)``, matching the
|
|
52
|
+
efficient influence function used for the Wald interval (see Notes).
|
|
52
53
|
lower : float
|
|
53
54
|
Lower endpoint of the ``(1 - alpha)`` Wald confidence interval.
|
|
54
55
|
upper : float
|
|
@@ -228,15 +229,25 @@ def ate_hapc(X: np.ndarray, Y: np.ndarray, A: np.ndarray,
|
|
|
228
229
|
specified).
|
|
229
230
|
2. Fix the propensity at its CV-best λ; refit on the full sample to
|
|
230
231
|
obtain ``π̂(W_i) = P(A=1 | W_i)``.
|
|
231
|
-
3. At the CV-best outcome λ, compute
|
|
232
|
-
|
|
232
|
+
3. At the CV-best outcome λ, compute a **plugin-centered** influence vector
|
|
233
|
+
(same mean as the DR EIF at :math:`\\psi=\\overline{\\mu}_1-\\overline{\\mu}_0`)
|
|
234
|
+
and let ``σ = std(·)``.
|
|
233
235
|
4. Threshold ``τ = σ / (√n · log n)``.
|
|
234
236
|
5. Walk the **outcome** λ grid in **decreasing**
|
|
235
237
|
order; pick the first (largest) λ for which
|
|
236
238
|
``|mean(EIF_diff)| ≤ τ`` — call it ``λ_u``.
|
|
237
|
-
6.
|
|
238
|
-
|
|
239
|
-
at
|
|
239
|
+
6. **Doubly robust** point estimate (same nuisances ``(π̂, μ̂₁, μ̂₀)``):
|
|
240
|
+
``ψ̂ = mean(A/π̂·(Y-μ̂₁)+μ̂₁ - (1-A)/(1-π̂)·(Y-μ̂₀) - μ̂₀)``.
|
|
241
|
+
One-step influence function (centered at ``ψ̂``):
|
|
242
|
+
``φ_i = A_i/π̂_i·(Y_i-μ̂_{1i}) + μ̂_{1i} - (1-A_i)/(1-π̂_i)·(Y_i-μ̂_{0i})
|
|
243
|
+
- μ̂_{0i} - ψ̂``.
|
|
244
|
+
CI: ``ψ̂ ± z_{1-α/2} · std(φ) / √n``.
|
|
245
|
+
|
|
246
|
+
This contrasts with **plug-in** G-computation ``mean(μ̂₁(W)-μ̂₀(W))``,
|
|
247
|
+
which can be materially biased when both nuisances are estimated on the
|
|
248
|
+
same sample and the outcome regressions are regularized. The DR
|
|
249
|
+
``ψ̂`` is consistent if **either** the propensity **or** the pair
|
|
250
|
+
``(μ̂₁, μ̂₀)`` is correctly specified (standard double robustness).
|
|
240
251
|
|
|
241
252
|
Examples
|
|
242
253
|
--------
|
|
@@ -329,38 +340,60 @@ def ate_hapc(X: np.ndarray, Y: np.ndarray, A: np.ndarray,
|
|
|
329
340
|
)
|
|
330
341
|
return p[:n], p[n:]
|
|
331
342
|
|
|
332
|
-
def
|
|
343
|
+
def _eif_plugin_centered(mu1: np.ndarray, mu0: np.ndarray) -> np.ndarray:
    """Return the plug-in-centred influence vector for the undersmoothing gate.

    For each arm the inverse-propensity-weighted residual is combined with
    the outcome regression centred at its own sample mean; the treated-arm
    vector minus the control-arm vector is returned.  ``A01``, ``pi1`` and
    ``Y`` are read from the enclosing scope.  The reported ATE and its
    interval come from ``_psi_dr`` / ``_eif_dr``, not from this helper.

    NOTE(review): the centred regression terms are *subtracted* here, while
    ``_psi_dr`` / ``_eif_dr`` add ``mu``.  Those terms have zero sample mean,
    so the mean of the returned vector is unaffected, but its standard
    deviation is not -- confirm the sign is intended.
    """
    weight_treated = A01 / pi1
    weight_control = (1.0 - A01) / (1.0 - pi1)
    centred_mu1 = mu1 - mu1.mean()
    centred_mu0 = mu0 - mu0.mean()
    arm_treated = weight_treated * (Y - mu1) - centred_mu1
    arm_control = weight_control * (Y - mu0) - centred_mu0
    return arm_treated - arm_control
|
|
336
353
|
|
|
354
|
+
def _psi_dr(mu1: np.ndarray, mu0: np.ndarray) -> float:
    """Return the doubly robust (AIPW-style) ATE point estimate.

    Builds the uncentred score
    ``A/pi*(Y-mu1) + mu1 - (1-A)/(1-pi)*(Y-mu0) - mu0`` from the supplied
    outcome regressions and the closure variables ``A01``, ``pi1`` and ``Y``,
    then returns its sample mean as a Python ``float``.
    """
    treated_part = (A01 / pi1) * (Y - mu1)
    control_part = ((1.0 - A01) / (1.0 - pi1)) * (Y - mu0)
    score = treated_part + mu1 - control_part - mu0
    return float(np.mean(score))
|
|
363
|
+
|
|
364
|
+
def _eif_dr(mu1: np.ndarray, mu0: np.ndarray, psi: float) -> np.ndarray:
    """Return the doubly robust influence vector centred at ``psi``.

    This is the per-observation score
    ``A/pi*(Y-mu1) + mu1 - (1-A)/(1-pi)*(Y-mu0) - mu0`` with ``psi``
    subtracted; ``A01``, ``pi1`` and ``Y`` come from the enclosing scope.
    """
    treated_part = (A01 / pi1) * (Y - mu1)
    control_part = ((1.0 - A01) / (1.0 - pi1)) * (Y - mu0)
    return treated_part + mu1 - control_part - mu0 - psi
|
|
372
|
+
|
|
337
373
|
# --- 3. σ at CV configuration → threshold τ ----------------------------
|
|
338
374
|
mu1_cv, mu0_cv = _mu_pair(lam_out_cv)
|
|
339
|
-
eif_cv =
|
|
375
|
+
eif_cv = _eif_plugin_centered(mu1_cv, mu0_cv)
|
|
340
376
|
sigma_cv = float(np.std(eif_cv, ddof=0))
|
|
341
377
|
threshold = sigma_cv / (np.sqrt(n) * np.log(n))
|
|
342
378
|
|
|
343
379
|
# --- 4. Undersmoothing sweep: largest λ → smallest --------------------
|
|
344
380
|
lam_und: Optional[float] = None
|
|
345
|
-
eif_und: Optional[np.ndarray] = None
|
|
346
381
|
mu1_und = mu0_und = None
|
|
347
382
|
for lam in np.sort(lambdas_out)[::-1]:
|
|
348
383
|
try:
|
|
349
384
|
mu1, mu0 = _mu_pair(float(lam))
|
|
350
385
|
except Exception:
|
|
351
386
|
continue
|
|
352
|
-
eif =
|
|
387
|
+
eif = _eif_plugin_centered(mu1, mu0)
|
|
353
388
|
if abs(eif.mean()) <= threshold:
|
|
354
389
|
lam_und = float(lam)
|
|
355
390
|
mu1_und, mu0_und = mu1, mu0
|
|
356
|
-
eif_und = eif
|
|
357
391
|
break
|
|
358
392
|
|
|
359
|
-
if
|
|
393
|
+
if lam_und is None:
|
|
360
394
|
# Threshold never met → fall back to the smallest λ in the grid.
|
|
361
395
|
lam_und = float(lambdas_out.min())
|
|
362
396
|
mu1_und, mu0_und = _mu_pair(lam_und)
|
|
363
|
-
eif_und = _eif_diff(mu1_und, mu0_und)
|
|
364
397
|
|
|
365
398
|
if plot_diagnostics:
|
|
366
399
|
t_lams: list[float] = []
|
|
@@ -370,7 +403,7 @@ def ate_hapc(X: np.ndarray, Y: np.ndarray, A: np.ndarray,
|
|
|
370
403
|
mu1, mu0 = _mu_pair(float(lam))
|
|
371
404
|
except Exception:
|
|
372
405
|
continue
|
|
373
|
-
eif =
|
|
406
|
+
eif = _eif_plugin_centered(mu1, mu0)
|
|
374
407
|
t_lams.append(float(lam))
|
|
375
408
|
t_abs.append(float(np.abs(eif.mean())))
|
|
376
409
|
_plot_ate_diagnostics(
|
|
@@ -379,9 +412,10 @@ def ate_hapc(X: np.ndarray, Y: np.ndarray, A: np.ndarray,
|
|
|
379
412
|
lam_prop_cv, lam_out_cv, lam_und, threshold,
|
|
380
413
|
)
|
|
381
414
|
|
|
382
|
-
# --- 5.
|
|
383
|
-
psi =
|
|
384
|
-
|
|
415
|
+
# --- 5. Doubly robust point estimate + (1 - alpha) Wald CI --------------
|
|
416
|
+
psi = _psi_dr(mu1_und, mu0_und)
|
|
417
|
+
eif_dr = _eif_dr(mu1_und, mu0_und, psi)
|
|
418
|
+
sigma_und = float(np.std(eif_dr, ddof=0))
|
|
385
419
|
z = float(_normal.ppf(1.0 - alpha / 2.0))
|
|
386
420
|
half = z * sigma_und / np.sqrt(n)
|
|
387
421
|
|
|
@@ -18,7 +18,11 @@ import numpy as np
|
|
|
18
18
|
|
|
19
19
|
from . import hapc_core
|
|
20
20
|
from .core import _C, cross_kernel_hapc, design_hapc
|
|
21
|
-
from .single import
|
|
21
|
+
from .single import (
|
|
22
|
+
_check_binomial_labels,
|
|
23
|
+
_to_soft01,
|
|
24
|
+
single_pcghal_classification_lasso,
|
|
25
|
+
)
|
|
22
26
|
|
|
23
27
|
|
|
24
28
|
class CVResult(NamedTuple):
|
|
@@ -376,6 +380,9 @@ def pcghal_cv_classi_lasso(X: np.ndarray, Y: np.ndarray,
|
|
|
376
380
|
if not np.all(lams > 0):
|
|
377
381
|
raise ValueError("All lambdas must be > 0 for logistic LASSO.")
|
|
378
382
|
|
|
383
|
+
# Soft target in [0,1] used for the held-out cross-entropy deviance
|
|
384
|
+
# (accepts hard {0,1}/{-1,+1} or fractional EM-HAL posteriors).
|
|
385
|
+
q = _to_soft01(Y)
|
|
379
386
|
folds = _native_folds(n, int(nfolds))
|
|
380
387
|
L = lams.size
|
|
381
388
|
fold_dev = np.full((int(nfolds), L), np.nan)
|
|
@@ -386,7 +393,7 @@ def pcghal_cv_classi_lasso(X: np.ndarray, Y: np.ndarray,
|
|
|
386
393
|
if te.size == 0 or tr.size == 0:
|
|
387
394
|
continue
|
|
388
395
|
Xtr, Ytr = X[tr], Y[tr]
|
|
389
|
-
Xte, Yte = X[te],
|
|
396
|
+
Xte, Yte = X[te], q[te]
|
|
390
397
|
|
|
391
398
|
for j, lam in enumerate(lams):
|
|
392
399
|
res = single_pcghal_classification_lasso(
|
|
@@ -395,9 +402,7 @@ def pcghal_cv_classi_lasso(X: np.ndarray, Y: np.ndarray,
|
|
|
395
402
|
verbose=bool(verbose), max_iter=int(max_iter),
|
|
396
403
|
)
|
|
397
404
|
probs = np.clip(res.probabilities, 1e-15, 1 - 1e-15)
|
|
398
|
-
|
|
399
|
-
else (Yte > 0).astype(np.float64)
|
|
400
|
-
dev = -(yte01 * np.log(probs) + (1 - yte01) * np.log(1 - probs))
|
|
405
|
+
dev = -(Yte * np.log(probs) + (1 - Yte) * np.log(1 - probs))
|
|
401
406
|
fold_dev[k - 1, j] = float(dev.mean())
|
|
402
407
|
|
|
403
408
|
deviances = np.nanmean(fold_dev, axis=0)
|
|
@@ -500,6 +505,8 @@ def cv_hapc(X: np.ndarray, Y: np.ndarray,
|
|
|
500
505
|
lams = _grid(None, log_lambda_min, log_lambda_max, grid_length)
|
|
501
506
|
|
|
502
507
|
if family == "binomial":
|
|
508
|
+
# Validate labels; allow soft labels in [0,1] only for norm in {"1","2"}.
|
|
509
|
+
_check_binomial_labels(Y, norm)
|
|
503
510
|
if norm in {"sv", "2"}:
|
|
504
511
|
return pcghal_cv_classi(
|
|
505
512
|
X, Y, max_degree=max_degree, npcs=npcs,
|
|
@@ -95,6 +95,79 @@ def _to_pm1(Y: np.ndarray, *, verbose: bool = False) -> np.ndarray:
|
|
|
95
95
|
)
|
|
96
96
|
|
|
97
97
|
|
|
98
|
+
def _label_kind(Y: np.ndarray) -> str:
|
|
99
|
+
"""Classify a binomial response vector.
|
|
100
|
+
|
|
101
|
+
Returns ``"01"`` (hard labels in ``{0,1}``), ``"pm1"`` (hard labels in
|
|
102
|
+
``{-1,+1}``), or ``"soft"`` (fractional labels in ``[0,1]``, e.g. EM-HAL
|
|
103
|
+
E-step posteriors). Raises ``ValueError`` if any value falls outside
|
|
104
|
+
``[0,1]`` and the set is not exactly ``{-1,+1}``.
|
|
105
|
+
"""
|
|
106
|
+
Y = np.asarray(Y, dtype=np.float64).ravel()
|
|
107
|
+
u = np.unique(Y[~np.isnan(Y)])
|
|
108
|
+
s = set(u.tolist())
|
|
109
|
+
if s.issubset({0.0, 1.0}):
|
|
110
|
+
return "01"
|
|
111
|
+
if s == {-1.0, 1.0}:
|
|
112
|
+
return "pm1"
|
|
113
|
+
if u.size and u.min() >= 0.0 and u.max() <= 1.0:
|
|
114
|
+
return "soft"
|
|
115
|
+
raise ValueError(
|
|
116
|
+
"family='binomial' requires Y in {0,1}, {-1,+1}, or soft labels in "
|
|
117
|
+
"[0,1]; found values outside [0,1]."
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _to_soft01(Y: np.ndarray) -> np.ndarray:
    """Convert a binomial response into a cross-entropy target in ``[0,1]``.

    ``{-1,+1}`` encodings (as reported by :func:`_label_kind`) are mapped
    affinely onto ``{0,1}``; every other accepted input is returned as the
    flattened ``float64`` array unchanged.
    """
    labels = np.asarray(Y, dtype=np.float64).ravel()
    if _label_kind(labels) == "pm1":
        return (labels + 1.0) / 2.0
    return labels
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _check_binomial_labels(Y: np.ndarray, norm: str) -> str:
    """Validate binomial labels and apply the soft-label norm restriction.

    The label kind is obtained from :func:`_label_kind` (which raises
    ``ValueError`` on invalid values).  Hard labels pass straight through.
    Soft labels raise ``NotImplementedError`` when ``norm == "sv"``; for any
    other ``norm`` a warning is emitted.  The label kind is returned.
    """
    import warnings

    kind = _label_kind(Y)
    if kind != "soft":
        return kind

    if norm == "sv":
        raise NotImplementedError(
            "Soft labels (Y in (0,1)) are not implemented for norm='sv'; "
            "use norm='1' or norm='2'."
        )

    warnings.warn(
        "Non-binary labels detected in Y: treating them as soft labels in "
        "[0,1] (cross-entropy target). Supported only for norm='1' and "
        "norm='2'.",
        stacklevel=2,
    )
    return kind
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _calibrate_logistic_intercept(y01: np.ndarray, eta: np.ndarray) -> float:
|
|
154
|
+
"""Newton calibration for intercept with fixed linear predictor ``eta``."""
|
|
155
|
+
y01 = np.asarray(y01, dtype=np.float64).ravel()
|
|
156
|
+
eta = np.asarray(eta, dtype=np.float64).ravel()
|
|
157
|
+
if y01.shape != eta.shape:
|
|
158
|
+
raise ValueError("y01 and eta must have the same shape")
|
|
159
|
+
b0 = 0.0
|
|
160
|
+
for _ in range(50):
|
|
161
|
+
z = eta + b0
|
|
162
|
+
p = 1.0 / (1.0 + np.exp(-z))
|
|
163
|
+
g = float(np.sum(p - y01))
|
|
164
|
+
h = float(np.sum(p * (1.0 - p)))
|
|
165
|
+
if abs(g) < 1e-10 or h < 1e-12:
|
|
166
|
+
break
|
|
167
|
+
b0 -= g / h
|
|
168
|
+
return float(b0)
|
|
169
|
+
|
|
170
|
+
|
|
98
171
|
# ---------------------------------------------------------------------------
|
|
99
172
|
# Single λ — gaussian, norm in {"1", "2"} (closed-form)
|
|
100
173
|
# ---------------------------------------------------------------------------
|
|
@@ -299,6 +372,14 @@ def single_pcghal_classification(
|
|
|
299
372
|
res = pcghal_classification(Y_pm1, Xtilde, ENn, alpha0,
|
|
300
373
|
max_iter=max_iter, tol=tol,
|
|
301
374
|
step_factor=step_factor, verbose=verbose)
|
|
375
|
+
y01 = (Y_pm1 > 0).astype(np.float64)
|
|
376
|
+
eta_train = Xtilde @ np.asarray(res.alpha).ravel()
|
|
377
|
+
b0 = _calibrate_logistic_intercept(y01, eta_train)
|
|
378
|
+
ymu = Y_pm1 * (eta_train + b0)
|
|
379
|
+
risk = float(
|
|
380
|
+
np.where(ymu > 0, np.log1p(np.exp(-ymu)), -ymu + np.log1p(np.exp(ymu)))
|
|
381
|
+
.mean()
|
|
382
|
+
)
|
|
302
383
|
|
|
303
384
|
predictions = probabilities = predicted_classes = None
|
|
304
385
|
if predict is not None:
|
|
@@ -307,7 +388,7 @@ def single_pcghal_classification(
|
|
|
307
388
|
raise ValueError(f"predict must have {p} columns")
|
|
308
389
|
Ktest = cross_kernel_hapc(X, Xte, max_degree, center=center)
|
|
309
390
|
v = des.U[:, :final_npc] @ ((1.0 / (des.d[:final_npc] + 1e-12)) * res.alpha)
|
|
310
|
-
log_odds = Ktest @ v
|
|
391
|
+
log_odds = Ktest @ v + b0
|
|
311
392
|
predictions = log_odds
|
|
312
393
|
probabilities = 1.0 / (1.0 + np.exp(-log_odds))
|
|
313
394
|
predicted_classes = np.where(probabilities > 0.5, 1.0, -1.0)
|
|
@@ -315,7 +396,7 @@ def single_pcghal_classification(
|
|
|
315
396
|
return SinglePcghalClassificationResult(
|
|
316
397
|
alpha=res.alpha, predictions=predictions,
|
|
317
398
|
probabilities=probabilities, predicted_classes=predicted_classes,
|
|
318
|
-
lambda_=float(lambda_), risk=
|
|
399
|
+
lambda_=float(lambda_), risk=risk, iter=res.iter,
|
|
319
400
|
)
|
|
320
401
|
|
|
321
402
|
|
|
@@ -341,22 +422,21 @@ def single_pcghal_classification_ridge_only(
|
|
|
341
422
|
SinglePcghalClassificationResult
|
|
342
423
|
"""
|
|
343
424
|
X, Y, n, p = _check_xy(X, Y)
|
|
344
|
-
|
|
425
|
+
# Accept hard {0,1}/{-1,+1} or soft [0,1] labels (cross-entropy target).
|
|
426
|
+
y01 = _to_soft01(Y)
|
|
345
427
|
|
|
346
428
|
des = design_hapc(X, max_degree, npcs, center=center)
|
|
347
429
|
final_npc = des.d.shape[0]
|
|
348
430
|
Xtilde = des.U[:, :final_npc] * des.d[:final_npc]
|
|
349
431
|
|
|
350
432
|
alpha = np.asarray(
|
|
351
|
-
hapc_core.
|
|
433
|
+
hapc_core.logistic_ridge_init_y01(_C(y01), _C(Xtilde), float(lambda_))
|
|
352
434
|
).ravel()
|
|
353
435
|
|
|
354
436
|
eta = Xtilde @ alpha
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
.mean()
|
|
359
|
-
)
|
|
437
|
+
b0 = _calibrate_logistic_intercept(y01, eta)
|
|
438
|
+
phat = np.clip(1.0 / (1.0 + np.exp(-(eta + b0))), 1e-15, 1 - 1e-15)
|
|
439
|
+
risk = float((-(y01 * np.log(phat) + (1 - y01) * np.log(1 - phat))).mean())
|
|
360
440
|
|
|
361
441
|
predictions = probabilities = predicted_classes = None
|
|
362
442
|
if predict is not None:
|
|
@@ -365,7 +445,7 @@ def single_pcghal_classification_ridge_only(
|
|
|
365
445
|
raise ValueError(f"predict must have {p} columns")
|
|
366
446
|
Ktest = cross_kernel_hapc(X, Xte, max_degree, center=center)
|
|
367
447
|
v = des.U[:, :final_npc] @ ((1.0 / (des.d[:final_npc] + 1e-12)) * alpha)
|
|
368
|
-
log_odds = Ktest @ v
|
|
448
|
+
log_odds = Ktest @ v + b0
|
|
369
449
|
predictions = log_odds
|
|
370
450
|
probabilities = 1.0 / (1.0 + np.exp(-log_odds))
|
|
371
451
|
predicted_classes = np.where(probabilities > 0.5, 1.0, -1.0)
|
|
@@ -452,13 +532,26 @@ def single_pcghal_classification_lasso(
|
|
|
452
532
|
raise ValueError(f"lambda_ must be > 0 for LASSO; got {lambda_}")
|
|
453
533
|
|
|
454
534
|
X, Y, n, p = _check_xy(X, Y)
|
|
455
|
-
|
|
456
|
-
|
|
535
|
+
# Accept hard {0,1}/{-1,+1} or soft [0,1] labels (cross-entropy target).
|
|
536
|
+
q = _to_soft01(Y)
|
|
457
537
|
|
|
458
538
|
des = design_hapc(X, max_degree, npcs, center=center)
|
|
459
539
|
final_npc = des.d.shape[0]
|
|
460
540
|
Xtilde = des.U[:, :final_npc] * des.d[:final_npc]
|
|
461
541
|
|
|
542
|
+
# For soft labels, replicate each row as a (label=1, weight=q) and
|
|
543
|
+
# (label=0, weight=1-q) pair so the sample-weighted logistic loss equals
|
|
544
|
+
# the soft cross-entropy. On hard labels this reduces to the plain fit.
|
|
545
|
+
is_soft = bool(np.any((q > 1e-12) & (q < 1.0 - 1e-12)))
|
|
546
|
+
if is_soft:
|
|
547
|
+
Xfit = _C(np.vstack([Xtilde, Xtilde]))
|
|
548
|
+
yfit = np.concatenate([np.ones(n), np.zeros(n)]).astype(np.int64)
|
|
549
|
+
wfit = np.concatenate([q, 1.0 - q]).astype(np.float64)
|
|
550
|
+
else:
|
|
551
|
+
Xfit = _C(Xtilde)
|
|
552
|
+
yfit = (q > 0.5).astype(np.int64)
|
|
553
|
+
wfit = None
|
|
554
|
+
|
|
462
555
|
C = 1.0 / (n * float(lambda_))
|
|
463
556
|
# sklearn>=1.8 deprecated penalty="l1" in favour of l1_ratio=1 with the
|
|
464
557
|
# liblinear solver; older versions still need penalty="l1". Try the new
|
|
@@ -467,23 +560,28 @@ def single_pcghal_classification_lasso(
|
|
|
467
560
|
sig_params = inspect.signature(LogisticRegression).parameters
|
|
468
561
|
common_kw = dict(solver="liblinear", C=C, fit_intercept=False,
|
|
469
562
|
max_iter=int(max_iter))
|
|
563
|
+
|
|
564
|
+
def _fit(**ctor):
    """Build a ``LogisticRegression`` from ``ctor`` plus ``common_kw`` and fit it.

    The fit uses the enclosing scope's ``Xfit`` / ``yfit``; ``sample_weight``
    is passed only when ``wfit`` is not ``None``.  Returns the fitted model.
    """
    estimator = LogisticRegression(**ctor, **common_kw)
    fit_kwargs = {} if wfit is None else {"sample_weight": wfit}
    estimator.fit(Xfit, yfit, **fit_kwargs)
    return estimator
|
|
571
|
+
|
|
470
572
|
if "l1_ratio" in sig_params and "penalty" in sig_params:
|
|
471
573
|
try:
|
|
472
|
-
model =
|
|
473
|
-
model.fit(_C(Xtilde), Y_01)
|
|
574
|
+
model = _fit(l1_ratio=1.0)
|
|
474
575
|
except (TypeError, ValueError):
|
|
475
|
-
model =
|
|
476
|
-
model.fit(_C(Xtilde), Y_01)
|
|
576
|
+
model = _fit(penalty="l1")
|
|
477
577
|
else: # pragma: no cover (very old sklearn)
|
|
478
|
-
model =
|
|
479
|
-
model.fit(_C(Xtilde), Y_01)
|
|
578
|
+
model = _fit(penalty="l1")
|
|
480
579
|
alpha = np.asarray(model.coef_, dtype=np.float64).ravel()
|
|
580
|
+
b0 = _calibrate_logistic_intercept(q, Xtilde @ alpha)
|
|
481
581
|
|
|
482
|
-
eta = Xtilde @ alpha
|
|
483
|
-
|
|
484
|
-
risk = float(
|
|
485
|
-
np.where(ymu > 0, np.log1p(np.exp(-ymu)), -ymu + np.log1p(np.exp(ymu))).mean()
|
|
486
|
-
)
|
|
582
|
+
eta = Xtilde @ alpha + b0
|
|
583
|
+
phat = np.clip(1.0 / (1.0 + np.exp(-eta)), 1e-15, 1 - 1e-15)
|
|
584
|
+
risk = float((-(q * np.log(phat) + (1 - q) * np.log(1 - phat))).mean())
|
|
487
585
|
|
|
488
586
|
predictions = probabilities = predicted_classes = None
|
|
489
587
|
if predict is not None:
|
|
@@ -492,7 +590,7 @@ def single_pcghal_classification_lasso(
|
|
|
492
590
|
raise ValueError(f"predict must have {p} columns")
|
|
493
591
|
Ktest = cross_kernel_hapc(X, Xte, max_degree, center=center)
|
|
494
592
|
v = des.U[:, :final_npc] @ ((1.0 / (des.d[:final_npc] + 1e-12)) * alpha)
|
|
495
|
-
log_odds = Ktest @ v
|
|
593
|
+
log_odds = Ktest @ v + b0
|
|
496
594
|
predictions = log_odds
|
|
497
595
|
probabilities = 1.0 / (1.0 + np.exp(-log_odds))
|
|
498
596
|
predicted_classes = np.where(probabilities > 0.5, 1.0, -1.0)
|
|
@@ -531,8 +629,10 @@ def hapc(X: np.ndarray, Y: np.ndarray,
|
|
|
531
629
|
X : np.ndarray, shape (n, p)
|
|
532
630
|
Features.
|
|
533
631
|
Y : np.ndarray, shape (n,)
|
|
534
|
-
Response. For ``family="binomial"
|
|
535
|
-
``{-1,+1}
|
|
632
|
+
Response. For ``family="binomial"``: hard labels in ``{0,1}`` or
|
|
633
|
+
``{-1,+1}``, or soft labels in ``[0,1]`` (e.g. EM-HAL E-step
|
|
634
|
+
posteriors). Soft labels are supported only for ``norm`` in
|
|
635
|
+
``{"1","2"}``; ``norm="sv"`` requires hard labels.
|
|
536
636
|
family : {"gaussian", "binomial"}, default "gaussian"
|
|
537
637
|
Loss family.
|
|
538
638
|
max_degree : int, default 1
|
|
@@ -588,6 +688,8 @@ def hapc(X: np.ndarray, Y: np.ndarray,
|
|
|
588
688
|
npcs = int(X.shape[0])
|
|
589
689
|
|
|
590
690
|
if family == "binomial":
|
|
691
|
+
# Validate labels; allow soft labels in [0,1] only for norm in {"1","2"}.
|
|
692
|
+
_check_binomial_labels(Y, norm)
|
|
591
693
|
if norm == "sv":
|
|
592
694
|
return single_pcghal_classification(
|
|
593
695
|
X, Y, max_degree, npcs, lambda_,
|
|
@@ -117,4 +117,9 @@ PYBIND11_MODULE(hapc_core, m) {
|
|
|
117
117
|
|
|
118
118
|
m.def("logistic_ridge_init", &logistic_ridge_init,
|
|
119
119
|
py::arg("Y"), py::arg("X"), py::arg("lambda"));
|
|
120
|
+
|
|
121
|
+
// Soft-label logistic ridge initialiser: target Y may be any value in
|
|
122
|
+
// [0,1] (hard {0,1} labels or fractional EM-HAL E-step posteriors).
|
|
123
|
+
m.def("logistic_ridge_init_y01", &logistic_ridge_init_y01,
|
|
124
|
+
py::arg("Y"), py::arg("X"), py::arg("lambda"));
|
|
120
125
|
}
|
|
@@ -91,6 +91,11 @@ FastCVOutput fasthal_cv_python(const MatrixXd& X, const VectorXd& Y, int npc,
|
|
|
91
91
|
// (internally multiplied by n, matching logistic_call).
|
|
92
92
|
VectorXd logistic_ridge_init(const VectorXd& Y_pm1, const MatrixXd& X, double lambda);
|
|
93
93
|
|
|
94
|
+
// Soft-label variant: target `y01` may take any value in [0, 1] (hard {0,1}
|
|
95
|
+
// labels or fractional EM-HAL E-step posteriors). On hard {0,1} inputs the
|
|
96
|
+
// result is identical to logistic_ridge_init. lambda has the same scaling.
|
|
97
|
+
VectorXd logistic_ridge_init_y01(const VectorXd& y01, const MatrixXd& X, double lambda);
|
|
98
|
+
|
|
94
99
|
// Cross-validation output for binomial (logistic) HAPC.
|
|
95
100
|
struct CVClassiOutput {
|
|
96
101
|
std::vector<double> deviances;
|
|
@@ -101,7 +106,9 @@ struct CVClassiOutput {
|
|
|
101
106
|
};
|
|
102
107
|
|
|
103
108
|
// Python-friendly binomial CV (mirrors R `pchal_cv_classi_call`).
|
|
104
|
-
// Y must
|
|
109
|
+
// Y must lie in [0,1]: hard {0,1} labels or soft EM-HAL posteriors. Soft
|
|
110
|
+
// labels are supported only when with_pgd == false (norm="2"); with_pgd ==
|
|
111
|
+
// true (norm="sv") rejects soft labels.
|
|
105
112
|
//
|
|
106
113
|
// When `with_pgd == true` (default): per fold runs logistic-ridge initialiser
|
|
107
114
|
// followed by projected gradient descent on logistic loss (norm="sv").
|
|
@@ -28,10 +28,15 @@
|
|
|
28
28
|
// rule `beta := delta_beta` (i.e. solving the full normal equation each
|
|
29
29
|
// iteration, treating the IRLS working response as the regression target).
|
|
30
30
|
// ---------------------------------------------------------------------------
|
|
31
|
-
|
|
31
|
+
// Soft-label logistic ridge. The target `y01` may take any value in [0, 1]:
|
|
32
|
+
// hard {0,1} labels or fractional EM-HAL E-step posteriors. The IRLS update
|
|
33
|
+
// is unchanged; fractional targets are standard for cross-entropy
|
|
34
|
+
// minimisation, so on hard {0,1} inputs the result is bit-identical to the
|
|
35
|
+
// former {-1,+1} implementation.
|
|
36
|
+
VectorXd logistic_ridge_init_y01(const VectorXd& y01, const MatrixXd& X, double lambda) {
|
|
32
37
|
const int n = X.rows();
|
|
33
38
|
const int p = X.cols();
|
|
34
|
-
if (
|
|
39
|
+
if (y01.size() != n) {
|
|
35
40
|
throw std::runtime_error("logistic_ridge_init: Y length must match nrow(X).");
|
|
36
41
|
}
|
|
37
42
|
// Match logistic_call: lambda is multiplied by n internally.
|
|
@@ -39,12 +44,6 @@ VectorXd logistic_ridge_init(const VectorXd& Y_pm1, const MatrixXd& X, double la
|
|
|
39
44
|
const int max_iter = 100;
|
|
40
45
|
const double tol = 1e-8;
|
|
41
46
|
|
|
42
|
-
// logistic_call expects Y in {-1,+1} but treats it via the GLM update with
|
|
43
|
-
// the {0,1} working response. We replicate that behaviour exactly: convert
|
|
44
|
-
// back to a {0,1} response y01 = (Y_pm1 + 1) / 2 to compute mu/working z.
|
|
45
|
-
VectorXd y01(n);
|
|
46
|
-
for (int i = 0; i < n; ++i) y01[i] = (Y_pm1[i] > 0) ? 1.0 : 0.0;
|
|
47
|
-
|
|
48
47
|
VectorXd beta = VectorXd::Zero(p);
|
|
49
48
|
for (int iter = 0; iter < max_iter; ++iter) {
|
|
50
49
|
VectorXd eta = X * beta;
|
|
@@ -66,6 +65,51 @@ VectorXd logistic_ridge_init(const VectorXd& Y_pm1, const MatrixXd& X, double la
|
|
|
66
65
|
return beta;
|
|
67
66
|
}
|
|
68
67
|
|
|
68
|
+
// Backward-compatible wrapper: accepts Y in {-1,+1} and converts to {0,1}.
|
|
69
|
+
// Used by the PGD (norm="sv") single-fit path, which is hard-label only.
|
|
70
|
+
VectorXd logistic_ridge_init(const VectorXd& Y_pm1, const MatrixXd& X, double lambda) {
    const int n = X.rows();
    // Validate the length before indexing: the conversion loop below reads
    // Y_pm1[0..n-1], so a shorter Y would be read out of bounds before the
    // size check inside logistic_ridge_init_y01 could ever run.
    if (Y_pm1.size() != n) {
        throw std::runtime_error("logistic_ridge_init: Y length must match nrow(X).");
    }
    // Map the {-1,+1} encoding onto {0,1}: positive entries become 1, all
    // others 0.
    VectorXd y01(n);
    for (int i = 0; i < n; ++i) y01[i] = (Y_pm1[i] > 0) ? 1.0 : 0.0;
    return logistic_ridge_init_y01(y01, X, lambda);
}
|
|
76
|
+
|
|
77
|
+
static double calibrate_logistic_intercept(const VectorXd& Y01,
|
|
78
|
+
const VectorXd& eta) {
|
|
79
|
+
const int n = (int)Y01.size();
|
|
80
|
+
if (eta.size() != n) {
|
|
81
|
+
throw std::runtime_error("calibrate_logistic_intercept: length mismatch");
|
|
82
|
+
}
|
|
83
|
+
double b0 = 0.0;
|
|
84
|
+
for (int it = 0; it < 50; ++it) {
|
|
85
|
+
const VectorXd z = eta.array() + b0;
|
|
86
|
+
const VectorXd p = (1.0 + (-z.array()).exp()).inverse();
|
|
87
|
+
const double g = (p - Y01).sum();
|
|
88
|
+
const double h = (p.array() * (1.0 - p.array())).sum();
|
|
89
|
+
if (std::abs(g) < 1e-10 || h < 1e-12) break;
|
|
90
|
+
b0 -= g / h;
|
|
91
|
+
}
|
|
92
|
+
return b0;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
// Soft cross-entropy risk for fractional targets y01 in [0,1], given a linear
|
|
96
|
+
// predictor `eta` (intercept already folded in). On hard {0,1} labels this
|
|
97
|
+
// equals the former {-1,+1} logistic risk, so behaviour is unchanged on
|
|
98
|
+
// binary inputs.
|
|
99
|
+
static double logistic_risk_y01(const VectorXd& y01, const VectorXd& eta) {
|
|
100
|
+
const int n = (int)y01.size();
|
|
101
|
+
if (eta.size() != n) {
|
|
102
|
+
throw std::runtime_error("logistic_risk_y01: length mismatch");
|
|
103
|
+
}
|
|
104
|
+
double risk = 0.0;
|
|
105
|
+
for (int i = 0; i < n; ++i) {
|
|
106
|
+
const double pi = 1.0 / (1.0 + std::exp(-eta[i]));
|
|
107
|
+
const double p = std::min(1.0 - 1e-15, std::max(1e-15, pi));
|
|
108
|
+
risk += -(y01[i] * std::log(p) + (1.0 - y01[i]) * std::log(1.0 - p));
|
|
109
|
+
}
|
|
110
|
+
return risk / n;
|
|
111
|
+
}
|
|
112
|
+
|
|
69
113
|
// ---------------------------------------------------------------------------
|
|
70
114
|
// Build the Eigen-friendly "Xtilde = U_top * diag(d_top)" representation,
|
|
71
115
|
// returning final_npc (which may be capped by the design rank).
|
|
@@ -104,33 +148,35 @@ static std::vector<int> make_folds(int n, int K) {
|
|
|
104
148
|
// for the post-CV refit). When `with_pgd == false`, returns the logistic-ridge
|
|
105
149
|
// initialiser α directly with its training logistic risk; otherwise runs the
|
|
106
150
|
// PGD step on top of it (norm="sv").
|
|
107
|
-
static OptimizerOutput logistic_full_fit(const VectorXd&
|
|
151
|
+
static OptimizerOutput logistic_full_fit(const VectorXd& Y01,
|
|
108
152
|
const MatrixXd& Xtilde,
|
|
109
153
|
const MatrixXd& E_Nn,
|
|
110
154
|
double lambda,
|
|
111
155
|
int max_iter, double tol,
|
|
112
156
|
double step_factor, bool verbose,
|
|
113
157
|
bool with_pgd) {
|
|
114
|
-
VectorXd alpha0 =
|
|
115
|
-
if (with_pgd) {
|
|
116
|
-
return pcghal_classi_call(Y_pm1, Xtilde, E_Nn, alpha0,
|
|
117
|
-
max_iter, tol, step_factor, verbose);
|
|
118
|
-
}
|
|
119
|
-
// Logistic-ridge-only path: assemble the same OptimizerOutput shape with
|
|
120
|
-
// logistic training risk evaluated on (Y_pm1, Xtilde, alpha0).
|
|
158
|
+
VectorXd alpha0 = logistic_ridge_init_y01(Y01, Xtilde, lambda);
|
|
121
159
|
const int n = Xtilde.rows();
|
|
122
|
-
VectorXd
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
160
|
+
VectorXd alpha_fit;
|
|
161
|
+
if (with_pgd) {
|
|
162
|
+
// PGD (norm="sv") uses the {-1,+1} logistic loss and is reached only
|
|
163
|
+
// for hard labels (soft labels are rejected upstream), so thresholding
|
|
164
|
+
// at 0.5 recovers the exact {-1,+1} encoding.
|
|
165
|
+
VectorXd Y_pm1(n);
|
|
166
|
+
for (int i = 0; i < n; ++i) Y_pm1[i] = (Y01[i] > 0.5) ? 1.0 : -1.0;
|
|
167
|
+
OptimizerOutput out = pcghal_classi_call(Y_pm1, Xtilde, E_Nn, alpha0,
|
|
168
|
+
max_iter, tol, step_factor, verbose);
|
|
169
|
+
alpha_fit = out.alpha;
|
|
170
|
+
} else {
|
|
171
|
+
alpha_fit = alpha0; // logistic ridge only (norm="2")
|
|
128
172
|
}
|
|
129
|
-
|
|
173
|
+
VectorXd eta = Xtilde * alpha_fit;
|
|
174
|
+
const double b0 = calibrate_logistic_intercept(Y01, eta);
|
|
175
|
+
const double risk = logistic_risk_y01(Y01, eta.array() + b0);
|
|
130
176
|
OptimizerOutput out;
|
|
131
|
-
out.alpha =
|
|
132
|
-
out.alphaiters = MatrixXd::Zero(0,
|
|
133
|
-
out.beta = E_Nn *
|
|
177
|
+
out.alpha = alpha_fit;
|
|
178
|
+
out.alphaiters = MatrixXd::Zero(0, alpha_fit.size());
|
|
179
|
+
out.beta = E_Nn * alpha_fit;
|
|
134
180
|
out.risk = risk;
|
|
135
181
|
out.iter = 0;
|
|
136
182
|
return out;
|
|
@@ -146,10 +192,21 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
|
|
|
146
192
|
const int n = X.rows();
|
|
147
193
|
const int p = X.cols();
|
|
148
194
|
if (Y.size() != n) throw std::runtime_error("pcghal_cv_classi: length(Y) != nrow(X)");
|
|
195
|
+
// Y must lie in [0,1]: hard {0,1} labels or soft EM-HAL posteriors. Soft
|
|
196
|
+
// labels (any value strictly inside (0,1)) are supported only for the
|
|
197
|
+
// logistic-ridge path (norm="2"); the PGD path (norm="sv", with_pgd=true)
|
|
198
|
+
// is not implemented for soft labels.
|
|
199
|
+
bool soft = false;
|
|
149
200
|
for (int i = 0; i < n; ++i) {
|
|
150
|
-
if (Y[i]
|
|
151
|
-
throw std::runtime_error("pcghal_cv_classi: Y must be 0
|
|
201
|
+
if (Y[i] < -1e-12 || Y[i] > 1.0 + 1e-12) {
|
|
202
|
+
throw std::runtime_error("pcghal_cv_classi: Y must be in [0,1]");
|
|
152
203
|
}
|
|
204
|
+
if (Y[i] > 1e-12 && Y[i] < 1.0 - 1e-12) soft = true;
|
|
205
|
+
}
|
|
206
|
+
if (soft && with_pgd) {
|
|
207
|
+
throw std::runtime_error(
|
|
208
|
+
"pcghal_cv_classi: soft labels (Y in (0,1)) are not implemented for "
|
|
209
|
+
"norm='sv'; use norm='1' or norm='2'.");
|
|
153
210
|
}
|
|
154
211
|
const int L = (int)lambdas.size();
|
|
155
212
|
if (L <= 0) throw std::runtime_error("pcghal_cv_classi: lambdas must be non-empty");
|
|
@@ -167,9 +224,9 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
|
|
|
167
224
|
const int final_npc = compute_classi_design(X, maxdeg, npc_eff, center,
|
|
168
225
|
Xtilde, E_Nn, U_top, d_top);
|
|
169
226
|
|
|
170
|
-
//
|
|
171
|
-
|
|
172
|
-
|
|
227
|
+
// Soft target in [0,1] used throughout (the ridge/CE machinery works
|
|
228
|
+
// directly in this space; the PGD branch builds {-1,+1} locally).
|
|
229
|
+
const VectorXd& Y01 = Y;
|
|
173
230
|
|
|
174
231
|
// Degenerate case: R `hapc(family="binomial", …)` passes nfolds=1 with a
|
|
175
232
|
// single λ — there is no proper train/test split. Fit on full data and
|
|
@@ -182,7 +239,7 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
|
|
|
182
239
|
for (int j = 0; j < L; ++j) {
|
|
183
240
|
const double lam = lambdas[j];
|
|
184
241
|
OptimizerOutput full_out = logistic_full_fit(
|
|
185
|
-
|
|
242
|
+
Y01, Xtilde, E_Nn, lam, max_iter, tol, step_factor,
|
|
186
243
|
verbose, with_pgd);
|
|
187
244
|
deviances[j] = full_out.risk;
|
|
188
245
|
if (full_out.risk < best_val) {
|
|
@@ -199,7 +256,11 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
|
|
|
199
256
|
MatrixXd Ktest = kernel_cross_call(X, predict_data, maxdeg, center);
|
|
200
257
|
VectorXd d_inv = d_top.cwiseInverse();
|
|
201
258
|
VectorXd v = U_top * (d_inv.asDiagonal() * best_alpha);
|
|
202
|
-
VectorXd
|
|
259
|
+
VectorXd eta_full = Xtilde * best_alpha;
|
|
260
|
+
VectorXd Y01_full(n);
|
|
261
|
+
for (int i = 0; i < n; ++i) Y01_full[i] = Y[i];
|
|
262
|
+
const double b0_full = calibrate_logistic_intercept(Y01_full, eta_full);
|
|
263
|
+
VectorXd eta_pred = (Ktest * v).array() + b0_full;
|
|
203
264
|
predictions = (1.0 + (-eta_pred.array()).exp()).inverse();
|
|
204
265
|
}
|
|
205
266
|
CVClassiOutput out;
|
|
@@ -230,19 +291,22 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
|
|
|
230
291
|
if (ntr == 0 || nte == 0) continue;
|
|
231
292
|
|
|
232
293
|
MatrixXd Xtr(ntr, final_npc), Xte(nte, final_npc);
|
|
233
|
-
VectorXd
|
|
294
|
+
VectorXd Ytr01(ntr), Yte01(nte);
|
|
234
295
|
for (int i = 0; i < ntr; ++i) {
|
|
235
296
|
Xtr.row(i) = Xtilde.row(tr_idx[i]);
|
|
236
|
-
|
|
297
|
+
Ytr01[i] = Y01[tr_idx[i]];
|
|
237
298
|
}
|
|
238
299
|
for (int i = 0; i < nte; ++i) {
|
|
239
300
|
Xte.row(i) = Xtilde.row(te_idx[i]);
|
|
240
|
-
Yte01[i] =
|
|
301
|
+
Yte01[i] = Y01[te_idx[i]];
|
|
241
302
|
}
|
|
242
303
|
|
|
243
|
-
VectorXd alpha0 =
|
|
304
|
+
VectorXd alpha0 = logistic_ridge_init_y01(Ytr01, Xtr, lambda);
|
|
244
305
|
VectorXd alpha_fold;
|
|
245
306
|
if (with_pgd) {
|
|
307
|
+
// Hard-label only path (soft labels rejected upstream).
|
|
308
|
+
VectorXd Ytr_pm1(ntr);
|
|
309
|
+
for (int i = 0; i < ntr; ++i) Ytr_pm1[i] = (Ytr01[i] > 0.5) ? 1.0 : -1.0;
|
|
246
310
|
OptimizerOutput out = pcghal_classi_call(Ytr_pm1, Xtr, E_Nn, alpha0,
|
|
247
311
|
max_iter, tol, step_factor,
|
|
248
312
|
verbose);
|
|
@@ -251,12 +315,14 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
|
|
|
251
315
|
alpha_fold = alpha0; // logistic ridge only (norm="2")
|
|
252
316
|
}
|
|
253
317
|
|
|
254
|
-
VectorXd
|
|
318
|
+
VectorXd eta_tr = Xtr * alpha_fold;
|
|
319
|
+
const double b0_fold = calibrate_logistic_intercept(Ytr01, eta_tr);
|
|
320
|
+
VectorXd eta = (Xte * alpha_fold).array() + b0_fold;
|
|
255
321
|
VectorXd probs = (1.0 + (-eta.array()).exp()).inverse();
|
|
256
322
|
double dev = 0.0;
|
|
257
323
|
for (int i = 0; i < nte; ++i) {
|
|
258
324
|
double pi = std::max(1e-15, std::min(1.0 - 1e-15, probs[i]));
|
|
259
|
-
dev += (Yte01[i]
|
|
325
|
+
dev += -(Yte01[i] * std::log(pi) + (1.0 - Yte01[i]) * std::log(1.0 - pi));
|
|
260
326
|
}
|
|
261
327
|
fold_error(k - 1, j) = dev / nte;
|
|
262
328
|
}
|
|
@@ -286,7 +352,7 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
|
|
|
286
352
|
|
|
287
353
|
// Refit on full data at best_lambda (logistic ridge ± PGD).
|
|
288
354
|
OptimizerOutput full_out = logistic_full_fit(
|
|
289
|
-
|
|
355
|
+
Y01, Xtilde, E_Nn, best_lambda,
|
|
290
356
|
max_iter, tol, step_factor, verbose, with_pgd);
|
|
291
357
|
|
|
292
358
|
// Predict on `predict_data` if supplied (else empty vector).
|
|
@@ -298,7 +364,11 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
|
|
|
298
364
|
MatrixXd Ktest = kernel_cross_call(X, predict_data, maxdeg, center);
|
|
299
365
|
VectorXd d_inv = d_top.cwiseInverse();
|
|
300
366
|
VectorXd v = U_top * (d_inv.asDiagonal() * full_out.alpha);
|
|
301
|
-
VectorXd
|
|
367
|
+
VectorXd eta_full = Xtilde * full_out.alpha;
|
|
368
|
+
VectorXd Y01_full(n);
|
|
369
|
+
for (int i = 0; i < n; ++i) Y01_full[i] = Y[i];
|
|
370
|
+
const double b0_full = calibrate_logistic_intercept(Y01_full, eta_full);
|
|
371
|
+
VectorXd eta_pred = (Ktest * v).array() + b0_full;
|
|
302
372
|
predictions = (1.0 + (-eta_pred.array()).exp()).inverse();
|
|
303
373
|
}
|
|
304
374
|
|
|
@@ -347,8 +347,11 @@ extern "C" SEXP single_pcghal_classi_ridge_call(SEXP X_, SEXP Y_, SEXP maxdeg_,
|
|
|
347
347
|
if (Rf_length(Y_) != n) Rf_error("length(Y) must equal nrow(X).");
|
|
348
348
|
Map<const MatrixXd> X(REAL(X_), n, p);
|
|
349
349
|
Map<const VectorXd> Y01(REAL(Y_), n);
|
|
350
|
+
// Y must lie in [0,1]: hard {0,1} labels or soft EM-HAL posteriors. The
|
|
351
|
+
// logistic-ridge fit (norm="2") supports both.
|
|
350
352
|
for (int i = 0; i < n; ++i) {
|
|
351
|
-
if (Y01[i]
|
|
353
|
+
if (Y01[i] < -1e-12 || Y01[i] > 1.0 + 1e-12)
|
|
354
|
+
Rf_error("Y must be in [0,1]");
|
|
352
355
|
}
|
|
353
356
|
int maxdeg = Rf_isInteger(maxdeg_) ? INTEGER(maxdeg_)[0] : (int)REAL(maxdeg_)[0];
|
|
354
357
|
int npc = Rf_isInteger(npc_) ? INTEGER(npc_)[0] : (int)REAL(npc_)[0];
|
|
@@ -365,19 +368,28 @@ extern "C" SEXP single_pcghal_classi_ridge_call(SEXP X_, SEXP Y_, SEXP maxdeg_,
|
|
|
365
368
|
const int final_npc = (int)des.d.size();
|
|
366
369
|
MatrixXd Xtilde = des.U * des.d.asDiagonal();
|
|
367
370
|
|
|
368
|
-
VectorXd
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
371
|
+
auto calibrate_b0 = [](const VectorXd& y01, const VectorXd& eta) {
|
|
372
|
+
double b0 = 0.0;
|
|
373
|
+
for (int it = 0; it < 50; ++it) {
|
|
374
|
+
VectorXd z = eta.array() + b0;
|
|
375
|
+
VectorXd p = (1.0 + (-z.array()).exp()).inverse();
|
|
376
|
+
double g = (p - y01).sum();
|
|
377
|
+
double h = (p.array() * (1.0 - p.array())).sum();
|
|
378
|
+
if (std::abs(g) < 1e-10 || h < 1e-12) break;
|
|
379
|
+
b0 -= g / h;
|
|
380
|
+
}
|
|
381
|
+
return b0;
|
|
382
|
+
};
|
|
372
383
|
|
|
384
|
+
VectorXd alpha = logistic_ridge_init_y01(Y01, Xtilde, lambda);
|
|
373
385
|
VectorXd eta = Xtilde * alpha;
|
|
386
|
+
const double b0 = calibrate_b0(Y01, eta);
|
|
387
|
+
// Soft cross-entropy risk (equals the {-1,+1} logistic risk on hard labels).
|
|
374
388
|
double risk = 0.0;
|
|
375
389
|
for (int i = 0; i < n; ++i) {
|
|
376
|
-
double
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
else
|
|
380
|
-
risk += -ymu + std::log1p(std::exp(ymu));
|
|
390
|
+
const double pi = 1.0 / (1.0 + std::exp(-(eta[i] + b0)));
|
|
391
|
+
const double pp = std::min(1.0 - 1e-15, std::max(1e-15, pi));
|
|
392
|
+
risk += -(Y01[i] * std::log(pp) + (1.0 - Y01[i]) * std::log(1.0 - pp));
|
|
381
393
|
}
|
|
382
394
|
risk /= n;
|
|
383
395
|
|
|
@@ -392,7 +404,7 @@ extern "C" SEXP single_pcghal_classi_ridge_call(SEXP X_, SEXP Y_, SEXP maxdeg_,
|
|
|
392
404
|
MatrixXd Ktest = kernel_cross_call(X, Xtest, maxdeg, center);
|
|
393
405
|
VectorXd d_inv = des.d.array().cwiseInverse();
|
|
394
406
|
VectorXd v = des.U * (d_inv.asDiagonal() * alpha);
|
|
395
|
-
VectorXd log_odds = Ktest * v;
|
|
407
|
+
VectorXd log_odds = (Ktest * v).array() + b0;
|
|
396
408
|
predictions = PROTECT(Rf_allocVector(REALSXP, m_pred)); prot++;
|
|
397
409
|
std::copy(log_odds.data(), log_odds.data() + m_pred, REAL(predictions));
|
|
398
410
|
}
|
|
@@ -8,7 +8,7 @@ can be regenerated from the package root::
|
|
|
8
8
|
This uses ``alpha=0.05`` with the **moderate** DGP from the original
|
|
9
9
|
``ate/simulate_data.py`` script (vendored below — exact same draws thanks to
|
|
10
10
|
``np.random.seed`` + the same ``np.random.uniform`` / ``normal`` /
|
|
11
|
-
``binomial`` call order)
|
|
11
|
+
``binomial`` call order). ``ate_hapc`` is run with ``npcs = n - 1``.
|
|
12
12
|
|
|
13
13
|
* ``W1 ~ Uniform(-2, 2)``
|
|
14
14
|
* ``W2 ~ Normal(0, 0.5)``
|
|
@@ -37,7 +37,6 @@ DEMO_SEED = 456
|
|
|
37
37
|
DEMO_N = 300
|
|
38
38
|
DEMO_ALPHA = 0.05
|
|
39
39
|
DEMO_MAX_DEGREE = 2
|
|
40
|
-
DEMO_NPCS = 40
|
|
41
40
|
DEMO_NFOLDS = 4
|
|
42
41
|
DEMO_NORM = "1"
|
|
43
42
|
|
|
@@ -51,10 +50,10 @@ GRID_LENGTH_OUT = 8
|
|
|
51
50
|
|
|
52
51
|
FIGURE_NAME = "ate_hapc_diagnostics_demo.png"
|
|
53
52
|
|
|
54
|
-
# Pinned outputs (``alpha=0.05``, current C++/Python stack)
|
|
55
|
-
_EXPECTED_ESTIMATE = 0.
|
|
56
|
-
_EXPECTED_LOWER = -0.
|
|
57
|
-
_EXPECTED_UPPER = 0.
|
|
53
|
+
# Pinned outputs (``alpha=0.05``, ``npcs = n - 1``, current C++/Python stack)
|
|
54
|
+
_EXPECTED_ESTIMATE = 0.07790009282426053
|
|
55
|
+
_EXPECTED_LOWER = -0.050705979103681936
|
|
56
|
+
_EXPECTED_UPPER = 0.206506164752203
|
|
58
57
|
|
|
59
58
|
|
|
60
59
|
def _expit(x: np.ndarray) -> np.ndarray:
|
|
@@ -104,17 +103,22 @@ def run_ate_hapc_demo(
|
|
|
104
103
|
*,
|
|
105
104
|
plot_diagnostics: bool = False,
|
|
106
105
|
) -> "ATEResult":
|
|
107
|
-
"""Run ``ate_hapc`` with the pinned demo hyperparameters.
|
|
106
|
+
"""Run ``ate_hapc`` with the pinned demo hyperparameters.
|
|
107
|
+
|
|
108
|
+
Uses ``npcs = n - 1`` (sample size from ``load_demo_data``) for both
|
|
109
|
+
propensity and outcome stages, matching the usual HAL rank cap.
|
|
110
|
+
"""
|
|
108
111
|
from hapc import ate_hapc
|
|
109
112
|
|
|
110
113
|
W, A, Y = load_demo_data()
|
|
114
|
+
npcs = int(W.shape[0]) - 1
|
|
111
115
|
return ate_hapc(
|
|
112
116
|
W,
|
|
113
117
|
Y,
|
|
114
118
|
A,
|
|
115
119
|
alpha=DEMO_ALPHA,
|
|
116
120
|
max_degree=DEMO_MAX_DEGREE,
|
|
117
|
-
npcs=
|
|
121
|
+
npcs=npcs,
|
|
118
122
|
log_lambda_prop_min=LOG_LAMBDA_PROP_MIN,
|
|
119
123
|
log_lambda_prop_max=LOG_LAMBDA_PROP_MAX,
|
|
120
124
|
grid_length_prop=GRID_LENGTH_PROP,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|