hapc 2.0.2__tar.gz → 2.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hapc-2.0.2/python/hapc.egg-info → hapc-2.1.0}/PKG-INFO +1 -1
- {hapc-2.0.2 → hapc-2.1.0}/pyproject.toml +1 -1
- {hapc-2.0.2 → hapc-2.1.0}/python/hapc/__init__.py +1 -1
- {hapc-2.0.2 → hapc-2.1.0}/python/hapc/ate.py +55 -21
- {hapc-2.0.2 → hapc-2.1.0}/python/hapc/single.py +35 -6
- {hapc-2.0.2 → hapc-2.1.0/python/hapc.egg-info}/PKG-INFO +1 -1
- {hapc-2.0.2 → hapc-2.1.0}/src/pcghal_cv_classi_cpp.cpp +62 -19
- {hapc-2.0.2 → hapc-2.1.0}/src/r_bindings.cpp +16 -3
- {hapc-2.0.2 → hapc-2.1.0}/tests/test_ate_hapc_diagnostics_example.py +12 -8
- {hapc-2.0.2 → hapc-2.1.0}/CMakeLists.txt +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/LICENSE +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/MANIFEST.in +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/README.md +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/python/hapc/core.py +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/python/hapc/cv.py +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/python/hapc.egg-info/SOURCES.txt +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/python/hapc.egg-info/dependency_links.txt +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/python/hapc.egg-info/not-zip-safe +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/python/hapc.egg-info/requires.txt +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/python/hapc.egg-info/top_level.txt +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/setup.cfg +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/setup.py +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/src/bindings.cpp +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/src/cross_kernel.cpp +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/src/cv_classi.cpp +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/src/cv_fast_pchal.cpp +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/src/cv_fast_pchal_python.cpp +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/src/fast_pchal.cpp +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/src/hapc_core.hpp +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/src/logistic_call.cpp +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/src/mkernel.cpp +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/src/pcghal_call.cpp +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/src/pcghal_classi_call.cpp +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/src/pcghal_cv.cpp +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/src/pcghal_cv_cpp.cpp +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/src/pchal_design.cpp +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/src/ridge_wrappers.cpp +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/src/single_pcghal_cpp.cpp +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/src/single_pchar.cpp +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/tests/test_api.py +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/tests/test_ate.py +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/tests/test_core.py +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/tests/test_logistic_regression.py +0 -0
- {hapc-2.0.2 → hapc-2.1.0}/tests/test_r_vs_python_alpha.py +0 -0
|
@@ -16,9 +16,9 @@ Provides :func:`ate_hapc`, a high-level convenience wrapper that:
|
|
|
16
16
|
which ``|mean(EIF)| ≤ σ / (√n · log n)``. This is the **undersmoothed**
|
|
17
17
|
outcome model. If no λ in the grid meets the threshold, the smallest λ
|
|
18
18
|
is used.
|
|
19
|
-
5. Returns
|
|
20
|
-
``(1 - alpha)`` Wald confidence interval
|
|
21
|
-
that
|
|
19
|
+
5. Returns a **doubly robust** ATE point estimate at the undersmoothed outcome
|
|
20
|
+
model and a ``(1 - alpha)`` Wald confidence interval from the EIF evaluated
|
|
21
|
+
at that estimate (see Notes).
|
|
22
22
|
|
|
23
23
|
The function does not implement sample splitting / cross-fitting:
|
|
24
24
|
nuisances are fit on the full sample and the EIF is evaluated on the same
|
|
@@ -47,8 +47,9 @@ class ATEResult(NamedTuple):
|
|
|
47
47
|
Attributes
|
|
48
48
|
----------
|
|
49
49
|
estimate : float
|
|
50
|
-
|
|
51
|
-
``mean(
|
|
50
|
+
Doubly robust (AIPW-style) ATE at the undersmoothed outcome model:
|
|
51
|
+
``mean(A/π̂·(Y-μ̂₁)+μ̂₁ - (1-A)/(1-π̂)·(Y-μ̂₀) - μ̂₀)``, matching the
|
|
52
|
+
efficient influence function used for the Wald interval (see Notes).
|
|
52
53
|
lower : float
|
|
53
54
|
Lower endpoint of the ``(1 - alpha)`` Wald confidence interval.
|
|
54
55
|
upper : float
|
|
@@ -228,15 +229,25 @@ def ate_hapc(X: np.ndarray, Y: np.ndarray, A: np.ndarray,
|
|
|
228
229
|
specified).
|
|
229
230
|
2. Fix the propensity at its CV-best λ; refit on the full sample to
|
|
230
231
|
obtain ``π̂(W_i) = P(A=1 | W_i)``.
|
|
231
|
-
3. At the CV-best outcome λ, compute
|
|
232
|
-
|
|
232
|
+
3. At the CV-best outcome λ, compute a **plugin-centered** influence vector
|
|
233
|
+
(same mean as the DR EIF at :math:`\\psi=\\overline{\\mu}_1-\\overline{\\mu}_0`)
|
|
234
|
+
and let ``σ = std(·)``.
|
|
233
235
|
4. Threshold ``τ = σ / (√n · log n)``.
|
|
234
236
|
5. Walk the **outcome** λ grid in **decreasing**
|
|
235
237
|
order; pick the first (largest) λ for which
|
|
236
238
|
``|mean(EIF_diff)| ≤ τ`` — call it ``λ_u``.
|
|
237
|
-
6.
|
|
238
|
-
|
|
239
|
-
at
|
|
239
|
+
6. **Doubly robust** point estimate (same nuisances ``(π̂, μ̂₁, μ̂₀)``):
|
|
240
|
+
``ψ̂ = mean(A/π̂·(Y-μ̂₁)+μ̂₁ - (1-A)/(1-π̂)·(Y-μ̂₀) - μ̂₀)``.
|
|
241
|
+
One-step influence function (centered at ``ψ̂``):
|
|
242
|
+
``φ_i = A_i/π̂_i·(Y_i-μ̂_{1i}) + μ̂_{1i} - (1-A_i)/(1-π̂_i)·(Y_i-μ̂_{0i})
|
|
243
|
+
- μ̂_{0i} - ψ̂``.
|
|
244
|
+
CI: ``ψ̂ ± z_{1-α/2} · std(φ) / √n``.
|
|
245
|
+
|
|
246
|
+
This contrasts with **plug-in** G-computation ``mean(μ̂₁(W)-μ̂₀(W))``,
|
|
247
|
+
which can be materially biased when both nuisances are estimated on the
|
|
248
|
+
same sample and the outcome regressions are regularized. The DR
|
|
249
|
+
``ψ̂`` is consistent if **either** the propensity **or** the pair
|
|
250
|
+
``(μ̂₁, μ̂₀)`` is correctly specified (standard double robustness).
|
|
240
251
|
|
|
241
252
|
Examples
|
|
242
253
|
--------
|
|
@@ -329,38 +340,60 @@ def ate_hapc(X: np.ndarray, Y: np.ndarray, A: np.ndarray,
|
|
|
329
340
|
)
|
|
330
341
|
return p[:n], p[n:]
|
|
331
342
|
|
|
332
|
-
def
|
|
343
|
+
def _eif_plugin_centered(mu1: np.ndarray, mu0: np.ndarray) -> np.ndarray:
    """Return the plugin-centered influence vector for the undersmoothing gate.

    The vector is only used to decide how far to undersmooth: its standard
    deviation sets the threshold and its mean is compared against it.  The
    ATE that is finally reported is built from ``_psi_dr`` / ``_eif_dr``.

    ``A01``, ``pi1`` and ``Y`` are read from the enclosing scope.

    Parameters
    ----------
    mu1, mu0
        Outcome-model predictions under treatment and under control, one
        entry per observation.

    Returns
    -------
    np.ndarray
        Per-observation difference between the treated-arm and control-arm
        terms.

    Notes
    -----
    The centered regression terms ``mu - mu.mean()`` average to zero, so the
    mean of the result equals the mean of the weighted residual difference.
    NOTE(review): those terms are *subtracted* here, whereas the documented
    DR score adds ``μ̂``; the sign does not move the mean but it does change
    the standard deviation of the vector -- confirm this is intended.
    """
    weight_treated = A01 / pi1
    weight_control = (1.0 - A01) / (1.0 - pi1)

    arm_treated = weight_treated * (Y - mu1) - (mu1 - mu1.mean())
    arm_control = weight_control * (Y - mu0) - (mu0 - mu0.mean())
    return arm_treated - arm_control
|
|
336
353
|
|
|
354
|
+
def _psi_dr(mu1: np.ndarray, mu0: np.ndarray) -> float:
    """Return the doubly robust (AIPW-style) ATE point estimate.

    Averages the uncentered score
    ``A/π̂·(Y-μ̂₁) + μ̂₁ - (1-A)/(1-π̂)·(Y-μ̂₀) - μ̂₀`` over the sample.
    ``A01``, ``pi1`` and ``Y`` are read from the enclosing scope.

    Parameters
    ----------
    mu1, mu0
        Outcome-model predictions under treatment and under control, one
        entry per observation.

    Returns
    -------
    float
        Sample mean of the score.
    """
    residual_treated = (A01 / pi1) * (Y - mu1)
    residual_control = ((1.0 - A01) / (1.0 - pi1)) * (Y - mu0)
    # Keep the left-to-right evaluation order of the original expression so
    # the floating-point result is unchanged.
    score = residual_treated + mu1 - residual_control - mu0
    return float(np.mean(score))
|
|
363
|
+
|
|
364
|
+
def _eif_dr(mu1: np.ndarray, mu0: np.ndarray, psi: float) -> np.ndarray:
    """Return the doubly robust influence-function values centered at ``psi``.

    Each entry is
    ``A/π̂·(Y-μ̂₁) + μ̂₁ - (1-A)/(1-π̂)·(Y-μ̂₀) - μ̂₀ - psi``.
    ``A01``, ``pi1`` and ``Y`` are read from the enclosing scope.

    Parameters
    ----------
    mu1, mu0
        Outcome-model predictions under treatment and under control, one
        entry per observation.
    psi
        Value subtracted from every score entry (the point estimate the
        influence function is centered at).

    Returns
    -------
    np.ndarray
        Per-observation centered score.
    """
    residual_treated = (A01 / pi1) * (Y - mu1)
    residual_control = ((1.0 - A01) / (1.0 - pi1)) * (Y - mu0)
    # Same left-to-right evaluation order as the original single expression.
    return residual_treated + mu1 - residual_control - mu0 - psi
|
|
372
|
+
|
|
337
373
|
# --- 3. σ at CV configuration → threshold τ ----------------------------
|
|
338
374
|
mu1_cv, mu0_cv = _mu_pair(lam_out_cv)
|
|
339
|
-
eif_cv =
|
|
375
|
+
eif_cv = _eif_plugin_centered(mu1_cv, mu0_cv)
|
|
340
376
|
sigma_cv = float(np.std(eif_cv, ddof=0))
|
|
341
377
|
threshold = sigma_cv / (np.sqrt(n) * np.log(n))
|
|
342
378
|
|
|
343
379
|
# --- 4. Undersmoothing sweep: largest λ → smallest --------------------
|
|
344
380
|
lam_und: Optional[float] = None
|
|
345
|
-
eif_und: Optional[np.ndarray] = None
|
|
346
381
|
mu1_und = mu0_und = None
|
|
347
382
|
for lam in np.sort(lambdas_out)[::-1]:
|
|
348
383
|
try:
|
|
349
384
|
mu1, mu0 = _mu_pair(float(lam))
|
|
350
385
|
except Exception:
|
|
351
386
|
continue
|
|
352
|
-
eif =
|
|
387
|
+
eif = _eif_plugin_centered(mu1, mu0)
|
|
353
388
|
if abs(eif.mean()) <= threshold:
|
|
354
389
|
lam_und = float(lam)
|
|
355
390
|
mu1_und, mu0_und = mu1, mu0
|
|
356
|
-
eif_und = eif
|
|
357
391
|
break
|
|
358
392
|
|
|
359
|
-
if
|
|
393
|
+
if lam_und is None:
|
|
360
394
|
# Threshold never met → fall back to the smallest λ in the grid.
|
|
361
395
|
lam_und = float(lambdas_out.min())
|
|
362
396
|
mu1_und, mu0_und = _mu_pair(lam_und)
|
|
363
|
-
eif_und = _eif_diff(mu1_und, mu0_und)
|
|
364
397
|
|
|
365
398
|
if plot_diagnostics:
|
|
366
399
|
t_lams: list[float] = []
|
|
@@ -370,7 +403,7 @@ def ate_hapc(X: np.ndarray, Y: np.ndarray, A: np.ndarray,
|
|
|
370
403
|
mu1, mu0 = _mu_pair(float(lam))
|
|
371
404
|
except Exception:
|
|
372
405
|
continue
|
|
373
|
-
eif =
|
|
406
|
+
eif = _eif_plugin_centered(mu1, mu0)
|
|
374
407
|
t_lams.append(float(lam))
|
|
375
408
|
t_abs.append(float(np.abs(eif.mean())))
|
|
376
409
|
_plot_ate_diagnostics(
|
|
@@ -379,9 +412,10 @@ def ate_hapc(X: np.ndarray, Y: np.ndarray, A: np.ndarray,
|
|
|
379
412
|
lam_prop_cv, lam_out_cv, lam_und, threshold,
|
|
380
413
|
)
|
|
381
414
|
|
|
382
|
-
# --- 5.
|
|
383
|
-
psi =
|
|
384
|
-
|
|
415
|
+
# --- 5. Doubly robust point estimate + (1 - alpha) Wald CI --------------
|
|
416
|
+
psi = _psi_dr(mu1_und, mu0_und)
|
|
417
|
+
eif_dr = _eif_dr(mu1_und, mu0_und, psi)
|
|
418
|
+
sigma_und = float(np.std(eif_dr, ddof=0))
|
|
385
419
|
z = float(_normal.ppf(1.0 - alpha / 2.0))
|
|
386
420
|
half = z * sigma_und / np.sqrt(n)
|
|
387
421
|
|
|
@@ -95,6 +95,24 @@ def _to_pm1(Y: np.ndarray, *, verbose: bool = False) -> np.ndarray:
|
|
|
95
95
|
)
|
|
96
96
|
|
|
97
97
|
|
|
98
|
+
def _calibrate_logistic_intercept(y01: np.ndarray, eta: np.ndarray) -> float:
    """Solve for the intercept ``b0`` given a fixed linear predictor ``eta``.

    Finds ``b0`` such that ``sum(sigmoid(eta + b0)) == sum(y01)``, i.e. the
    maximum-likelihood intercept of a logistic model whose slope part is
    held fixed.

    The gradient ``g(b0) = sum(sigmoid(eta + b0) - y01)`` is increasing in
    ``b0``.  A plain Newton iteration on it is unsafe: when every
    ``eta + b0`` lies in a saturated tail of the sigmoid the curvature is
    tiny, so one step can jump thousands of units away, after which the
    curvature underflows and the iteration stops at that meaningless value.
    To avoid this the root is first bracketed analytically and Newton steps
    that leave the bracket are replaced by bisection.

    Parameters
    ----------
    y01
        Targets, flattened to 1-D.  Expected to be 0/1 labels; any values
        whose mean lies strictly between 0 and 1 are handled the same way.
    eta
        Fixed linear predictor, flattened to 1-D; same shape as ``y01``.

    Returns
    -------
    float
        The calibrated intercept.  ``0.0`` for empty input.  When no finite
        root exists (all targets in one class) or ``eta`` contains
        non-finite values, the unsafeguarded Newton iteration is used and
        its last iterate is returned.

    Raises
    ------
    ValueError
        If ``y01`` and ``eta`` differ in shape after flattening.
    """
    y01 = np.asarray(y01, dtype=np.float64).ravel()
    eta = np.asarray(eta, dtype=np.float64).ravel()
    if y01.shape != eta.shape:
        raise ValueError("y01 and eta must have the same shape")

    n = y01.size
    if n == 0:
        return 0.0

    total = float(np.sum(y01))
    # With ybar = total / n in (0, 1), every sigmoid(eta_i + b0) is <= ybar
    # at b0 = logit(ybar) - max(eta) and >= ybar at logit(ybar) - min(eta),
    # so the root of g lies between those two values.
    bracketed = (0.0 < total < n) and bool(np.isfinite(eta).all())
    if bracketed:
        logit_mean = float(np.log(total) - np.log(n - total))
        lo = logit_mean - float(np.max(eta))
        hi = logit_mean - float(np.min(eta))
        b0 = min(max(0.0, lo), hi)
    else:
        lo = hi = 0.0  # unused on the unbracketed path
        b0 = 0.0

    for _ in range(50):
        z = eta + b0
        # Overflow-free sigmoid: exp() only ever sees non-positive input.
        e = np.exp(-np.abs(z))
        p = np.where(z >= 0.0, 1.0 / (1.0 + e), e / (1.0 + e))
        g = float(np.sum(p - y01))
        if abs(g) < 1e-10:
            break
        h = float(np.sum(p * (1.0 - p)))

        if not bracketed:
            # No finite root is guaranteed: plain Newton, stopping once the
            # curvature vanishes.
            if h < 1e-12:
                break
            b0 -= g / h
            continue

        # g is increasing in b0, so its sign says on which side the root is.
        if g > 0.0:
            hi = b0
        else:
            lo = b0
        candidate = b0 - g / h if h >= 1e-12 else float("nan")
        if not (lo < candidate < hi):
            # Newton step unusable (left the bracket or no curvature).
            candidate = 0.5 * (lo + hi)
        if candidate == b0:
            break  # bracket has collapsed to working precision
        b0 = candidate

    return float(b0)
|
|
114
|
+
|
|
115
|
+
|
|
98
116
|
# ---------------------------------------------------------------------------
|
|
99
117
|
# Single λ — gaussian, norm in {"1", "2"} (closed-form)
|
|
100
118
|
# ---------------------------------------------------------------------------
|
|
@@ -299,6 +317,14 @@ def single_pcghal_classification(
|
|
|
299
317
|
res = pcghal_classification(Y_pm1, Xtilde, ENn, alpha0,
|
|
300
318
|
max_iter=max_iter, tol=tol,
|
|
301
319
|
step_factor=step_factor, verbose=verbose)
|
|
320
|
+
y01 = (Y_pm1 > 0).astype(np.float64)
|
|
321
|
+
eta_train = Xtilde @ np.asarray(res.alpha).ravel()
|
|
322
|
+
b0 = _calibrate_logistic_intercept(y01, eta_train)
|
|
323
|
+
ymu = Y_pm1 * (eta_train + b0)
|
|
324
|
+
risk = float(
|
|
325
|
+
np.where(ymu > 0, np.log1p(np.exp(-ymu)), -ymu + np.log1p(np.exp(ymu)))
|
|
326
|
+
.mean()
|
|
327
|
+
)
|
|
302
328
|
|
|
303
329
|
predictions = probabilities = predicted_classes = None
|
|
304
330
|
if predict is not None:
|
|
@@ -307,7 +333,7 @@ def single_pcghal_classification(
|
|
|
307
333
|
raise ValueError(f"predict must have {p} columns")
|
|
308
334
|
Ktest = cross_kernel_hapc(X, Xte, max_degree, center=center)
|
|
309
335
|
v = des.U[:, :final_npc] @ ((1.0 / (des.d[:final_npc] + 1e-12)) * res.alpha)
|
|
310
|
-
log_odds = Ktest @ v
|
|
336
|
+
log_odds = Ktest @ v + b0
|
|
311
337
|
predictions = log_odds
|
|
312
338
|
probabilities = 1.0 / (1.0 + np.exp(-log_odds))
|
|
313
339
|
predicted_classes = np.where(probabilities > 0.5, 1.0, -1.0)
|
|
@@ -315,7 +341,7 @@ def single_pcghal_classification(
|
|
|
315
341
|
return SinglePcghalClassificationResult(
|
|
316
342
|
alpha=res.alpha, predictions=predictions,
|
|
317
343
|
probabilities=probabilities, predicted_classes=predicted_classes,
|
|
318
|
-
lambda_=float(lambda_), risk=
|
|
344
|
+
lambda_=float(lambda_), risk=risk, iter=res.iter,
|
|
319
345
|
)
|
|
320
346
|
|
|
321
347
|
|
|
@@ -352,7 +378,9 @@ def single_pcghal_classification_ridge_only(
|
|
|
352
378
|
).ravel()
|
|
353
379
|
|
|
354
380
|
eta = Xtilde @ alpha
|
|
355
|
-
|
|
381
|
+
y01 = (Y_pm1 > 0).astype(np.float64)
|
|
382
|
+
b0 = _calibrate_logistic_intercept(y01, eta)
|
|
383
|
+
ymu = Y_pm1 * (eta + b0)
|
|
356
384
|
risk = float(
|
|
357
385
|
np.where(ymu > 0, np.log1p(np.exp(-ymu)), -ymu + np.log1p(np.exp(ymu)))
|
|
358
386
|
.mean()
|
|
@@ -365,7 +393,7 @@ def single_pcghal_classification_ridge_only(
|
|
|
365
393
|
raise ValueError(f"predict must have {p} columns")
|
|
366
394
|
Ktest = cross_kernel_hapc(X, Xte, max_degree, center=center)
|
|
367
395
|
v = des.U[:, :final_npc] @ ((1.0 / (des.d[:final_npc] + 1e-12)) * alpha)
|
|
368
|
-
log_odds = Ktest @ v
|
|
396
|
+
log_odds = Ktest @ v + b0
|
|
369
397
|
predictions = log_odds
|
|
370
398
|
probabilities = 1.0 / (1.0 + np.exp(-log_odds))
|
|
371
399
|
predicted_classes = np.where(probabilities > 0.5, 1.0, -1.0)
|
|
@@ -478,8 +506,9 @@ def single_pcghal_classification_lasso(
|
|
|
478
506
|
model = LogisticRegression(penalty="l1", **common_kw)
|
|
479
507
|
model.fit(_C(Xtilde), Y_01)
|
|
480
508
|
alpha = np.asarray(model.coef_, dtype=np.float64).ravel()
|
|
509
|
+
b0 = _calibrate_logistic_intercept(Y_01.astype(np.float64), Xtilde @ alpha)
|
|
481
510
|
|
|
482
|
-
eta = Xtilde @ alpha
|
|
511
|
+
eta = Xtilde @ alpha + b0
|
|
483
512
|
ymu = Y_pm1 * eta
|
|
484
513
|
risk = float(
|
|
485
514
|
np.where(ymu > 0, np.log1p(np.exp(-ymu)), -ymu + np.log1p(np.exp(ymu))).mean()
|
|
@@ -492,7 +521,7 @@ def single_pcghal_classification_lasso(
|
|
|
492
521
|
raise ValueError(f"predict must have {p} columns")
|
|
493
522
|
Ktest = cross_kernel_hapc(X, Xte, max_degree, center=center)
|
|
494
523
|
v = des.U[:, :final_npc] @ ((1.0 / (des.d[:final_npc] + 1e-12)) * alpha)
|
|
495
|
-
log_odds = Ktest @ v
|
|
524
|
+
log_odds = Ktest @ v + b0
|
|
496
525
|
predictions = log_odds
|
|
497
526
|
probabilities = 1.0 / (1.0 + np.exp(-log_odds))
|
|
498
527
|
predicted_classes = np.where(probabilities > 0.5, 1.0, -1.0)
|
|
@@ -66,6 +66,38 @@ VectorXd logistic_ridge_init(const VectorXd& Y_pm1, const MatrixXd& X, double la
|
|
|
66
66
|
return beta;
|
|
67
67
|
}
|
|
68
68
|
|
|
69
|
+
static double calibrate_logistic_intercept(const VectorXd& Y01,
|
|
70
|
+
const VectorXd& eta) {
|
|
71
|
+
const int n = (int)Y01.size();
|
|
72
|
+
if (eta.size() != n) {
|
|
73
|
+
throw std::runtime_error("calibrate_logistic_intercept: length mismatch");
|
|
74
|
+
}
|
|
75
|
+
double b0 = 0.0;
|
|
76
|
+
for (int it = 0; it < 50; ++it) {
|
|
77
|
+
const VectorXd z = eta.array() + b0;
|
|
78
|
+
const VectorXd p = (1.0 + (-z.array()).exp()).inverse();
|
|
79
|
+
const double g = (p - Y01).sum();
|
|
80
|
+
const double h = (p.array() * (1.0 - p.array())).sum();
|
|
81
|
+
if (std::abs(g) < 1e-10 || h < 1e-12) break;
|
|
82
|
+
b0 -= g / h;
|
|
83
|
+
}
|
|
84
|
+
return b0;
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
static double logistic_risk_pm1(const VectorXd& Y_pm1, const VectorXd& eta) {
|
|
88
|
+
const int n = (int)Y_pm1.size();
|
|
89
|
+
if (eta.size() != n) {
|
|
90
|
+
throw std::runtime_error("logistic_risk_pm1: length mismatch");
|
|
91
|
+
}
|
|
92
|
+
double risk = 0.0;
|
|
93
|
+
for (int i = 0; i < n; ++i) {
|
|
94
|
+
const double ymu = Y_pm1[i] * eta[i];
|
|
95
|
+
risk += (ymu > 0) ? std::log1p(std::exp(-ymu))
|
|
96
|
+
: -ymu + std::log1p(std::exp(ymu));
|
|
97
|
+
}
|
|
98
|
+
return risk / n;
|
|
99
|
+
}
|
|
100
|
+
|
|
69
101
|
// ---------------------------------------------------------------------------
|
|
70
102
|
// Build the Eigen-friendly "Xtilde = U_top * diag(d_top)" representation,
|
|
71
103
|
// returning final_npc (which may be capped by the design rank).
|
|
@@ -112,25 +144,24 @@ static OptimizerOutput logistic_full_fit(const VectorXd& Y_pm1,
|
|
|
112
144
|
double step_factor, bool verbose,
|
|
113
145
|
bool with_pgd) {
|
|
114
146
|
VectorXd alpha0 = logistic_ridge_init(Y_pm1, Xtilde, lambda);
|
|
115
|
-
if (with_pgd) {
|
|
116
|
-
return pcghal_classi_call(Y_pm1, Xtilde, E_Nn, alpha0,
|
|
117
|
-
max_iter, tol, step_factor, verbose);
|
|
118
|
-
}
|
|
119
|
-
// Logistic-ridge-only path: assemble the same OptimizerOutput shape with
|
|
120
|
-
// logistic training risk evaluated on (Y_pm1, Xtilde, alpha0).
|
|
121
147
|
const int n = Xtilde.rows();
|
|
122
|
-
VectorXd
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
148
|
+
VectorXd alpha_fit;
|
|
149
|
+
if (with_pgd) {
|
|
150
|
+
OptimizerOutput out = pcghal_classi_call(Y_pm1, Xtilde, E_Nn, alpha0,
|
|
151
|
+
max_iter, tol, step_factor, verbose);
|
|
152
|
+
alpha_fit = out.alpha;
|
|
153
|
+
} else {
|
|
154
|
+
alpha_fit = alpha0; // logistic ridge only (norm="2")
|
|
128
155
|
}
|
|
129
|
-
|
|
156
|
+
VectorXd Y01(n);
|
|
157
|
+
for (int i = 0; i < n; ++i) Y01[i] = (Y_pm1[i] > 0.0) ? 1.0 : 0.0;
|
|
158
|
+
VectorXd eta = Xtilde * alpha_fit;
|
|
159
|
+
const double b0 = calibrate_logistic_intercept(Y01, eta);
|
|
160
|
+
const double risk = logistic_risk_pm1(Y_pm1, eta.array() + b0);
|
|
130
161
|
OptimizerOutput out;
|
|
131
|
-
out.alpha =
|
|
132
|
-
out.alphaiters = MatrixXd::Zero(0,
|
|
133
|
-
out.beta = E_Nn *
|
|
162
|
+
out.alpha = alpha_fit;
|
|
163
|
+
out.alphaiters = MatrixXd::Zero(0, alpha_fit.size());
|
|
164
|
+
out.beta = E_Nn * alpha_fit;
|
|
134
165
|
out.risk = risk;
|
|
135
166
|
out.iter = 0;
|
|
136
167
|
return out;
|
|
@@ -199,7 +230,11 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
|
|
|
199
230
|
MatrixXd Ktest = kernel_cross_call(X, predict_data, maxdeg, center);
|
|
200
231
|
VectorXd d_inv = d_top.cwiseInverse();
|
|
201
232
|
VectorXd v = U_top * (d_inv.asDiagonal() * best_alpha);
|
|
202
|
-
VectorXd
|
|
233
|
+
VectorXd eta_full = Xtilde * best_alpha;
|
|
234
|
+
VectorXd Y01_full(n);
|
|
235
|
+
for (int i = 0; i < n; ++i) Y01_full[i] = Y[i];
|
|
236
|
+
const double b0_full = calibrate_logistic_intercept(Y01_full, eta_full);
|
|
237
|
+
VectorXd eta_pred = (Ktest * v).array() + b0_full;
|
|
203
238
|
predictions = (1.0 + (-eta_pred.array()).exp()).inverse();
|
|
204
239
|
}
|
|
205
240
|
CVClassiOutput out;
|
|
@@ -251,7 +286,11 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
|
|
|
251
286
|
alpha_fold = alpha0; // logistic ridge only (norm="2")
|
|
252
287
|
}
|
|
253
288
|
|
|
254
|
-
VectorXd
|
|
289
|
+
VectorXd eta_tr = Xtr * alpha_fold;
|
|
290
|
+
VectorXd Ytr01(ntr);
|
|
291
|
+
for (int i = 0; i < ntr; ++i) Ytr01[i] = (Ytr_pm1[i] > 0.0) ? 1.0 : 0.0;
|
|
292
|
+
const double b0_fold = calibrate_logistic_intercept(Ytr01, eta_tr);
|
|
293
|
+
VectorXd eta = (Xte * alpha_fold).array() + b0_fold;
|
|
255
294
|
VectorXd probs = (1.0 + (-eta.array()).exp()).inverse();
|
|
256
295
|
double dev = 0.0;
|
|
257
296
|
for (int i = 0; i < nte; ++i) {
|
|
@@ -298,7 +337,11 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
|
|
|
298
337
|
MatrixXd Ktest = kernel_cross_call(X, predict_data, maxdeg, center);
|
|
299
338
|
VectorXd d_inv = d_top.cwiseInverse();
|
|
300
339
|
VectorXd v = U_top * (d_inv.asDiagonal() * full_out.alpha);
|
|
301
|
-
VectorXd
|
|
340
|
+
VectorXd eta_full = Xtilde * full_out.alpha;
|
|
341
|
+
VectorXd Y01_full(n);
|
|
342
|
+
for (int i = 0; i < n; ++i) Y01_full[i] = Y[i];
|
|
343
|
+
const double b0_full = calibrate_logistic_intercept(Y01_full, eta_full);
|
|
344
|
+
VectorXd eta_pred = (Ktest * v).array() + b0_full;
|
|
302
345
|
predictions = (1.0 + (-eta_pred.array()).exp()).inverse();
|
|
303
346
|
}
|
|
304
347
|
|
|
@@ -368,12 +368,25 @@ extern "C" SEXP single_pcghal_classi_ridge_call(SEXP X_, SEXP Y_, SEXP maxdeg_,
|
|
|
368
368
|
VectorXd Y_pm1(n);
|
|
369
369
|
for (int i = 0; i < n; ++i) Y_pm1[i] = (Y01[i] == 1.0) ? 1.0 : -1.0;
|
|
370
370
|
|
|
371
|
-
|
|
371
|
+
// Solve for the intercept b0 such that sum(sigmoid(eta + b0)) == sum(y01),
// with the linear predictor `eta` held fixed.
//
// g(b0) = sum(sigmoid(eta + b0) - y01) is increasing in b0.  Plain Newton on
// it can overshoot by thousands of units when every eta[i] + b0 is in a
// saturated tail of the sigmoid, and then stop on vanishing curvature with a
// meaningless b0.  The root is therefore bracketed analytically and Newton
// steps that leave the bracket are replaced by bisection.
// Assumes y01 and eta have the same length -- TODO confirm at the call site.
auto calibrate_b0 = [](const VectorXd& y01, const VectorXd& eta) -> double {
    const int n = (int)eta.size();
    if (n == 0) return 0.0;

    const double total = y01.sum();
    // With ybar = total / n in (0, 1): every sigmoid(eta_i + b0) <= ybar at
    // b0 = logit(ybar) - max(eta) and >= ybar at logit(ybar) - min(eta).
    const bool bracketed = total > 0.0 && total < n && eta.allFinite();
    double lo = 0.0;
    double hi = 0.0;
    double b0 = 0.0;
    if (bracketed) {
        const double logit_mean = std::log(total) - std::log(n - total);
        lo = logit_mean - eta.maxCoeff();
        hi = logit_mean - eta.minCoeff();
        if (b0 < lo) b0 = lo;
        if (b0 > hi) b0 = hi;
    }

    for (int it = 0; it < 50; ++it) {
        double g = 0.0;
        double h = 0.0;
        for (int i = 0; i < n; ++i) {
            const double z = eta[i] + b0;
            // Overflow-free sigmoid: exp() only sees non-positive input.
            const double e = std::exp(-std::abs(z));
            const double p = (z >= 0.0) ? 1.0 / (1.0 + e) : e / (1.0 + e);
            g += p - y01[i];
            h += p * (1.0 - p);
        }
        if (std::abs(g) < 1e-10) break;

        if (!bracketed) {
            // No finite root guaranteed: plain Newton, stop once the
            // curvature vanishes.
            if (h < 1e-12) break;
            b0 -= g / h;
            continue;
        }

        // g is increasing in b0, so its sign says which side the root is on.
        if (g > 0.0) hi = b0; else lo = b0;
        // `lo` is a sentinel that always fails the open-interval test below,
        // forcing bisection when there is no usable curvature.
        double candidate = (h >= 1e-12) ? b0 - g / h : lo;
        if (!(candidate > lo && candidate < hi)) candidate = 0.5 * (lo + hi);
        if (candidate == b0) break;  // bracket collapsed to working precision
        b0 = candidate;
    }
    return b0;
};
|
|
372
383
|
|
|
384
|
+
VectorXd alpha = logistic_ridge_init(Y_pm1, Xtilde, lambda);
|
|
373
385
|
VectorXd eta = Xtilde * alpha;
|
|
386
|
+
const double b0 = calibrate_b0(Y01, eta);
|
|
374
387
|
double risk = 0.0;
|
|
375
388
|
for (int i = 0; i < n; ++i) {
|
|
376
|
-
double ymu = Y_pm1[i] * eta[i];
|
|
389
|
+
double ymu = Y_pm1[i] * (eta[i] + b0);
|
|
377
390
|
if (ymu > 0)
|
|
378
391
|
risk += std::log1p(std::exp(-ymu));
|
|
379
392
|
else
|
|
@@ -392,7 +405,7 @@ extern "C" SEXP single_pcghal_classi_ridge_call(SEXP X_, SEXP Y_, SEXP maxdeg_,
|
|
|
392
405
|
MatrixXd Ktest = kernel_cross_call(X, Xtest, maxdeg, center);
|
|
393
406
|
VectorXd d_inv = des.d.array().cwiseInverse();
|
|
394
407
|
VectorXd v = des.U * (d_inv.asDiagonal() * alpha);
|
|
395
|
-
VectorXd log_odds = Ktest * v;
|
|
408
|
+
VectorXd log_odds = (Ktest * v).array() + b0;
|
|
396
409
|
predictions = PROTECT(Rf_allocVector(REALSXP, m_pred)); prot++;
|
|
397
410
|
std::copy(log_odds.data(), log_odds.data() + m_pred, REAL(predictions));
|
|
398
411
|
}
|
|
@@ -8,7 +8,7 @@ can be regenerated from the package root::
|
|
|
8
8
|
This uses ``alpha=0.05`` with the **moderate** DGP from the original
|
|
9
9
|
``ate/simulate_data.py`` script (vendored below — exact same draws thanks to
|
|
10
10
|
``np.random.seed`` + the same ``np.random.uniform`` / ``normal`` /
|
|
11
|
-
``binomial`` call order)
|
|
11
|
+
``binomial`` call order). ``ate_hapc`` is run with ``npcs = n - 1``.
|
|
12
12
|
|
|
13
13
|
* ``W1 ~ Uniform(-2, 2)``
|
|
14
14
|
* ``W2 ~ Normal(0, 0.5)``
|
|
@@ -37,7 +37,6 @@ DEMO_SEED = 456
|
|
|
37
37
|
DEMO_N = 300
|
|
38
38
|
DEMO_ALPHA = 0.05
|
|
39
39
|
DEMO_MAX_DEGREE = 2
|
|
40
|
-
DEMO_NPCS = 40
|
|
41
40
|
DEMO_NFOLDS = 4
|
|
42
41
|
DEMO_NORM = "1"
|
|
43
42
|
|
|
@@ -51,10 +50,10 @@ GRID_LENGTH_OUT = 8
|
|
|
51
50
|
|
|
52
51
|
FIGURE_NAME = "ate_hapc_diagnostics_demo.png"
|
|
53
52
|
|
|
54
|
-
# Pinned outputs (``alpha=0.05``, current C++/Python stack)
|
|
55
|
-
_EXPECTED_ESTIMATE = 0.
|
|
56
|
-
_EXPECTED_LOWER = -0.
|
|
57
|
-
_EXPECTED_UPPER = 0.
|
|
53
|
+
# Pinned outputs (``alpha=0.05``, ``npcs = n - 1``, current C++/Python stack)
|
|
54
|
+
_EXPECTED_ESTIMATE = 0.07790009282426053
|
|
55
|
+
_EXPECTED_LOWER = -0.050705979103681936
|
|
56
|
+
_EXPECTED_UPPER = 0.206506164752203
|
|
58
57
|
|
|
59
58
|
|
|
60
59
|
def _expit(x: np.ndarray) -> np.ndarray:
|
|
@@ -104,17 +103,22 @@ def run_ate_hapc_demo(
|
|
|
104
103
|
*,
|
|
105
104
|
plot_diagnostics: bool = False,
|
|
106
105
|
) -> "ATEResult":
|
|
107
|
-
"""Run ``ate_hapc`` with the pinned demo hyperparameters.
|
|
106
|
+
"""Run ``ate_hapc`` with the pinned demo hyperparameters.
|
|
107
|
+
|
|
108
|
+
Uses ``npcs = n - 1`` (sample size from ``load_demo_data``) for both
|
|
109
|
+
propensity and outcome stages, matching the usual HAL rank cap.
|
|
110
|
+
"""
|
|
108
111
|
from hapc import ate_hapc
|
|
109
112
|
|
|
110
113
|
W, A, Y = load_demo_data()
|
|
114
|
+
npcs = int(W.shape[0]) - 1
|
|
111
115
|
return ate_hapc(
|
|
112
116
|
W,
|
|
113
117
|
Y,
|
|
114
118
|
A,
|
|
115
119
|
alpha=DEMO_ALPHA,
|
|
116
120
|
max_degree=DEMO_MAX_DEGREE,
|
|
117
|
-
npcs=
|
|
121
|
+
npcs=npcs,
|
|
118
122
|
log_lambda_prop_min=LOG_LAMBDA_PROP_MIN,
|
|
119
123
|
log_lambda_prop_max=LOG_LAMBDA_PROP_MAX,
|
|
120
124
|
grid_length_prop=GRID_LENGTH_PROP,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|