hapc 2.0.2__tar.gz → 2.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {hapc-2.0.2/python/hapc.egg-info → hapc-2.1.0}/PKG-INFO +1 -1
  2. {hapc-2.0.2 → hapc-2.1.0}/pyproject.toml +1 -1
  3. {hapc-2.0.2 → hapc-2.1.0}/python/hapc/__init__.py +1 -1
  4. {hapc-2.0.2 → hapc-2.1.0}/python/hapc/ate.py +55 -21
  5. {hapc-2.0.2 → hapc-2.1.0}/python/hapc/single.py +35 -6
  6. {hapc-2.0.2 → hapc-2.1.0/python/hapc.egg-info}/PKG-INFO +1 -1
  7. {hapc-2.0.2 → hapc-2.1.0}/src/pcghal_cv_classi_cpp.cpp +62 -19
  8. {hapc-2.0.2 → hapc-2.1.0}/src/r_bindings.cpp +16 -3
  9. {hapc-2.0.2 → hapc-2.1.0}/tests/test_ate_hapc_diagnostics_example.py +12 -8
  10. {hapc-2.0.2 → hapc-2.1.0}/CMakeLists.txt +0 -0
  11. {hapc-2.0.2 → hapc-2.1.0}/LICENSE +0 -0
  12. {hapc-2.0.2 → hapc-2.1.0}/MANIFEST.in +0 -0
  13. {hapc-2.0.2 → hapc-2.1.0}/README.md +0 -0
  14. {hapc-2.0.2 → hapc-2.1.0}/python/hapc/core.py +0 -0
  15. {hapc-2.0.2 → hapc-2.1.0}/python/hapc/cv.py +0 -0
  16. {hapc-2.0.2 → hapc-2.1.0}/python/hapc.egg-info/SOURCES.txt +0 -0
  17. {hapc-2.0.2 → hapc-2.1.0}/python/hapc.egg-info/dependency_links.txt +0 -0
  18. {hapc-2.0.2 → hapc-2.1.0}/python/hapc.egg-info/not-zip-safe +0 -0
  19. {hapc-2.0.2 → hapc-2.1.0}/python/hapc.egg-info/requires.txt +0 -0
  20. {hapc-2.0.2 → hapc-2.1.0}/python/hapc.egg-info/top_level.txt +0 -0
  21. {hapc-2.0.2 → hapc-2.1.0}/setup.cfg +0 -0
  22. {hapc-2.0.2 → hapc-2.1.0}/setup.py +0 -0
  23. {hapc-2.0.2 → hapc-2.1.0}/src/bindings.cpp +0 -0
  24. {hapc-2.0.2 → hapc-2.1.0}/src/cross_kernel.cpp +0 -0
  25. {hapc-2.0.2 → hapc-2.1.0}/src/cv_classi.cpp +0 -0
  26. {hapc-2.0.2 → hapc-2.1.0}/src/cv_fast_pchal.cpp +0 -0
  27. {hapc-2.0.2 → hapc-2.1.0}/src/cv_fast_pchal_python.cpp +0 -0
  28. {hapc-2.0.2 → hapc-2.1.0}/src/fast_pchal.cpp +0 -0
  29. {hapc-2.0.2 → hapc-2.1.0}/src/hapc_core.hpp +0 -0
  30. {hapc-2.0.2 → hapc-2.1.0}/src/logistic_call.cpp +0 -0
  31. {hapc-2.0.2 → hapc-2.1.0}/src/mkernel.cpp +0 -0
  32. {hapc-2.0.2 → hapc-2.1.0}/src/pcghal_call.cpp +0 -0
  33. {hapc-2.0.2 → hapc-2.1.0}/src/pcghal_classi_call.cpp +0 -0
  34. {hapc-2.0.2 → hapc-2.1.0}/src/pcghal_cv.cpp +0 -0
  35. {hapc-2.0.2 → hapc-2.1.0}/src/pcghal_cv_cpp.cpp +0 -0
  36. {hapc-2.0.2 → hapc-2.1.0}/src/pchal_design.cpp +0 -0
  37. {hapc-2.0.2 → hapc-2.1.0}/src/ridge_wrappers.cpp +0 -0
  38. {hapc-2.0.2 → hapc-2.1.0}/src/single_pcghal_cpp.cpp +0 -0
  39. {hapc-2.0.2 → hapc-2.1.0}/src/single_pchar.cpp +0 -0
  40. {hapc-2.0.2 → hapc-2.1.0}/tests/test_api.py +0 -0
  41. {hapc-2.0.2 → hapc-2.1.0}/tests/test_ate.py +0 -0
  42. {hapc-2.0.2 → hapc-2.1.0}/tests/test_core.py +0 -0
  43. {hapc-2.0.2 → hapc-2.1.0}/tests/test_logistic_regression.py +0 -0
  44. {hapc-2.0.2 → hapc-2.1.0}/tests/test_r_vs_python_alpha.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hapc
3
- Version: 2.0.2
3
+ Version: 2.1.0
4
4
  Summary: Highly Adaptive Principal Components
5
5
  Home-page: https://github.com/meixide/hapc
6
6
  Author: Carlos García Meixide
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "hapc"
7
- version = "2.0.2"
7
+ version = "2.1.0"
8
8
  description = "Highly Adaptive Principal Components"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -19,7 +19,7 @@ Lower-level building blocks:
19
19
  - :func:`ate_hapc` — ATE estimate + Wald CI via HAPC + outcome undersmoothing.
20
20
  """
21
21
 
22
- __version__ = "2.0.2"
22
+ __version__ = "2.1.0"
23
23
 
24
24
  from .core import (
25
25
  DesignOutput,
@@ -16,9 +16,9 @@ Provides :func:`ate_hapc`, a high-level convenience wrapper that:
16
16
  which ``|mean(EIF)| ≤ σ / (√n · log n)``. This is the **undersmoothed**
17
17
  outcome model. If no λ in the grid meets the threshold, the smallest λ
18
18
  is used.
19
- 5. Returns the plug-in ATE point estimate at the undersmoothed model and a
20
- ``(1 - alpha)`` Wald confidence interval based on the σ of the EIF at
21
- that undersmoothed model.
19
+ 5. Returns a **doubly robust** ATE point estimate at the undersmoothed outcome
20
+ model and a ``(1 - alpha)`` Wald confidence interval from the EIF evaluated
21
+ at that estimate (see Notes).
22
22
 
23
23
  The function does not implement sample splitting / cross-fitting:
24
24
  nuisances are fit on the full sample and the EIF is evaluated on the same
@@ -47,8 +47,9 @@ class ATEResult(NamedTuple):
47
47
  Attributes
48
48
  ----------
49
49
  estimate : float
50
- Plug-in ATE at the undersmoothed outcome model:
51
- ``mean(μ̂_1(W) - μ̂_0(W))``.
50
+ Doubly robust (AIPW-style) ATE at the undersmoothed outcome model:
51
+ ``mean(A/π̂·(Y-μ̂₁)+μ̂₁ - (1-A)/(1-π̂)·(Y-μ̂₀) - μ̂₀)``, matching the
52
+ efficient influence function used for the Wald interval (see Notes).
52
53
  lower : float
53
54
  Lower endpoint of the ``(1 - alpha)`` Wald confidence interval.
54
55
  upper : float
@@ -228,15 +229,25 @@ def ate_hapc(X: np.ndarray, Y: np.ndarray, A: np.ndarray,
228
229
  specified).
229
230
  2. Fix the propensity at its CV-best λ; refit on the full sample to
230
231
  obtain ``π̂(W_i) = P(A=1 | W_i)``.
231
- 3. At the CV-best outcome λ, compute the ATE EIF
232
- ``φ̂_diff = φ̂_1 - φ̂_0`` and let ``σ = std(φ̂_diff)``.
232
+ 3. At the CV-best outcome λ, compute a **plugin-centered** influence vector
233
+ (same mean as the DR EIF at :math:`\\psi=\\overline{\\mu}_1-\\overline{\\mu}_0`)
234
+ and let ``σ = std(·)``.
233
235
  4. Threshold ``τ = σ / (√n · log n)``.
234
236
  5. Walk the **outcome** λ grid in **decreasing**
235
237
  order; pick the first (largest) λ for which
236
238
  ``|mean(EIF_diff)| ≤ τ`` — call it ``λ_u``.
237
- 6. Plug-in estimate: ``ψ̂ = mean(μ̂_1(W; λ_u) - μ̂_0(W; λ_u))``.
238
- CI: ``ψ̂ ± z_{1 - α/2} · σ_u / √n`` where ``σ_u = std(EIF_diff)``
239
- at ``λ_u``.
239
+ 6. **Doubly robust** point estimate (same nuisances ``(π̂, μ̂₁, μ̂₀)``):
240
+ ``ψ̂ = mean(A/π̂·(Y-μ̂₁)+μ̂₁ - (1-A)/(1-π̂)·(Y-μ̂₀) - μ̂₀)``.
241
+ One-step influence function (centered at ``ψ̂``):
242
+ ``φ_i = A_i/π̂_i·(Y_i-μ̂_{1i}) + μ̂_{1i} - (1-A_i)/(1-π̂_i)·(Y_i-μ̂_{0i})
243
+ - μ̂_{0i} - ψ̂``.
244
+ CI: ``ψ̂ ± z_{1-α/2} · std(φ) / √n``.
245
+
246
+ This contrasts with **plug-in** G-computation ``mean(μ̂₁(W)-μ̂₀(W))``,
247
+ which can be materially biased when both nuisances are estimated on the
248
+ same sample and the outcome regressions are regularized. The DR
249
+ ``ψ̂`` is consistent if **either** the propensity **or** the pair
250
+ ``(μ̂₁, μ̂₀)`` is correctly specified (standard double robustness).
240
251
 
241
252
  Examples
242
253
  --------
@@ -329,38 +340,60 @@ def ate_hapc(X: np.ndarray, Y: np.ndarray, A: np.ndarray,
329
340
  )
330
341
  return p[:n], p[n:]
331
342
 
332
- def _eif_diff(mu1: np.ndarray, mu0: np.ndarray) -> np.ndarray:
343
+ def _eif_plugin_centered(mu1: np.ndarray, mu0: np.ndarray) -> np.ndarray:
344
+ """Plugin-centered influence vector (undersmoothing gate only).
345
+
346
+ Its mean matches the DR EIF evaluated at plug-in
347
+ :math:`\\psi=\\overline{\\mu}_1-\\overline{\\mu}_0`. The returned ATE
348
+ uses ``_psi_dr`` / ``_eif_dr`` instead.
349
+ """
333
350
  eif1 = (A01 / pi1) * (Y - mu1) - (mu1 - mu1.mean())
334
351
  eif0 = ((1.0 - A01) / (1.0 - pi1)) * (Y - mu0) - (mu0 - mu0.mean())
335
352
  return eif1 - eif0
336
353
 
354
+ def _psi_dr(mu1: np.ndarray, mu0: np.ndarray) -> float:
355
+ return float(
356
+ np.mean(
357
+ (A01 / pi1) * (Y - mu1)
358
+ + mu1
359
+ - ((1.0 - A01) / (1.0 - pi1)) * (Y - mu0)
360
+ - mu0
361
+ )
362
+ )
363
+
364
+ def _eif_dr(mu1: np.ndarray, mu0: np.ndarray, psi: float) -> np.ndarray:
365
+ return (
366
+ (A01 / pi1) * (Y - mu1)
367
+ + mu1
368
+ - ((1.0 - A01) / (1.0 - pi1)) * (Y - mu0)
369
+ - mu0
370
+ - psi
371
+ )
372
+
337
373
  # --- 3. σ at CV configuration → threshold τ ----------------------------
338
374
  mu1_cv, mu0_cv = _mu_pair(lam_out_cv)
339
- eif_cv = _eif_diff(mu1_cv, mu0_cv)
375
+ eif_cv = _eif_plugin_centered(mu1_cv, mu0_cv)
340
376
  sigma_cv = float(np.std(eif_cv, ddof=0))
341
377
  threshold = sigma_cv / (np.sqrt(n) * np.log(n))
342
378
 
343
379
  # --- 4. Undersmoothing sweep: largest λ → smallest --------------------
344
380
  lam_und: Optional[float] = None
345
- eif_und: Optional[np.ndarray] = None
346
381
  mu1_und = mu0_und = None
347
382
  for lam in np.sort(lambdas_out)[::-1]:
348
383
  try:
349
384
  mu1, mu0 = _mu_pair(float(lam))
350
385
  except Exception:
351
386
  continue
352
- eif = _eif_diff(mu1, mu0)
387
+ eif = _eif_plugin_centered(mu1, mu0)
353
388
  if abs(eif.mean()) <= threshold:
354
389
  lam_und = float(lam)
355
390
  mu1_und, mu0_und = mu1, mu0
356
- eif_und = eif
357
391
  break
358
392
 
359
- if eif_und is None:
393
+ if lam_und is None:
360
394
  # Threshold never met → fall back to the smallest λ in the grid.
361
395
  lam_und = float(lambdas_out.min())
362
396
  mu1_und, mu0_und = _mu_pair(lam_und)
363
- eif_und = _eif_diff(mu1_und, mu0_und)
364
397
 
365
398
  if plot_diagnostics:
366
399
  t_lams: list[float] = []
@@ -370,7 +403,7 @@ def ate_hapc(X: np.ndarray, Y: np.ndarray, A: np.ndarray,
370
403
  mu1, mu0 = _mu_pair(float(lam))
371
404
  except Exception:
372
405
  continue
373
- eif = _eif_diff(mu1, mu0)
406
+ eif = _eif_plugin_centered(mu1, mu0)
374
407
  t_lams.append(float(lam))
375
408
  t_abs.append(float(np.abs(eif.mean())))
376
409
  _plot_ate_diagnostics(
@@ -379,9 +412,10 @@ def ate_hapc(X: np.ndarray, Y: np.ndarray, A: np.ndarray,
379
412
  lam_prop_cv, lam_out_cv, lam_und, threshold,
380
413
  )
381
414
 
382
- # --- 5. Point estimate + (1 - alpha) Wald CI --------------------------
383
- psi = float(np.mean(mu1_und - mu0_und))
384
- sigma_und = float(np.std(eif_und, ddof=0))
415
+ # --- 5. Doubly robust point estimate + (1 - alpha) Wald CI --------------
416
+ psi = _psi_dr(mu1_und, mu0_und)
417
+ eif_dr = _eif_dr(mu1_und, mu0_und, psi)
418
+ sigma_und = float(np.std(eif_dr, ddof=0))
385
419
  z = float(_normal.ppf(1.0 - alpha / 2.0))
386
420
  half = z * sigma_und / np.sqrt(n)
387
421
 
@@ -95,6 +95,24 @@ def _to_pm1(Y: np.ndarray, *, verbose: bool = False) -> np.ndarray:
95
95
  )
96
96
 
97
97
 
98
+ def _calibrate_logistic_intercept(y01: np.ndarray, eta: np.ndarray) -> float:
99
+ """Newton calibration for intercept with fixed linear predictor ``eta``."""
100
+ y01 = np.asarray(y01, dtype=np.float64).ravel()
101
+ eta = np.asarray(eta, dtype=np.float64).ravel()
102
+ if y01.shape != eta.shape:
103
+ raise ValueError("y01 and eta must have the same shape")
104
+ b0 = 0.0
105
+ for _ in range(50):
106
+ z = eta + b0
107
+ p = 1.0 / (1.0 + np.exp(-z))
108
+ g = float(np.sum(p - y01))
109
+ h = float(np.sum(p * (1.0 - p)))
110
+ if abs(g) < 1e-10 or h < 1e-12:
111
+ break
112
+ b0 -= g / h
113
+ return float(b0)
114
+
115
+
98
116
  # ---------------------------------------------------------------------------
99
117
  # Single λ — gaussian, norm in {"1", "2"} (closed-form)
100
118
  # ---------------------------------------------------------------------------
@@ -299,6 +317,14 @@ def single_pcghal_classification(
299
317
  res = pcghal_classification(Y_pm1, Xtilde, ENn, alpha0,
300
318
  max_iter=max_iter, tol=tol,
301
319
  step_factor=step_factor, verbose=verbose)
320
+ y01 = (Y_pm1 > 0).astype(np.float64)
321
+ eta_train = Xtilde @ np.asarray(res.alpha).ravel()
322
+ b0 = _calibrate_logistic_intercept(y01, eta_train)
323
+ ymu = Y_pm1 * (eta_train + b0)
324
+ risk = float(
325
+ np.where(ymu > 0, np.log1p(np.exp(-ymu)), -ymu + np.log1p(np.exp(ymu)))
326
+ .mean()
327
+ )
302
328
 
303
329
  predictions = probabilities = predicted_classes = None
304
330
  if predict is not None:
@@ -307,7 +333,7 @@ def single_pcghal_classification(
307
333
  raise ValueError(f"predict must have {p} columns")
308
334
  Ktest = cross_kernel_hapc(X, Xte, max_degree, center=center)
309
335
  v = des.U[:, :final_npc] @ ((1.0 / (des.d[:final_npc] + 1e-12)) * res.alpha)
310
- log_odds = Ktest @ v
336
+ log_odds = Ktest @ v + b0
311
337
  predictions = log_odds
312
338
  probabilities = 1.0 / (1.0 + np.exp(-log_odds))
313
339
  predicted_classes = np.where(probabilities > 0.5, 1.0, -1.0)
@@ -315,7 +341,7 @@ def single_pcghal_classification(
315
341
  return SinglePcghalClassificationResult(
316
342
  alpha=res.alpha, predictions=predictions,
317
343
  probabilities=probabilities, predicted_classes=predicted_classes,
318
- lambda_=float(lambda_), risk=res.risk, iter=res.iter,
344
+ lambda_=float(lambda_), risk=risk, iter=res.iter,
319
345
  )
320
346
 
321
347
 
@@ -352,7 +378,9 @@ def single_pcghal_classification_ridge_only(
352
378
  ).ravel()
353
379
 
354
380
  eta = Xtilde @ alpha
355
- ymu = Y_pm1 * eta
381
+ y01 = (Y_pm1 > 0).astype(np.float64)
382
+ b0 = _calibrate_logistic_intercept(y01, eta)
383
+ ymu = Y_pm1 * (eta + b0)
356
384
  risk = float(
357
385
  np.where(ymu > 0, np.log1p(np.exp(-ymu)), -ymu + np.log1p(np.exp(ymu)))
358
386
  .mean()
@@ -365,7 +393,7 @@ def single_pcghal_classification_ridge_only(
365
393
  raise ValueError(f"predict must have {p} columns")
366
394
  Ktest = cross_kernel_hapc(X, Xte, max_degree, center=center)
367
395
  v = des.U[:, :final_npc] @ ((1.0 / (des.d[:final_npc] + 1e-12)) * alpha)
368
- log_odds = Ktest @ v
396
+ log_odds = Ktest @ v + b0
369
397
  predictions = log_odds
370
398
  probabilities = 1.0 / (1.0 + np.exp(-log_odds))
371
399
  predicted_classes = np.where(probabilities > 0.5, 1.0, -1.0)
@@ -478,8 +506,9 @@ def single_pcghal_classification_lasso(
478
506
  model = LogisticRegression(penalty="l1", **common_kw)
479
507
  model.fit(_C(Xtilde), Y_01)
480
508
  alpha = np.asarray(model.coef_, dtype=np.float64).ravel()
509
+ b0 = _calibrate_logistic_intercept(Y_01.astype(np.float64), Xtilde @ alpha)
481
510
 
482
- eta = Xtilde @ alpha
511
+ eta = Xtilde @ alpha + b0
483
512
  ymu = Y_pm1 * eta
484
513
  risk = float(
485
514
  np.where(ymu > 0, np.log1p(np.exp(-ymu)), -ymu + np.log1p(np.exp(ymu))).mean()
@@ -492,7 +521,7 @@ def single_pcghal_classification_lasso(
492
521
  raise ValueError(f"predict must have {p} columns")
493
522
  Ktest = cross_kernel_hapc(X, Xte, max_degree, center=center)
494
523
  v = des.U[:, :final_npc] @ ((1.0 / (des.d[:final_npc] + 1e-12)) * alpha)
495
- log_odds = Ktest @ v
524
+ log_odds = Ktest @ v + b0
496
525
  predictions = log_odds
497
526
  probabilities = 1.0 / (1.0 + np.exp(-log_odds))
498
527
  predicted_classes = np.where(probabilities > 0.5, 1.0, -1.0)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hapc
3
- Version: 2.0.2
3
+ Version: 2.1.0
4
4
  Summary: Highly Adaptive Principal Components
5
5
  Home-page: https://github.com/meixide/hapc
6
6
  Author: Carlos García Meixide
@@ -66,6 +66,38 @@ VectorXd logistic_ridge_init(const VectorXd& Y_pm1, const MatrixXd& X, double la
66
66
  return beta;
67
67
  }
68
68
 
69
+ static double calibrate_logistic_intercept(const VectorXd& Y01,
70
+ const VectorXd& eta) {
71
+ const int n = (int)Y01.size();
72
+ if (eta.size() != n) {
73
+ throw std::runtime_error("calibrate_logistic_intercept: length mismatch");
74
+ }
75
+ double b0 = 0.0;
76
+ for (int it = 0; it < 50; ++it) {
77
+ const VectorXd z = eta.array() + b0;
78
+ const VectorXd p = (1.0 + (-z.array()).exp()).inverse();
79
+ const double g = (p - Y01).sum();
80
+ const double h = (p.array() * (1.0 - p.array())).sum();
81
+ if (std::abs(g) < 1e-10 || h < 1e-12) break;
82
+ b0 -= g / h;
83
+ }
84
+ return b0;
85
+ }
86
+
87
+ static double logistic_risk_pm1(const VectorXd& Y_pm1, const VectorXd& eta) {
88
+ const int n = (int)Y_pm1.size();
89
+ if (eta.size() != n) {
90
+ throw std::runtime_error("logistic_risk_pm1: length mismatch");
91
+ }
92
+ double risk = 0.0;
93
+ for (int i = 0; i < n; ++i) {
94
+ const double ymu = Y_pm1[i] * eta[i];
95
+ risk += (ymu > 0) ? std::log1p(std::exp(-ymu))
96
+ : -ymu + std::log1p(std::exp(ymu));
97
+ }
98
+ return risk / n;
99
+ }
100
+
69
101
  // ---------------------------------------------------------------------------
70
102
  // Build the Eigen-friendly "Xtilde = U_top * diag(d_top)" representation,
71
103
  // returning final_npc (which may be capped by the design rank).
@@ -112,25 +144,24 @@ static OptimizerOutput logistic_full_fit(const VectorXd& Y_pm1,
112
144
  double step_factor, bool verbose,
113
145
  bool with_pgd) {
114
146
  VectorXd alpha0 = logistic_ridge_init(Y_pm1, Xtilde, lambda);
115
- if (with_pgd) {
116
- return pcghal_classi_call(Y_pm1, Xtilde, E_Nn, alpha0,
117
- max_iter, tol, step_factor, verbose);
118
- }
119
- // Logistic-ridge-only path: assemble the same OptimizerOutput shape with
120
- // logistic training risk evaluated on (Y_pm1, Xtilde, alpha0).
121
147
  const int n = Xtilde.rows();
122
- VectorXd eta = Xtilde * alpha0;
123
- double risk = 0.0;
124
- for (int i = 0; i < n; ++i) {
125
- const double ymu = Y_pm1[i] * eta[i];
126
- risk += (ymu > 0) ? std::log1p(std::exp(-ymu))
127
- : -ymu + std::log1p(std::exp(ymu));
148
+ VectorXd alpha_fit;
149
+ if (with_pgd) {
150
+ OptimizerOutput out = pcghal_classi_call(Y_pm1, Xtilde, E_Nn, alpha0,
151
+ max_iter, tol, step_factor, verbose);
152
+ alpha_fit = out.alpha;
153
+ } else {
154
+ alpha_fit = alpha0; // logistic ridge only (norm="2")
128
155
  }
129
- risk /= n;
156
+ VectorXd Y01(n);
157
+ for (int i = 0; i < n; ++i) Y01[i] = (Y_pm1[i] > 0.0) ? 1.0 : 0.0;
158
+ VectorXd eta = Xtilde * alpha_fit;
159
+ const double b0 = calibrate_logistic_intercept(Y01, eta);
160
+ const double risk = logistic_risk_pm1(Y_pm1, eta.array() + b0);
130
161
  OptimizerOutput out;
131
- out.alpha = alpha0;
132
- out.alphaiters = MatrixXd::Zero(0, alpha0.size());
133
- out.beta = E_Nn * alpha0;
162
+ out.alpha = alpha_fit;
163
+ out.alphaiters = MatrixXd::Zero(0, alpha_fit.size());
164
+ out.beta = E_Nn * alpha_fit;
134
165
  out.risk = risk;
135
166
  out.iter = 0;
136
167
  return out;
@@ -199,7 +230,11 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
199
230
  MatrixXd Ktest = kernel_cross_call(X, predict_data, maxdeg, center);
200
231
  VectorXd d_inv = d_top.cwiseInverse();
201
232
  VectorXd v = U_top * (d_inv.asDiagonal() * best_alpha);
202
- VectorXd eta_pred = Ktest * v;
233
+ VectorXd eta_full = Xtilde * best_alpha;
234
+ VectorXd Y01_full(n);
235
+ for (int i = 0; i < n; ++i) Y01_full[i] = Y[i];
236
+ const double b0_full = calibrate_logistic_intercept(Y01_full, eta_full);
237
+ VectorXd eta_pred = (Ktest * v).array() + b0_full;
203
238
  predictions = (1.0 + (-eta_pred.array()).exp()).inverse();
204
239
  }
205
240
  CVClassiOutput out;
@@ -251,7 +286,11 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
251
286
  alpha_fold = alpha0; // logistic ridge only (norm="2")
252
287
  }
253
288
 
254
- VectorXd eta = Xte * alpha_fold;
289
+ VectorXd eta_tr = Xtr * alpha_fold;
290
+ VectorXd Ytr01(ntr);
291
+ for (int i = 0; i < ntr; ++i) Ytr01[i] = (Ytr_pm1[i] > 0.0) ? 1.0 : 0.0;
292
+ const double b0_fold = calibrate_logistic_intercept(Ytr01, eta_tr);
293
+ VectorXd eta = (Xte * alpha_fold).array() + b0_fold;
255
294
  VectorXd probs = (1.0 + (-eta.array()).exp()).inverse();
256
295
  double dev = 0.0;
257
296
  for (int i = 0; i < nte; ++i) {
@@ -298,7 +337,11 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
298
337
  MatrixXd Ktest = kernel_cross_call(X, predict_data, maxdeg, center);
299
338
  VectorXd d_inv = d_top.cwiseInverse();
300
339
  VectorXd v = U_top * (d_inv.asDiagonal() * full_out.alpha);
301
- VectorXd eta_pred = Ktest * v;
340
+ VectorXd eta_full = Xtilde * full_out.alpha;
341
+ VectorXd Y01_full(n);
342
+ for (int i = 0; i < n; ++i) Y01_full[i] = Y[i];
343
+ const double b0_full = calibrate_logistic_intercept(Y01_full, eta_full);
344
+ VectorXd eta_pred = (Ktest * v).array() + b0_full;
302
345
  predictions = (1.0 + (-eta_pred.array()).exp()).inverse();
303
346
  }
304
347
 
@@ -368,12 +368,25 @@ extern "C" SEXP single_pcghal_classi_ridge_call(SEXP X_, SEXP Y_, SEXP maxdeg_,
368
368
  VectorXd Y_pm1(n);
369
369
  for (int i = 0; i < n; ++i) Y_pm1[i] = (Y01[i] == 1.0) ? 1.0 : -1.0;
370
370
 
371
- VectorXd alpha = logistic_ridge_init(Y_pm1, Xtilde, lambda);
371
+ auto calibrate_b0 = [](const VectorXd& y01, const VectorXd& eta) {
372
+ double b0 = 0.0;
373
+ for (int it = 0; it < 50; ++it) {
374
+ VectorXd z = eta.array() + b0;
375
+ VectorXd p = (1.0 + (-z.array()).exp()).inverse();
376
+ double g = (p - y01).sum();
377
+ double h = (p.array() * (1.0 - p.array())).sum();
378
+ if (std::abs(g) < 1e-10 || h < 1e-12) break;
379
+ b0 -= g / h;
380
+ }
381
+ return b0;
382
+ };
372
383
 
384
+ VectorXd alpha = logistic_ridge_init(Y_pm1, Xtilde, lambda);
373
385
  VectorXd eta = Xtilde * alpha;
386
+ const double b0 = calibrate_b0(Y01, eta);
374
387
  double risk = 0.0;
375
388
  for (int i = 0; i < n; ++i) {
376
- double ymu = Y_pm1[i] * eta[i];
389
+ double ymu = Y_pm1[i] * (eta[i] + b0);
377
390
  if (ymu > 0)
378
391
  risk += std::log1p(std::exp(-ymu));
379
392
  else
@@ -392,7 +405,7 @@ extern "C" SEXP single_pcghal_classi_ridge_call(SEXP X_, SEXP Y_, SEXP maxdeg_,
392
405
  MatrixXd Ktest = kernel_cross_call(X, Xtest, maxdeg, center);
393
406
  VectorXd d_inv = des.d.array().cwiseInverse();
394
407
  VectorXd v = des.U * (d_inv.asDiagonal() * alpha);
395
- VectorXd log_odds = Ktest * v;
408
+ VectorXd log_odds = (Ktest * v).array() + b0;
396
409
  predictions = PROTECT(Rf_allocVector(REALSXP, m_pred)); prot++;
397
410
  std::copy(log_odds.data(), log_odds.data() + m_pred, REAL(predictions));
398
411
  }
@@ -8,7 +8,7 @@ can be regenerated from the package root::
8
8
  This uses ``alpha=0.05`` with the **moderate** DGP from the original
9
9
  ``ate/simulate_data.py`` script (vendored below — exact same draws thanks to
10
10
  ``np.random.seed`` + the same ``np.random.uniform`` / ``normal`` /
11
- ``binomial`` call order):
11
+ ``binomial`` call order). ``ate_hapc`` is run with ``npcs = n - 1``.
12
12
 
13
13
  * ``W1 ~ Uniform(-2, 2)``
14
14
  * ``W2 ~ Normal(0, 0.5)``
@@ -37,7 +37,6 @@ DEMO_SEED = 456
37
37
  DEMO_N = 300
38
38
  DEMO_ALPHA = 0.05
39
39
  DEMO_MAX_DEGREE = 2
40
- DEMO_NPCS = 40
41
40
  DEMO_NFOLDS = 4
42
41
  DEMO_NORM = "1"
43
42
 
@@ -51,10 +50,10 @@ GRID_LENGTH_OUT = 8
51
50
 
52
51
  FIGURE_NAME = "ate_hapc_diagnostics_demo.png"
53
52
 
54
- # Pinned outputs (``alpha=0.05``, current C++/Python stack)
55
- _EXPECTED_ESTIMATE = 0.09213745592304026
56
- _EXPECTED_LOWER = -0.03604174118365536
57
- _EXPECTED_UPPER = 0.22031665302973588
53
+ # Pinned outputs (``alpha=0.05``, ``npcs = n - 1``, current C++/Python stack)
54
+ _EXPECTED_ESTIMATE = 0.07790009282426053
55
+ _EXPECTED_LOWER = -0.050705979103681936
56
+ _EXPECTED_UPPER = 0.206506164752203
58
57
 
59
58
 
60
59
  def _expit(x: np.ndarray) -> np.ndarray:
@@ -104,17 +103,22 @@ def run_ate_hapc_demo(
104
103
  *,
105
104
  plot_diagnostics: bool = False,
106
105
  ) -> "ATEResult":
107
- """Run ``ate_hapc`` with the pinned demo hyperparameters."""
106
+ """Run ``ate_hapc`` with the pinned demo hyperparameters.
107
+
108
+ Uses ``npcs = n - 1`` (sample size from ``load_demo_data``) for both
109
+ propensity and outcome stages, matching the usual HAL rank cap.
110
+ """
108
111
  from hapc import ate_hapc
109
112
 
110
113
  W, A, Y = load_demo_data()
114
+ npcs = int(W.shape[0]) - 1
111
115
  return ate_hapc(
112
116
  W,
113
117
  Y,
114
118
  A,
115
119
  alpha=DEMO_ALPHA,
116
120
  max_degree=DEMO_MAX_DEGREE,
117
- npcs=DEMO_NPCS,
121
+ npcs=npcs,
118
122
  log_lambda_prop_min=LOG_LAMBDA_PROP_MIN,
119
123
  log_lambda_prop_max=LOG_LAMBDA_PROP_MAX,
120
124
  grid_length_prop=GRID_LENGTH_PROP,
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes