hapc 2.0.2__tar.gz → 2.3.0__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {hapc-2.0.2/python/hapc.egg-info → hapc-2.3.0}/PKG-INFO +1 -1
  2. {hapc-2.0.2 → hapc-2.3.0}/pyproject.toml +1 -1
  3. {hapc-2.0.2 → hapc-2.3.0}/python/hapc/__init__.py +1 -1
  4. {hapc-2.0.2 → hapc-2.3.0}/python/hapc/ate.py +55 -21
  5. {hapc-2.0.2 → hapc-2.3.0}/python/hapc/cv.py +12 -5
  6. {hapc-2.0.2 → hapc-2.3.0}/python/hapc/single.py +128 -26
  7. {hapc-2.0.2 → hapc-2.3.0/python/hapc.egg-info}/PKG-INFO +1 -1
  8. {hapc-2.0.2 → hapc-2.3.0}/src/bindings.cpp +5 -0
  9. {hapc-2.0.2 → hapc-2.3.0}/src/hapc_core.hpp +8 -1
  10. {hapc-2.0.2 → hapc-2.3.0}/src/pcghal_cv_classi_cpp.cpp +111 -41
  11. {hapc-2.0.2 → hapc-2.3.0}/src/r_bindings.cpp +23 -11
  12. {hapc-2.0.2 → hapc-2.3.0}/tests/test_ate_hapc_diagnostics_example.py +12 -8
  13. {hapc-2.0.2 → hapc-2.3.0}/CMakeLists.txt +0 -0
  14. {hapc-2.0.2 → hapc-2.3.0}/LICENSE +0 -0
  15. {hapc-2.0.2 → hapc-2.3.0}/MANIFEST.in +0 -0
  16. {hapc-2.0.2 → hapc-2.3.0}/README.md +0 -0
  17. {hapc-2.0.2 → hapc-2.3.0}/python/hapc/core.py +0 -0
  18. {hapc-2.0.2 → hapc-2.3.0}/python/hapc.egg-info/SOURCES.txt +0 -0
  19. {hapc-2.0.2 → hapc-2.3.0}/python/hapc.egg-info/dependency_links.txt +0 -0
  20. {hapc-2.0.2 → hapc-2.3.0}/python/hapc.egg-info/not-zip-safe +0 -0
  21. {hapc-2.0.2 → hapc-2.3.0}/python/hapc.egg-info/requires.txt +0 -0
  22. {hapc-2.0.2 → hapc-2.3.0}/python/hapc.egg-info/top_level.txt +0 -0
  23. {hapc-2.0.2 → hapc-2.3.0}/setup.cfg +0 -0
  24. {hapc-2.0.2 → hapc-2.3.0}/setup.py +0 -0
  25. {hapc-2.0.2 → hapc-2.3.0}/src/cross_kernel.cpp +0 -0
  26. {hapc-2.0.2 → hapc-2.3.0}/src/cv_classi.cpp +0 -0
  27. {hapc-2.0.2 → hapc-2.3.0}/src/cv_fast_pchal.cpp +0 -0
  28. {hapc-2.0.2 → hapc-2.3.0}/src/cv_fast_pchal_python.cpp +0 -0
  29. {hapc-2.0.2 → hapc-2.3.0}/src/fast_pchal.cpp +0 -0
  30. {hapc-2.0.2 → hapc-2.3.0}/src/logistic_call.cpp +0 -0
  31. {hapc-2.0.2 → hapc-2.3.0}/src/mkernel.cpp +0 -0
  32. {hapc-2.0.2 → hapc-2.3.0}/src/pcghal_call.cpp +0 -0
  33. {hapc-2.0.2 → hapc-2.3.0}/src/pcghal_classi_call.cpp +0 -0
  34. {hapc-2.0.2 → hapc-2.3.0}/src/pcghal_cv.cpp +0 -0
  35. {hapc-2.0.2 → hapc-2.3.0}/src/pcghal_cv_cpp.cpp +0 -0
  36. {hapc-2.0.2 → hapc-2.3.0}/src/pchal_design.cpp +0 -0
  37. {hapc-2.0.2 → hapc-2.3.0}/src/ridge_wrappers.cpp +0 -0
  38. {hapc-2.0.2 → hapc-2.3.0}/src/single_pcghal_cpp.cpp +0 -0
  39. {hapc-2.0.2 → hapc-2.3.0}/src/single_pchar.cpp +0 -0
  40. {hapc-2.0.2 → hapc-2.3.0}/tests/test_api.py +0 -0
  41. {hapc-2.0.2 → hapc-2.3.0}/tests/test_ate.py +0 -0
  42. {hapc-2.0.2 → hapc-2.3.0}/tests/test_core.py +0 -0
  43. {hapc-2.0.2 → hapc-2.3.0}/tests/test_logistic_regression.py +0 -0
  44. {hapc-2.0.2 → hapc-2.3.0}/tests/test_r_vs_python_alpha.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hapc
3
- Version: 2.0.2
3
+ Version: 2.3.0
4
4
  Summary: Highly Adaptive Principal Components
5
5
  Home-page: https://github.com/meixide/hapc
6
6
  Author: Carlos García Meixide
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "hapc"
7
- version = "2.0.2"
7
+ version = "2.3.0"
8
8
  description = "Highly Adaptive Principal Components"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -19,7 +19,7 @@ Lower-level building blocks:
19
19
  - :func:`ate_hapc` — ATE estimate + Wald CI via HAPC + outcome undersmoothing.
20
20
  """
21
21
 
22
- __version__ = "2.0.2"
22
+ __version__ = "2.3.0"
23
23
 
24
24
  from .core import (
25
25
  DesignOutput,
@@ -16,9 +16,9 @@ Provides :func:`ate_hapc`, a high-level convenience wrapper that:
16
16
  which ``|mean(EIF)| ≤ σ / (√n · log n)``. This is the **undersmoothed**
17
17
  outcome model. If no λ in the grid meets the threshold, the smallest λ
18
18
  is used.
19
- 5. Returns the plug-in ATE point estimate at the undersmoothed model and a
20
- ``(1 - alpha)`` Wald confidence interval based on the σ of the EIF at
21
- that undersmoothed model.
19
+ 5. Returns a **doubly robust** ATE point estimate at the undersmoothed outcome
20
+ model and a ``(1 - alpha)`` Wald confidence interval from the EIF evaluated
21
+ at that estimate (see Notes).
22
22
 
23
23
  The function does not implement sample splitting / cross-fitting:
24
24
  nuisances are fit on the full sample and the EIF is evaluated on the same
@@ -47,8 +47,9 @@ class ATEResult(NamedTuple):
47
47
  Attributes
48
48
  ----------
49
49
  estimate : float
50
- Plug-in ATE at the undersmoothed outcome model:
51
- ``mean(μ̂_1(W) - μ̂_0(W))``.
50
+ Doubly robust (AIPW-style) ATE at the undersmoothed outcome model:
51
+ ``mean(A/π̂·(Y-μ̂₁)+μ̂₁ - (1-A)/(1-π̂)·(Y-μ̂₀) - μ̂₀)``, matching the
52
+ efficient influence function used for the Wald interval (see Notes).
52
53
  lower : float
53
54
  Lower endpoint of the ``(1 - alpha)`` Wald confidence interval.
54
55
  upper : float
@@ -228,15 +229,25 @@ def ate_hapc(X: np.ndarray, Y: np.ndarray, A: np.ndarray,
228
229
  specified).
229
230
  2. Fix the propensity at its CV-best λ; refit on the full sample to
230
231
  obtain ``π̂(W_i) = P(A=1 | W_i)``.
231
- 3. At the CV-best outcome λ, compute the ATE EIF
232
- ``φ̂_diff = φ̂_1 - φ̂_0`` and let ``σ = std(φ̂_diff)``.
232
+ 3. At the CV-best outcome λ, compute a **plugin-centered** influence vector
233
+ (same mean as the DR EIF at :math:`\\psi=\\overline{\\mu}_1-\\overline{\\mu}_0`)
234
+ and let ``σ = std(·)``.
233
235
  4. Threshold ``τ = σ / (√n · log n)``.
234
236
  5. Walk the **outcome** λ grid in **decreasing**
235
237
  order; pick the first (largest) λ for which
236
238
  ``|mean(EIF_diff)| ≤ τ`` — call it ``λ_u``.
237
- 6. Plug-in estimate: ``ψ̂ = mean(μ̂_1(W; λ_u) - μ̂_0(W; λ_u))``.
238
- CI: ``ψ̂ ± z_{1 - α/2} · σ_u / √n`` where ``σ_u = std(EIF_diff)``
239
- at ``λ_u``.
239
+ 6. **Doubly robust** point estimate (same nuisances ``(π̂, μ̂₁, μ̂₀)``):
240
+ ``ψ̂ = mean(A/π̂·(Y-μ̂₁)+μ̂₁ - (1-A)/(1-π̂)·(Y-μ̂₀) - μ̂₀)``.
241
+ One-step influence function (centered at ``ψ̂``):
242
+ ``φ_i = A_i/π̂_i·(Y_i-μ̂_{1i}) + μ̂_{1i} - (1-A_i)/(1-π̂_i)·(Y_i-μ̂_{0i})
243
+ - μ̂_{0i} - ψ̂``.
244
+ CI: ``ψ̂ ± z_{1-α/2} · std(φ) / √n``.
245
+
246
+ This contrasts with **plug-in** G-computation ``mean(μ̂₁(W)-μ̂₀(W))``,
247
+ which can be materially biased when both nuisances are estimated on the
248
+ same sample and the outcome regressions are regularized. The DR
249
+ ``ψ̂`` is consistent if **either** the propensity **or** the pair
250
+ ``(μ̂₁, μ̂₀)`` is correctly specified (standard double robustness).
240
251
 
241
252
  Examples
242
253
  --------
@@ -329,38 +340,60 @@ def ate_hapc(X: np.ndarray, Y: np.ndarray, A: np.ndarray,
329
340
  )
330
341
  return p[:n], p[n:]
331
342
 
332
- def _eif_diff(mu1: np.ndarray, mu0: np.ndarray) -> np.ndarray:
343
+ def _eif_plugin_centered(mu1: np.ndarray, mu0: np.ndarray) -> np.ndarray:
344
+ """Plugin-centered influence vector (undersmoothing gate only).
345
+
346
+ Its mean matches the DR EIF evaluated at plug-in
347
+ :math:`\\psi=\\overline{\\mu}_1-\\overline{\\mu}_0`. The returned ATE
348
+ uses ``_psi_dr`` / ``_eif_dr`` instead.
349
+ """
333
350
  eif1 = (A01 / pi1) * (Y - mu1) - (mu1 - mu1.mean())
334
351
  eif0 = ((1.0 - A01) / (1.0 - pi1)) * (Y - mu0) - (mu0 - mu0.mean())
335
352
  return eif1 - eif0
336
353
 
354
+ def _psi_dr(mu1: np.ndarray, mu0: np.ndarray) -> float:
355
+ return float(
356
+ np.mean(
357
+ (A01 / pi1) * (Y - mu1)
358
+ + mu1
359
+ - ((1.0 - A01) / (1.0 - pi1)) * (Y - mu0)
360
+ - mu0
361
+ )
362
+ )
363
+
364
+ def _eif_dr(mu1: np.ndarray, mu0: np.ndarray, psi: float) -> np.ndarray:
365
+ return (
366
+ (A01 / pi1) * (Y - mu1)
367
+ + mu1
368
+ - ((1.0 - A01) / (1.0 - pi1)) * (Y - mu0)
369
+ - mu0
370
+ - psi
371
+ )
372
+
337
373
  # --- 3. σ at CV configuration → threshold τ ----------------------------
338
374
  mu1_cv, mu0_cv = _mu_pair(lam_out_cv)
339
- eif_cv = _eif_diff(mu1_cv, mu0_cv)
375
+ eif_cv = _eif_plugin_centered(mu1_cv, mu0_cv)
340
376
  sigma_cv = float(np.std(eif_cv, ddof=0))
341
377
  threshold = sigma_cv / (np.sqrt(n) * np.log(n))
342
378
 
343
379
  # --- 4. Undersmoothing sweep: largest λ → smallest --------------------
344
380
  lam_und: Optional[float] = None
345
- eif_und: Optional[np.ndarray] = None
346
381
  mu1_und = mu0_und = None
347
382
  for lam in np.sort(lambdas_out)[::-1]:
348
383
  try:
349
384
  mu1, mu0 = _mu_pair(float(lam))
350
385
  except Exception:
351
386
  continue
352
- eif = _eif_diff(mu1, mu0)
387
+ eif = _eif_plugin_centered(mu1, mu0)
353
388
  if abs(eif.mean()) <= threshold:
354
389
  lam_und = float(lam)
355
390
  mu1_und, mu0_und = mu1, mu0
356
- eif_und = eif
357
391
  break
358
392
 
359
- if eif_und is None:
393
+ if lam_und is None:
360
394
  # Threshold never met → fall back to the smallest λ in the grid.
361
395
  lam_und = float(lambdas_out.min())
362
396
  mu1_und, mu0_und = _mu_pair(lam_und)
363
- eif_und = _eif_diff(mu1_und, mu0_und)
364
397
 
365
398
  if plot_diagnostics:
366
399
  t_lams: list[float] = []
@@ -370,7 +403,7 @@ def ate_hapc(X: np.ndarray, Y: np.ndarray, A: np.ndarray,
370
403
  mu1, mu0 = _mu_pair(float(lam))
371
404
  except Exception:
372
405
  continue
373
- eif = _eif_diff(mu1, mu0)
406
+ eif = _eif_plugin_centered(mu1, mu0)
374
407
  t_lams.append(float(lam))
375
408
  t_abs.append(float(np.abs(eif.mean())))
376
409
  _plot_ate_diagnostics(
@@ -379,9 +412,10 @@ def ate_hapc(X: np.ndarray, Y: np.ndarray, A: np.ndarray,
379
412
  lam_prop_cv, lam_out_cv, lam_und, threshold,
380
413
  )
381
414
 
382
- # --- 5. Point estimate + (1 - alpha) Wald CI --------------------------
383
- psi = float(np.mean(mu1_und - mu0_und))
384
- sigma_und = float(np.std(eif_und, ddof=0))
415
+ # --- 5. Doubly robust point estimate + (1 - alpha) Wald CI --------------
416
+ psi = _psi_dr(mu1_und, mu0_und)
417
+ eif_dr = _eif_dr(mu1_und, mu0_und, psi)
418
+ sigma_und = float(np.std(eif_dr, ddof=0))
385
419
  z = float(_normal.ppf(1.0 - alpha / 2.0))
386
420
  half = z * sigma_und / np.sqrt(n)
387
421
 
@@ -18,7 +18,11 @@ import numpy as np
18
18
 
19
19
  from . import hapc_core
20
20
  from .core import _C, cross_kernel_hapc, design_hapc
21
- from .single import single_pcghal_classification_lasso
21
+ from .single import (
22
+ _check_binomial_labels,
23
+ _to_soft01,
24
+ single_pcghal_classification_lasso,
25
+ )
22
26
 
23
27
 
24
28
  class CVResult(NamedTuple):
@@ -376,6 +380,9 @@ def pcghal_cv_classi_lasso(X: np.ndarray, Y: np.ndarray,
376
380
  if not np.all(lams > 0):
377
381
  raise ValueError("All lambdas must be > 0 for logistic LASSO.")
378
382
 
383
+ # Soft target in [0,1] used for the held-out cross-entropy deviance
384
+ # (accepts hard {0,1}/{-1,+1} or fractional EM-HAL posteriors).
385
+ q = _to_soft01(Y)
379
386
  folds = _native_folds(n, int(nfolds))
380
387
  L = lams.size
381
388
  fold_dev = np.full((int(nfolds), L), np.nan)
@@ -386,7 +393,7 @@ def pcghal_cv_classi_lasso(X: np.ndarray, Y: np.ndarray,
386
393
  if te.size == 0 or tr.size == 0:
387
394
  continue
388
395
  Xtr, Ytr = X[tr], Y[tr]
389
- Xte, Yte = X[te], Y[te]
396
+ Xte, Yte = X[te], q[te]
390
397
 
391
398
  for j, lam in enumerate(lams):
392
399
  res = single_pcghal_classification_lasso(
@@ -395,9 +402,7 @@ def pcghal_cv_classi_lasso(X: np.ndarray, Y: np.ndarray,
395
402
  verbose=bool(verbose), max_iter=int(max_iter),
396
403
  )
397
404
  probs = np.clip(res.probabilities, 1e-15, 1 - 1e-15)
398
- yte01 = (Yte == 1).astype(np.float64) if set(np.unique(Yte).tolist()).issubset({0.0, 1.0}) \
399
- else (Yte > 0).astype(np.float64)
400
- dev = -(yte01 * np.log(probs) + (1 - yte01) * np.log(1 - probs))
405
+ dev = -(Yte * np.log(probs) + (1 - Yte) * np.log(1 - probs))
401
406
  fold_dev[k - 1, j] = float(dev.mean())
402
407
 
403
408
  deviances = np.nanmean(fold_dev, axis=0)
@@ -500,6 +505,8 @@ def cv_hapc(X: np.ndarray, Y: np.ndarray,
500
505
  lams = _grid(None, log_lambda_min, log_lambda_max, grid_length)
501
506
 
502
507
  if family == "binomial":
508
+ # Validate labels; allow soft labels in [0,1] only for norm in {"1","2"}.
509
+ _check_binomial_labels(Y, norm)
503
510
  if norm in {"sv", "2"}:
504
511
  return pcghal_cv_classi(
505
512
  X, Y, max_degree=max_degree, npcs=npcs,
@@ -95,6 +95,79 @@ def _to_pm1(Y: np.ndarray, *, verbose: bool = False) -> np.ndarray:
95
95
  )
96
96
 
97
97
 
98
+ def _label_kind(Y: np.ndarray) -> str:
99
+ """Classify a binomial response vector.
100
+
101
+ Returns ``"01"`` (hard labels in ``{0,1}``), ``"pm1"`` (hard labels in
102
+ ``{-1,+1}``), or ``"soft"`` (fractional labels in ``[0,1]``, e.g. EM-HAL
103
+ E-step posteriors). Raises ``ValueError`` if any value falls outside
104
+ ``[0,1]`` and the set is not exactly ``{-1,+1}``.
105
+ """
106
+ Y = np.asarray(Y, dtype=np.float64).ravel()
107
+ u = np.unique(Y[~np.isnan(Y)])
108
+ s = set(u.tolist())
109
+ if s.issubset({0.0, 1.0}):
110
+ return "01"
111
+ if s == {-1.0, 1.0}:
112
+ return "pm1"
113
+ if u.size and u.min() >= 0.0 and u.max() <= 1.0:
114
+ return "soft"
115
+ raise ValueError(
116
+ "family='binomial' requires Y in {0,1}, {-1,+1}, or soft labels in "
117
+ "[0,1]; found values outside [0,1]."
118
+ )
119
+
120
+
121
+ def _to_soft01(Y: np.ndarray) -> np.ndarray:
122
+ """Map a binomial response to a soft cross-entropy target in ``[0,1]``."""
123
+ Y = np.asarray(Y, dtype=np.float64).ravel()
124
+ return (Y + 1.0) / 2.0 if _label_kind(Y) == "pm1" else Y
125
+
126
+
127
+ def _check_binomial_labels(Y: np.ndarray, norm: str) -> str:
128
+ """Validate labels and enforce the soft-label norm restriction.
129
+
130
+ Soft labels (any value strictly inside ``(0,1)``) are supported only for
131
+ ``norm`` in ``{"1","2"}``; ``norm="sv"`` raises ``NotImplementedError``.
132
+ A warning is emitted whenever soft labels are detected. Returns the label
133
+ kind from :func:`_label_kind`.
134
+ """
135
+ import warnings
136
+
137
+ kind = _label_kind(Y)
138
+ if kind == "soft":
139
+ if norm == "sv":
140
+ raise NotImplementedError(
141
+ "Soft labels (Y in (0,1)) are not implemented for norm='sv'; "
142
+ "use norm='1' or norm='2'."
143
+ )
144
+ warnings.warn(
145
+ "Non-binary labels detected in Y: treating them as soft labels in "
146
+ "[0,1] (cross-entropy target). Supported only for norm='1' and "
147
+ "norm='2'.",
148
+ stacklevel=2,
149
+ )
150
+ return kind
151
+
152
+
153
+ def _calibrate_logistic_intercept(y01: np.ndarray, eta: np.ndarray) -> float:
154
+ """Newton calibration for intercept with fixed linear predictor ``eta``."""
155
+ y01 = np.asarray(y01, dtype=np.float64).ravel()
156
+ eta = np.asarray(eta, dtype=np.float64).ravel()
157
+ if y01.shape != eta.shape:
158
+ raise ValueError("y01 and eta must have the same shape")
159
+ b0 = 0.0
160
+ for _ in range(50):
161
+ z = eta + b0
162
+ p = 1.0 / (1.0 + np.exp(-z))
163
+ g = float(np.sum(p - y01))
164
+ h = float(np.sum(p * (1.0 - p)))
165
+ if abs(g) < 1e-10 or h < 1e-12:
166
+ break
167
+ b0 -= g / h
168
+ return float(b0)
169
+
170
+
98
171
  # ---------------------------------------------------------------------------
99
172
  # Single λ — gaussian, norm in {"1", "2"} (closed-form)
100
173
  # ---------------------------------------------------------------------------
@@ -299,6 +372,14 @@ def single_pcghal_classification(
299
372
  res = pcghal_classification(Y_pm1, Xtilde, ENn, alpha0,
300
373
  max_iter=max_iter, tol=tol,
301
374
  step_factor=step_factor, verbose=verbose)
375
+ y01 = (Y_pm1 > 0).astype(np.float64)
376
+ eta_train = Xtilde @ np.asarray(res.alpha).ravel()
377
+ b0 = _calibrate_logistic_intercept(y01, eta_train)
378
+ ymu = Y_pm1 * (eta_train + b0)
379
+ risk = float(
380
+ np.where(ymu > 0, np.log1p(np.exp(-ymu)), -ymu + np.log1p(np.exp(ymu)))
381
+ .mean()
382
+ )
302
383
 
303
384
  predictions = probabilities = predicted_classes = None
304
385
  if predict is not None:
@@ -307,7 +388,7 @@ def single_pcghal_classification(
307
388
  raise ValueError(f"predict must have {p} columns")
308
389
  Ktest = cross_kernel_hapc(X, Xte, max_degree, center=center)
309
390
  v = des.U[:, :final_npc] @ ((1.0 / (des.d[:final_npc] + 1e-12)) * res.alpha)
310
- log_odds = Ktest @ v
391
+ log_odds = Ktest @ v + b0
311
392
  predictions = log_odds
312
393
  probabilities = 1.0 / (1.0 + np.exp(-log_odds))
313
394
  predicted_classes = np.where(probabilities > 0.5, 1.0, -1.0)
@@ -315,7 +396,7 @@ def single_pcghal_classification(
315
396
  return SinglePcghalClassificationResult(
316
397
  alpha=res.alpha, predictions=predictions,
317
398
  probabilities=probabilities, predicted_classes=predicted_classes,
318
- lambda_=float(lambda_), risk=res.risk, iter=res.iter,
399
+ lambda_=float(lambda_), risk=risk, iter=res.iter,
319
400
  )
320
401
 
321
402
 
@@ -341,22 +422,21 @@ def single_pcghal_classification_ridge_only(
341
422
  SinglePcghalClassificationResult
342
423
  """
343
424
  X, Y, n, p = _check_xy(X, Y)
344
- Y_pm1 = _to_pm1(Y, verbose=verbose)
425
+ # Accept hard {0,1}/{-1,+1} or soft [0,1] labels (cross-entropy target).
426
+ y01 = _to_soft01(Y)
345
427
 
346
428
  des = design_hapc(X, max_degree, npcs, center=center)
347
429
  final_npc = des.d.shape[0]
348
430
  Xtilde = des.U[:, :final_npc] * des.d[:final_npc]
349
431
 
350
432
  alpha = np.asarray(
351
- hapc_core.logistic_ridge_init(_C(Y_pm1), _C(Xtilde), float(lambda_))
433
+ hapc_core.logistic_ridge_init_y01(_C(y01), _C(Xtilde), float(lambda_))
352
434
  ).ravel()
353
435
 
354
436
  eta = Xtilde @ alpha
355
- ymu = Y_pm1 * eta
356
- risk = float(
357
- np.where(ymu > 0, np.log1p(np.exp(-ymu)), -ymu + np.log1p(np.exp(ymu)))
358
- .mean()
359
- )
437
+ b0 = _calibrate_logistic_intercept(y01, eta)
438
+ phat = np.clip(1.0 / (1.0 + np.exp(-(eta + b0))), 1e-15, 1 - 1e-15)
439
+ risk = float((-(y01 * np.log(phat) + (1 - y01) * np.log(1 - phat))).mean())
360
440
 
361
441
  predictions = probabilities = predicted_classes = None
362
442
  if predict is not None:
@@ -365,7 +445,7 @@ def single_pcghal_classification_ridge_only(
365
445
  raise ValueError(f"predict must have {p} columns")
366
446
  Ktest = cross_kernel_hapc(X, Xte, max_degree, center=center)
367
447
  v = des.U[:, :final_npc] @ ((1.0 / (des.d[:final_npc] + 1e-12)) * alpha)
368
- log_odds = Ktest @ v
448
+ log_odds = Ktest @ v + b0
369
449
  predictions = log_odds
370
450
  probabilities = 1.0 / (1.0 + np.exp(-log_odds))
371
451
  predicted_classes = np.where(probabilities > 0.5, 1.0, -1.0)
@@ -452,13 +532,26 @@ def single_pcghal_classification_lasso(
452
532
  raise ValueError(f"lambda_ must be > 0 for LASSO; got {lambda_}")
453
533
 
454
534
  X, Y, n, p = _check_xy(X, Y)
455
- Y_pm1 = _to_pm1(Y, verbose=verbose)
456
- Y_01 = (Y_pm1 > 0).astype(np.int64)
535
+ # Accept hard {0,1}/{-1,+1} or soft [0,1] labels (cross-entropy target).
536
+ q = _to_soft01(Y)
457
537
 
458
538
  des = design_hapc(X, max_degree, npcs, center=center)
459
539
  final_npc = des.d.shape[0]
460
540
  Xtilde = des.U[:, :final_npc] * des.d[:final_npc]
461
541
 
542
+ # For soft labels, replicate each row as a (label=1, weight=q) and
543
+ # (label=0, weight=1-q) pair so the sample-weighted logistic loss equals
544
+ # the soft cross-entropy. On hard labels this reduces to the plain fit.
545
+ is_soft = bool(np.any((q > 1e-12) & (q < 1.0 - 1e-12)))
546
+ if is_soft:
547
+ Xfit = _C(np.vstack([Xtilde, Xtilde]))
548
+ yfit = np.concatenate([np.ones(n), np.zeros(n)]).astype(np.int64)
549
+ wfit = np.concatenate([q, 1.0 - q]).astype(np.float64)
550
+ else:
551
+ Xfit = _C(Xtilde)
552
+ yfit = (q > 0.5).astype(np.int64)
553
+ wfit = None
554
+
462
555
  C = 1.0 / (n * float(lambda_))
463
556
  # sklearn>=1.8 deprecated penalty="l1" in favour of l1_ratio=1 with the
464
557
  # liblinear solver; older versions still need penalty="l1". Try the new
@@ -467,23 +560,28 @@ def single_pcghal_classification_lasso(
467
560
  sig_params = inspect.signature(LogisticRegression).parameters
468
561
  common_kw = dict(solver="liblinear", C=C, fit_intercept=False,
469
562
  max_iter=int(max_iter))
563
+
564
+ def _fit(**ctor):
565
+ m = LogisticRegression(**ctor, **common_kw)
566
+ if wfit is None:
567
+ m.fit(Xfit, yfit)
568
+ else:
569
+ m.fit(Xfit, yfit, sample_weight=wfit)
570
+ return m
571
+
470
572
  if "l1_ratio" in sig_params and "penalty" in sig_params:
471
573
  try:
472
- model = LogisticRegression(l1_ratio=1.0, **common_kw)
473
- model.fit(_C(Xtilde), Y_01)
574
+ model = _fit(l1_ratio=1.0)
474
575
  except (TypeError, ValueError):
475
- model = LogisticRegression(penalty="l1", **common_kw)
476
- model.fit(_C(Xtilde), Y_01)
576
+ model = _fit(penalty="l1")
477
577
  else: # pragma: no cover (very old sklearn)
478
- model = LogisticRegression(penalty="l1", **common_kw)
479
- model.fit(_C(Xtilde), Y_01)
578
+ model = _fit(penalty="l1")
480
579
  alpha = np.asarray(model.coef_, dtype=np.float64).ravel()
580
+ b0 = _calibrate_logistic_intercept(q, Xtilde @ alpha)
481
581
 
482
- eta = Xtilde @ alpha
483
- ymu = Y_pm1 * eta
484
- risk = float(
485
- np.where(ymu > 0, np.log1p(np.exp(-ymu)), -ymu + np.log1p(np.exp(ymu))).mean()
486
- )
582
+ eta = Xtilde @ alpha + b0
583
+ phat = np.clip(1.0 / (1.0 + np.exp(-eta)), 1e-15, 1 - 1e-15)
584
+ risk = float((-(q * np.log(phat) + (1 - q) * np.log(1 - phat))).mean())
487
585
 
488
586
  predictions = probabilities = predicted_classes = None
489
587
  if predict is not None:
@@ -492,7 +590,7 @@ def single_pcghal_classification_lasso(
492
590
  raise ValueError(f"predict must have {p} columns")
493
591
  Ktest = cross_kernel_hapc(X, Xte, max_degree, center=center)
494
592
  v = des.U[:, :final_npc] @ ((1.0 / (des.d[:final_npc] + 1e-12)) * alpha)
495
- log_odds = Ktest @ v
593
+ log_odds = Ktest @ v + b0
496
594
  predictions = log_odds
497
595
  probabilities = 1.0 / (1.0 + np.exp(-log_odds))
498
596
  predicted_classes = np.where(probabilities > 0.5, 1.0, -1.0)
@@ -531,8 +629,10 @@ def hapc(X: np.ndarray, Y: np.ndarray,
531
629
  X : np.ndarray, shape (n, p)
532
630
  Features.
533
631
  Y : np.ndarray, shape (n,)
534
- Response. For ``family="binomial"`` must contain only ``{0,1}`` or
535
- ``{-1,+1}``.
632
+ Response. For ``family="binomial"``: hard labels in ``{0,1}`` or
633
+ ``{-1,+1}``, or soft labels in ``[0,1]`` (e.g. EM-HAL E-step
634
+ posteriors). Soft labels are supported only for ``norm`` in
635
+ ``{"1","2"}``; ``norm="sv"`` requires hard labels.
536
636
  family : {"gaussian", "binomial"}, default "gaussian"
537
637
  Loss family.
538
638
  max_degree : int, default 1
@@ -588,6 +688,8 @@ def hapc(X: np.ndarray, Y: np.ndarray,
588
688
  npcs = int(X.shape[0])
589
689
 
590
690
  if family == "binomial":
691
+ # Validate labels; allow soft labels in [0,1] only for norm in {"1","2"}.
692
+ _check_binomial_labels(Y, norm)
591
693
  if norm == "sv":
592
694
  return single_pcghal_classification(
593
695
  X, Y, max_degree, npcs, lambda_,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hapc
3
- Version: 2.0.2
3
+ Version: 2.3.0
4
4
  Summary: Highly Adaptive Principal Components
5
5
  Home-page: https://github.com/meixide/hapc
6
6
  Author: Carlos García Meixide
@@ -117,4 +117,9 @@ PYBIND11_MODULE(hapc_core, m) {
117
117
 
118
118
  m.def("logistic_ridge_init", &logistic_ridge_init,
119
119
  py::arg("Y"), py::arg("X"), py::arg("lambda"));
120
+
121
+ // Soft-label logistic ridge initialiser: target Y may be any value in
122
+ // [0,1] (hard {0,1} labels or fractional EM-HAL E-step posteriors).
123
+ m.def("logistic_ridge_init_y01", &logistic_ridge_init_y01,
124
+ py::arg("Y"), py::arg("X"), py::arg("lambda"));
120
125
  }
@@ -91,6 +91,11 @@ FastCVOutput fasthal_cv_python(const MatrixXd& X, const VectorXd& Y, int npc,
91
91
  // (internally multiplied by n, matching logistic_call).
92
92
  VectorXd logistic_ridge_init(const VectorXd& Y_pm1, const MatrixXd& X, double lambda);
93
93
 
94
+ // Soft-label variant: target `y01` may take any value in [0, 1] (hard {0,1}
95
+ // labels or fractional EM-HAL E-step posteriors). On hard {0,1} inputs the
96
+ // result is identical to logistic_ridge_init. lambda has the same scaling.
97
+ VectorXd logistic_ridge_init_y01(const VectorXd& y01, const MatrixXd& X, double lambda);
98
+
94
99
  // Cross-validation output for binomial (logistic) HAPC.
95
100
  struct CVClassiOutput {
96
101
  std::vector<double> deviances;
@@ -101,7 +106,9 @@ struct CVClassiOutput {
101
106
  };
102
107
 
103
108
  // Python-friendly binomial CV (mirrors R `pchal_cv_classi_call`).
104
- // Y must contain only 0 or 1 values.
109
+ // Y must lie in [0,1]: hard {0,1} labels or soft EM-HAL posteriors. Soft
110
+ // labels are supported only when with_pgd == false (norm="2"); with_pgd ==
111
+ // true (norm="sv") rejects soft labels.
105
112
  //
106
113
  // When `with_pgd == true` (default): per fold runs logistic-ridge initialiser
107
114
  // followed by projected gradient descent on logistic loss (norm="sv").
@@ -28,10 +28,15 @@
28
28
  // rule `beta := delta_beta` (i.e. solving the full normal equation each
29
29
  // iteration, treating the IRLS working response as the regression target).
30
30
  // ---------------------------------------------------------------------------
31
- VectorXd logistic_ridge_init(const VectorXd& Y_pm1, const MatrixXd& X, double lambda) {
31
+ // Soft-label logistic ridge. The target `y01` may take any value in [0, 1]:
32
+ // hard {0,1} labels or fractional EM-HAL E-step posteriors. The IRLS update
33
+ // is unchanged; fractional targets are standard for cross-entropy
34
+ // minimisation, so on hard {0,1} inputs the result is bit-identical to the
35
+ // former {-1,+1} implementation.
36
+ VectorXd logistic_ridge_init_y01(const VectorXd& y01, const MatrixXd& X, double lambda) {
32
37
  const int n = X.rows();
33
38
  const int p = X.cols();
34
- if (Y_pm1.size() != n) {
39
+ if (y01.size() != n) {
35
40
  throw std::runtime_error("logistic_ridge_init: Y length must match nrow(X).");
36
41
  }
37
42
  // Match logistic_call: lambda is multiplied by n internally.
@@ -39,12 +44,6 @@ VectorXd logistic_ridge_init(const VectorXd& Y_pm1, const MatrixXd& X, double la
39
44
  const int max_iter = 100;
40
45
  const double tol = 1e-8;
41
46
 
42
- // logistic_call expects Y in {-1,+1} but treats it via the GLM update with
43
- // the {0,1} working response. We replicate that behaviour exactly: convert
44
- // back to a {0,1} response y01 = (Y_pm1 + 1) / 2 to compute mu/working z.
45
- VectorXd y01(n);
46
- for (int i = 0; i < n; ++i) y01[i] = (Y_pm1[i] > 0) ? 1.0 : 0.0;
47
-
48
47
  VectorXd beta = VectorXd::Zero(p);
49
48
  for (int iter = 0; iter < max_iter; ++iter) {
50
49
  VectorXd eta = X * beta;
@@ -66,6 +65,51 @@ VectorXd logistic_ridge_init(const VectorXd& Y_pm1, const MatrixXd& X, double la
66
65
  return beta;
67
66
  }
68
67
 
68
+ // Backward-compatible wrapper: accepts Y in {-1,+1} and converts to {0,1}.
69
+ // Used by the PGD (norm="sv") single-fit path, which is hard-label only.
70
+ VectorXd logistic_ridge_init(const VectorXd& Y_pm1, const MatrixXd& X, double lambda) {
71
+ const int n = X.rows();
72
+ VectorXd y01(n);
73
+ for (int i = 0; i < n; ++i) y01[i] = (Y_pm1[i] > 0) ? 1.0 : 0.0;
74
+ return logistic_ridge_init_y01(y01, X, lambda);
75
+ }
76
+
77
+ static double calibrate_logistic_intercept(const VectorXd& Y01,
78
+ const VectorXd& eta) {
79
+ const int n = (int)Y01.size();
80
+ if (eta.size() != n) {
81
+ throw std::runtime_error("calibrate_logistic_intercept: length mismatch");
82
+ }
83
+ double b0 = 0.0;
84
+ for (int it = 0; it < 50; ++it) {
85
+ const VectorXd z = eta.array() + b0;
86
+ const VectorXd p = (1.0 + (-z.array()).exp()).inverse();
87
+ const double g = (p - Y01).sum();
88
+ const double h = (p.array() * (1.0 - p.array())).sum();
89
+ if (std::abs(g) < 1e-10 || h < 1e-12) break;
90
+ b0 -= g / h;
91
+ }
92
+ return b0;
93
+ }
94
+
95
+ // Soft cross-entropy risk for fractional targets y01 in [0,1], given a linear
96
+ // predictor `eta` (intercept already folded in). On hard {0,1} labels this
97
+ // equals the former {-1,+1} logistic risk, so behaviour is unchanged on
98
+ // binary inputs.
99
+ static double logistic_risk_y01(const VectorXd& y01, const VectorXd& eta) {
100
+ const int n = (int)y01.size();
101
+ if (eta.size() != n) {
102
+ throw std::runtime_error("logistic_risk_y01: length mismatch");
103
+ }
104
+ double risk = 0.0;
105
+ for (int i = 0; i < n; ++i) {
106
+ const double pi = 1.0 / (1.0 + std::exp(-eta[i]));
107
+ const double p = std::min(1.0 - 1e-15, std::max(1e-15, pi));
108
+ risk += -(y01[i] * std::log(p) + (1.0 - y01[i]) * std::log(1.0 - p));
109
+ }
110
+ return risk / n;
111
+ }
112
+
69
113
  // ---------------------------------------------------------------------------
70
114
  // Build the Eigen-friendly "Xtilde = U_top * diag(d_top)" representation,
71
115
  // returning final_npc (which may be capped by the design rank).
@@ -104,33 +148,35 @@ static std::vector<int> make_folds(int n, int K) {
104
148
  // for the post-CV refit). When `with_pgd == false`, returns the logistic-ridge
105
149
  // initialiser α directly with its training logistic risk; otherwise runs the
106
150
  // PGD step on top of it (norm="sv").
107
- static OptimizerOutput logistic_full_fit(const VectorXd& Y_pm1,
151
+ static OptimizerOutput logistic_full_fit(const VectorXd& Y01,
108
152
  const MatrixXd& Xtilde,
109
153
  const MatrixXd& E_Nn,
110
154
  double lambda,
111
155
  int max_iter, double tol,
112
156
  double step_factor, bool verbose,
113
157
  bool with_pgd) {
114
- VectorXd alpha0 = logistic_ridge_init(Y_pm1, Xtilde, lambda);
115
- if (with_pgd) {
116
- return pcghal_classi_call(Y_pm1, Xtilde, E_Nn, alpha0,
117
- max_iter, tol, step_factor, verbose);
118
- }
119
- // Logistic-ridge-only path: assemble the same OptimizerOutput shape with
120
- // logistic training risk evaluated on (Y_pm1, Xtilde, alpha0).
158
+ VectorXd alpha0 = logistic_ridge_init_y01(Y01, Xtilde, lambda);
121
159
  const int n = Xtilde.rows();
122
- VectorXd eta = Xtilde * alpha0;
123
- double risk = 0.0;
124
- for (int i = 0; i < n; ++i) {
125
- const double ymu = Y_pm1[i] * eta[i];
126
- risk += (ymu > 0) ? std::log1p(std::exp(-ymu))
127
- : -ymu + std::log1p(std::exp(ymu));
160
+ VectorXd alpha_fit;
161
+ if (with_pgd) {
162
+ // PGD (norm="sv") uses the {-1,+1} logistic loss and is reached only
163
+ // for hard labels (soft labels are rejected upstream), so thresholding
164
+ // at 0.5 recovers the exact {-1,+1} encoding.
165
+ VectorXd Y_pm1(n);
166
+ for (int i = 0; i < n; ++i) Y_pm1[i] = (Y01[i] > 0.5) ? 1.0 : -1.0;
167
+ OptimizerOutput out = pcghal_classi_call(Y_pm1, Xtilde, E_Nn, alpha0,
168
+ max_iter, tol, step_factor, verbose);
169
+ alpha_fit = out.alpha;
170
+ } else {
171
+ alpha_fit = alpha0; // logistic ridge only (norm="2")
128
172
  }
129
- risk /= n;
173
+ VectorXd eta = Xtilde * alpha_fit;
174
+ const double b0 = calibrate_logistic_intercept(Y01, eta);
175
+ const double risk = logistic_risk_y01(Y01, eta.array() + b0);
130
176
  OptimizerOutput out;
131
- out.alpha = alpha0;
132
- out.alphaiters = MatrixXd::Zero(0, alpha0.size());
133
- out.beta = E_Nn * alpha0;
177
+ out.alpha = alpha_fit;
178
+ out.alphaiters = MatrixXd::Zero(0, alpha_fit.size());
179
+ out.beta = E_Nn * alpha_fit;
134
180
  out.risk = risk;
135
181
  out.iter = 0;
136
182
  return out;
@@ -146,10 +192,21 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
146
192
  const int n = X.rows();
147
193
  const int p = X.cols();
148
194
  if (Y.size() != n) throw std::runtime_error("pcghal_cv_classi: length(Y) != nrow(X)");
195
+ // Y must lie in [0,1]: hard {0,1} labels or soft EM-HAL posteriors. Soft
196
+ // labels (any value strictly inside (0,1)) are supported only for the
197
+ // logistic-ridge path (norm="2"); the PGD path (norm="sv", with_pgd=true)
198
+ // is not implemented for soft labels.
199
+ bool soft = false;
149
200
  for (int i = 0; i < n; ++i) {
150
- if (Y[i] != 0.0 && Y[i] != 1.0) {
151
- throw std::runtime_error("pcghal_cv_classi: Y must be 0/1");
201
+ if (Y[i] < -1e-12 || Y[i] > 1.0 + 1e-12) {
202
+ throw std::runtime_error("pcghal_cv_classi: Y must be in [0,1]");
152
203
  }
204
+ if (Y[i] > 1e-12 && Y[i] < 1.0 - 1e-12) soft = true;
205
+ }
206
+ if (soft && with_pgd) {
207
+ throw std::runtime_error(
208
+ "pcghal_cv_classi: soft labels (Y in (0,1)) are not implemented for "
209
+ "norm='sv'; use norm='1' or norm='2'.");
153
210
  }
154
211
  const int L = (int)lambdas.size();
155
212
  if (L <= 0) throw std::runtime_error("pcghal_cv_classi: lambdas must be non-empty");
@@ -167,9 +224,9 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
167
224
  const int final_npc = compute_classi_design(X, maxdeg, npc_eff, center,
168
225
  Xtilde, E_Nn, U_top, d_top);
169
226
 
170
- // Y in {-1,+1} for the optimiser
171
- VectorXd Y_pm1(n);
172
- for (int i = 0; i < n; ++i) Y_pm1[i] = (Y[i] == 1.0) ? 1.0 : -1.0;
227
+ // Soft target in [0,1] used throughout (the ridge/CE machinery works
228
+ // directly in this space; the PGD branch builds {-1,+1} locally).
229
+ const VectorXd& Y01 = Y;
173
230
 
174
231
  // Degenerate case: R `hapc(family="binomial", …)` passes nfolds=1 with a
175
232
  // single λ — there is no proper train/test split. Fit on full data and
@@ -182,7 +239,7 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
182
239
  for (int j = 0; j < L; ++j) {
183
240
  const double lam = lambdas[j];
184
241
  OptimizerOutput full_out = logistic_full_fit(
185
- Y_pm1, Xtilde, E_Nn, lam, max_iter, tol, step_factor,
242
+ Y01, Xtilde, E_Nn, lam, max_iter, tol, step_factor,
186
243
  verbose, with_pgd);
187
244
  deviances[j] = full_out.risk;
188
245
  if (full_out.risk < best_val) {
@@ -199,7 +256,11 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
199
256
  MatrixXd Ktest = kernel_cross_call(X, predict_data, maxdeg, center);
200
257
  VectorXd d_inv = d_top.cwiseInverse();
201
258
  VectorXd v = U_top * (d_inv.asDiagonal() * best_alpha);
202
- VectorXd eta_pred = Ktest * v;
259
+ VectorXd eta_full = Xtilde * best_alpha;
260
+ VectorXd Y01_full(n);
261
+ for (int i = 0; i < n; ++i) Y01_full[i] = Y[i];
262
+ const double b0_full = calibrate_logistic_intercept(Y01_full, eta_full);
263
+ VectorXd eta_pred = (Ktest * v).array() + b0_full;
203
264
  predictions = (1.0 + (-eta_pred.array()).exp()).inverse();
204
265
  }
205
266
  CVClassiOutput out;
@@ -230,19 +291,22 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
230
291
  if (ntr == 0 || nte == 0) continue;
231
292
 
232
293
  MatrixXd Xtr(ntr, final_npc), Xte(nte, final_npc);
233
- VectorXd Ytr_pm1(ntr), Yte01(nte);
294
+ VectorXd Ytr01(ntr), Yte01(nte);
234
295
  for (int i = 0; i < ntr; ++i) {
235
296
  Xtr.row(i) = Xtilde.row(tr_idx[i]);
236
- Ytr_pm1[i] = Y_pm1[tr_idx[i]];
297
+ Ytr01[i] = Y01[tr_idx[i]];
237
298
  }
238
299
  for (int i = 0; i < nte; ++i) {
239
300
  Xte.row(i) = Xtilde.row(te_idx[i]);
240
- Yte01[i] = Y[te_idx[i]];
301
+ Yte01[i] = Y01[te_idx[i]];
241
302
  }
242
303
 
243
- VectorXd alpha0 = logistic_ridge_init(Ytr_pm1, Xtr, lambda);
304
+ VectorXd alpha0 = logistic_ridge_init_y01(Ytr01, Xtr, lambda);
244
305
  VectorXd alpha_fold;
245
306
  if (with_pgd) {
307
+ // Hard-label only path (soft labels rejected upstream).
308
+ VectorXd Ytr_pm1(ntr);
309
+ for (int i = 0; i < ntr; ++i) Ytr_pm1[i] = (Ytr01[i] > 0.5) ? 1.0 : -1.0;
246
310
  OptimizerOutput out = pcghal_classi_call(Ytr_pm1, Xtr, E_Nn, alpha0,
247
311
  max_iter, tol, step_factor,
248
312
  verbose);
@@ -251,12 +315,14 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
251
315
  alpha_fold = alpha0; // logistic ridge only (norm="2")
252
316
  }
253
317
 
254
- VectorXd eta = Xte * alpha_fold;
318
+ VectorXd eta_tr = Xtr * alpha_fold;
319
+ const double b0_fold = calibrate_logistic_intercept(Ytr01, eta_tr);
320
+ VectorXd eta = (Xte * alpha_fold).array() + b0_fold;
255
321
  VectorXd probs = (1.0 + (-eta.array()).exp()).inverse();
256
322
  double dev = 0.0;
257
323
  for (int i = 0; i < nte; ++i) {
258
324
  double pi = std::max(1e-15, std::min(1.0 - 1e-15, probs[i]));
259
- dev += (Yte01[i] == 1.0) ? -std::log(pi) : -std::log(1.0 - pi);
325
+ dev += -(Yte01[i] * std::log(pi) + (1.0 - Yte01[i]) * std::log(1.0 - pi));
260
326
  }
261
327
  fold_error(k - 1, j) = dev / nte;
262
328
  }
@@ -286,7 +352,7 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
286
352
 
287
353
  // Refit on full data at best_lambda (logistic ridge ± PGD).
288
354
  OptimizerOutput full_out = logistic_full_fit(
289
- Y_pm1, Xtilde, E_Nn, best_lambda,
355
+ Y01, Xtilde, E_Nn, best_lambda,
290
356
  max_iter, tol, step_factor, verbose, with_pgd);
291
357
 
292
358
  // Predict on `predict_data` if supplied (else empty vector).
@@ -298,7 +364,11 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
298
364
  MatrixXd Ktest = kernel_cross_call(X, predict_data, maxdeg, center);
299
365
  VectorXd d_inv = d_top.cwiseInverse();
300
366
  VectorXd v = U_top * (d_inv.asDiagonal() * full_out.alpha);
301
- VectorXd eta_pred = Ktest * v;
367
+ VectorXd eta_full = Xtilde * full_out.alpha;
368
+ VectorXd Y01_full(n);
369
+ for (int i = 0; i < n; ++i) Y01_full[i] = Y[i];
370
+ const double b0_full = calibrate_logistic_intercept(Y01_full, eta_full);
371
+ VectorXd eta_pred = (Ktest * v).array() + b0_full;
302
372
  predictions = (1.0 + (-eta_pred.array()).exp()).inverse();
303
373
  }
304
374
 
@@ -347,8 +347,11 @@ extern "C" SEXP single_pcghal_classi_ridge_call(SEXP X_, SEXP Y_, SEXP maxdeg_,
347
347
  if (Rf_length(Y_) != n) Rf_error("length(Y) must equal nrow(X).");
348
348
  Map<const MatrixXd> X(REAL(X_), n, p);
349
349
  Map<const VectorXd> Y01(REAL(Y_), n);
350
+ // Y must lie in [0,1]: hard {0,1} labels or soft EM-HAL posteriors. The
351
+ // logistic-ridge fit (norm="2") supports both.
350
352
  for (int i = 0; i < n; ++i) {
351
- if (Y01[i] != 0.0 && Y01[i] != 1.0) Rf_error("Y must contain only 0 and 1");
353
+ if (Y01[i] < -1e-12 || Y01[i] > 1.0 + 1e-12)
354
+ Rf_error("Y must be in [0,1]");
352
355
  }
353
356
  int maxdeg = Rf_isInteger(maxdeg_) ? INTEGER(maxdeg_)[0] : (int)REAL(maxdeg_)[0];
354
357
  int npc = Rf_isInteger(npc_) ? INTEGER(npc_)[0] : (int)REAL(npc_)[0];
@@ -365,19 +368,28 @@ extern "C" SEXP single_pcghal_classi_ridge_call(SEXP X_, SEXP Y_, SEXP maxdeg_,
365
368
  const int final_npc = (int)des.d.size();
366
369
  MatrixXd Xtilde = des.U * des.d.asDiagonal();
367
370
 
368
- VectorXd Y_pm1(n);
369
- for (int i = 0; i < n; ++i) Y_pm1[i] = (Y01[i] == 1.0) ? 1.0 : -1.0;
370
-
371
- VectorXd alpha = logistic_ridge_init(Y_pm1, Xtilde, lambda);
371
+ auto calibrate_b0 = [](const VectorXd& y01, const VectorXd& eta) {
372
+ double b0 = 0.0;
373
+ for (int it = 0; it < 50; ++it) {
374
+ VectorXd z = eta.array() + b0;
375
+ VectorXd p = (1.0 + (-z.array()).exp()).inverse();
376
+ double g = (p - y01).sum();
377
+ double h = (p.array() * (1.0 - p.array())).sum();
378
+ if (std::abs(g) < 1e-10 || h < 1e-12) break;
379
+ b0 -= g / h;
380
+ }
381
+ return b0;
382
+ };
372
383
 
384
+ VectorXd alpha = logistic_ridge_init_y01(Y01, Xtilde, lambda);
373
385
  VectorXd eta = Xtilde * alpha;
386
+ const double b0 = calibrate_b0(Y01, eta);
387
+ // Soft cross-entropy risk (equals the {-1,+1} logistic risk on hard labels).
374
388
  double risk = 0.0;
375
389
  for (int i = 0; i < n; ++i) {
376
- double ymu = Y_pm1[i] * eta[i];
377
- if (ymu > 0)
378
- risk += std::log1p(std::exp(-ymu));
379
- else
380
- risk += -ymu + std::log1p(std::exp(ymu));
390
+ const double pi = 1.0 / (1.0 + std::exp(-(eta[i] + b0)));
391
+ const double pp = std::min(1.0 - 1e-15, std::max(1e-15, pi));
392
+ risk += -(Y01[i] * std::log(pp) + (1.0 - Y01[i]) * std::log(1.0 - pp));
381
393
  }
382
394
  risk /= n;
383
395
 
@@ -392,7 +404,7 @@ extern "C" SEXP single_pcghal_classi_ridge_call(SEXP X_, SEXP Y_, SEXP maxdeg_,
392
404
  MatrixXd Ktest = kernel_cross_call(X, Xtest, maxdeg, center);
393
405
  VectorXd d_inv = des.d.array().cwiseInverse();
394
406
  VectorXd v = des.U * (d_inv.asDiagonal() * alpha);
395
- VectorXd log_odds = Ktest * v;
407
+ VectorXd log_odds = (Ktest * v).array() + b0;
396
408
  predictions = PROTECT(Rf_allocVector(REALSXP, m_pred)); prot++;
397
409
  std::copy(log_odds.data(), log_odds.data() + m_pred, REAL(predictions));
398
410
  }
@@ -8,7 +8,7 @@ can be regenerated from the package root::
8
8
  This uses ``alpha=0.05`` with the **moderate** DGP from the original
9
9
  ``ate/simulate_data.py`` script (vendored below — exact same draws thanks to
10
10
  ``np.random.seed`` + the same ``np.random.uniform`` / ``normal`` /
11
- ``binomial`` call order):
11
+ ``binomial`` call order). ``ate_hapc`` is run with ``npcs = n - 1``.
12
12
 
13
13
  * ``W1 ~ Uniform(-2, 2)``
14
14
  * ``W2 ~ Normal(0, 0.5)``
@@ -37,7 +37,6 @@ DEMO_SEED = 456
37
37
  DEMO_N = 300
38
38
  DEMO_ALPHA = 0.05
39
39
  DEMO_MAX_DEGREE = 2
40
- DEMO_NPCS = 40
41
40
  DEMO_NFOLDS = 4
42
41
  DEMO_NORM = "1"
43
42
 
@@ -51,10 +50,10 @@ GRID_LENGTH_OUT = 8
51
50
 
52
51
  FIGURE_NAME = "ate_hapc_diagnostics_demo.png"
53
52
 
54
- # Pinned outputs (``alpha=0.05``, current C++/Python stack)
55
- _EXPECTED_ESTIMATE = 0.09213745592304026
56
- _EXPECTED_LOWER = -0.03604174118365536
57
- _EXPECTED_UPPER = 0.22031665302973588
53
+ # Pinned outputs (``alpha=0.05``, ``npcs = n - 1``, current C++/Python stack)
54
+ _EXPECTED_ESTIMATE = 0.07790009282426053
55
+ _EXPECTED_LOWER = -0.050705979103681936
56
+ _EXPECTED_UPPER = 0.206506164752203
58
57
 
59
58
 
60
59
  def _expit(x: np.ndarray) -> np.ndarray:
@@ -104,17 +103,22 @@ def run_ate_hapc_demo(
104
103
  *,
105
104
  plot_diagnostics: bool = False,
106
105
  ) -> "ATEResult":
107
- """Run ``ate_hapc`` with the pinned demo hyperparameters."""
106
+ """Run ``ate_hapc`` with the pinned demo hyperparameters.
107
+
108
+ Uses ``npcs = n - 1`` (sample size from ``load_demo_data``) for both
109
+ propensity and outcome stages, matching the usual HAL rank cap.
110
+ """
108
111
  from hapc import ate_hapc
109
112
 
110
113
  W, A, Y = load_demo_data()
114
+ npcs = int(W.shape[0]) - 1
111
115
  return ate_hapc(
112
116
  W,
113
117
  Y,
114
118
  A,
115
119
  alpha=DEMO_ALPHA,
116
120
  max_degree=DEMO_MAX_DEGREE,
117
- npcs=DEMO_NPCS,
121
+ npcs=npcs,
118
122
  log_lambda_prop_min=LOG_LAMBDA_PROP_MIN,
119
123
  log_lambda_prop_max=LOG_LAMBDA_PROP_MAX,
120
124
  grid_length_prop=GRID_LENGTH_PROP,
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes