PyPI - hapc - Versions diffs - 2.0.2__tar.gz → 2.3.0__tar.gz - Mend

hapc 2.0.2.tar.gz → 2.3.0.tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (44) hide show

{hapc-2.0.2/python/hapc.egg-info → hapc-2.3.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hapc
-Version: 2.0.2
+Version: 2.3.0
 Summary: Highly Adaptive Principal Components
 Home-page: https://github.com/meixide/hapc
 Author: Carlos García Meixide

{hapc-2.0.2 → hapc-2.3.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "hapc"
-version = "2.0.2"
+version = "2.3.0"
 description = "Highly Adaptive Principal Components"
 readme = "README.md"
 requires-python = ">=3.8"

{hapc-2.0.2 → hapc-2.3.0}/python/hapc/__init__.py RENAMED Viewed

@@ -19,7 +19,7 @@ Lower-level building blocks:
 - :func:`ate_hapc` — ATE estimate + Wald CI via HAPC + outcome undersmoothing.
 """
-__version__ = "2.0.2"
+__version__ = "2.3.0"
 from .core import (
     DesignOutput,

{hapc-2.0.2 → hapc-2.3.0}/python/hapc/ate.py RENAMED Viewed

@@ -16,9 +16,9 @@ Provides :func:`ate_hapc`, a high-level convenience wrapper that:
    which ``|mean(EIF)| ≤ σ / (√n · log n)``.  This is the **undersmoothed**
    outcome model.  If no λ in the grid meets the threshold, the smallest λ
    is used.
-5. Returns the plug-in ATE point estimate at the undersmoothed model and a
-   ``(1 - alpha)`` Wald confidence interval based on the σ of the EIF at
-   that undersmoothed model.
+5. Returns a **doubly robust** ATE point estimate at the undersmoothed outcome
+   model and a ``(1 - alpha)`` Wald confidence interval from the EIF evaluated
+   at that estimate (see Notes).
 The function does not implement sample splitting / cross-fitting:
 nuisances are fit on the full sample and the EIF is evaluated on the same
@@ -47,8 +47,9 @@ class ATEResult(NamedTuple):
     Attributes
     ----------
     estimate : float
-        Plug-in ATE at the undersmoothed outcome model:
-        ``mean(μ̂_1(W) - μ̂_0(W))``.
+        Doubly robust (AIPW-style) ATE at the undersmoothed outcome model:
+        ``mean(A/π̂·(Y-μ̂₁)+μ̂₁ - (1-A)/(1-π̂)·(Y-μ̂₀) - μ̂₀)``, matching the
+        efficient influence function used for the Wald interval (see Notes).
     lower : float
         Lower endpoint of the ``(1 - alpha)`` Wald confidence interval.
     upper : float
@@ -228,15 +229,25 @@ def ate_hapc(X: np.ndarray, Y: np.ndarray, A: np.ndarray,
        specified).
     2. Fix the propensity at its CV-best λ; refit on the full sample to
        obtain ``π̂(W_i) = P(A=1 | W_i)``.
-    3. At the CV-best outcome λ, compute the ATE EIF
-       ``φ̂_diff = φ̂_1 - φ̂_0`` and let ``σ = std(φ̂_diff)``.
+    3. At the CV-best outcome λ, compute a **plugin-centered** influence vector
+       (same mean as the DR EIF at :math:`\\psi=\\overline{\\mu}_1-\\overline{\\mu}_0`)
+       and let ``σ = std(·)``.
     4. Threshold ``τ = σ / (√n · log n)``.
     5. Walk the **outcome** λ grid in **decreasing**
        order; pick the first (largest) λ for which
        ``|mean(EIF_diff)| ≤ τ`` — call it ``λ_u``.
-    6. Plug-in estimate: ``ψ̂ = mean(μ̂_1(W; λ_u) - μ̂_0(W; λ_u))``.
-       CI: ``ψ̂ ± z_{1 - α/2} · σ_u / √n`` where ``σ_u = std(EIF_diff)``
-       at ``λ_u``.
+    6. **Doubly robust** point estimate (same nuisances ``(π̂, μ̂₁, μ̂₀)``):
+       ``ψ̂ = mean(A/π̂·(Y-μ̂₁)+μ̂₁ - (1-A)/(1-π̂)·(Y-μ̂₀) - μ̂₀)``.
+       One-step influence function (centered at ``ψ̂``):
+       ``φ_i = A_i/π̂_i·(Y_i-μ̂_{1i}) + μ̂_{1i} - (1-A_i)/(1-π̂_i)·(Y_i-μ̂_{0i})
+       - μ̂_{0i} - ψ̂``.
+       CI: ``ψ̂ ± z_{1-α/2} · std(φ) / √n``.
+       This contrasts with **plug-in** G-computation ``mean(μ̂₁(W)-μ̂₀(W))``,
+       which can be materially biased when both nuisances are estimated on the
+       same sample and the outcome regressions are regularized.  The DR
+       ``ψ̂`` is consistent if **either** the propensity **or** the pair
+       ``(μ̂₁, μ̂₀)`` is correctly specified (standard double robustness).
     Examples
     --------
@@ -329,38 +340,60 @@ def ate_hapc(X: np.ndarray, Y: np.ndarray, A: np.ndarray,
             )
         return p[:n], p[n:]
-    def _eif_diff(mu1: np.ndarray, mu0: np.ndarray) -> np.ndarray:
+    def _eif_plugin_centered(mu1: np.ndarray, mu0: np.ndarray) -> np.ndarray:
+        """Plugin-centered influence vector (undersmoothing gate only).
+        Its mean matches the DR EIF evaluated at plug-in
+        :math:`\\psi=\\overline{\\mu}_1-\\overline{\\mu}_0`. The returned ATE
+        uses ``_psi_dr`` / ``_eif_dr`` instead.
+        """
         eif1 = (A01 / pi1) * (Y - mu1) - (mu1 - mu1.mean())
         eif0 = ((1.0 - A01) / (1.0 - pi1)) * (Y - mu0) - (mu0 - mu0.mean())
         return eif1 - eif0
+    def _psi_dr(mu1: np.ndarray, mu0: np.ndarray) -> float:
+        return float(
+            np.mean(
+                (A01 / pi1) * (Y - mu1)
+                + mu1
+                - ((1.0 - A01) / (1.0 - pi1)) * (Y - mu0)
+                - mu0
+            )
+        )
+    def _eif_dr(mu1: np.ndarray, mu0: np.ndarray, psi: float) -> np.ndarray:
+        return (
+            (A01 / pi1) * (Y - mu1)
+            + mu1
+            - ((1.0 - A01) / (1.0 - pi1)) * (Y - mu0)
+            - mu0
+            - psi
+        )
     # --- 3. σ at CV configuration → threshold τ ----------------------------
     mu1_cv, mu0_cv = _mu_pair(lam_out_cv)
-    eif_cv = _eif_diff(mu1_cv, mu0_cv)
+    eif_cv = _eif_plugin_centered(mu1_cv, mu0_cv)
     sigma_cv = float(np.std(eif_cv, ddof=0))
     threshold = sigma_cv / (np.sqrt(n) * np.log(n))
     # --- 4. Undersmoothing sweep: largest λ → smallest --------------------
     lam_und: Optional[float] = None
-    eif_und: Optional[np.ndarray] = None
     mu1_und = mu0_und = None
     for lam in np.sort(lambdas_out)[::-1]:
         try:
             mu1, mu0 = _mu_pair(float(lam))
         except Exception:
             continue
-        eif = _eif_diff(mu1, mu0)
+        eif = _eif_plugin_centered(mu1, mu0)
         if abs(eif.mean()) <= threshold:
             lam_und = float(lam)
             mu1_und, mu0_und = mu1, mu0
-            eif_und = eif
             break
-    if eif_und is None:
+    if lam_und is None:
         # Threshold never met → fall back to the smallest λ in the grid.
         lam_und = float(lambdas_out.min())
         mu1_und, mu0_und = _mu_pair(lam_und)
-        eif_und = _eif_diff(mu1_und, mu0_und)
     if plot_diagnostics:
         t_lams: list[float] = []
@@ -370,7 +403,7 @@ def ate_hapc(X: np.ndarray, Y: np.ndarray, A: np.ndarray,
                 mu1, mu0 = _mu_pair(float(lam))
             except Exception:
                 continue
-            eif = _eif_diff(mu1, mu0)
+            eif = _eif_plugin_centered(mu1, mu0)
             t_lams.append(float(lam))
             t_abs.append(float(np.abs(eif.mean())))
         _plot_ate_diagnostics(
@@ -379,9 +412,10 @@ def ate_hapc(X: np.ndarray, Y: np.ndarray, A: np.ndarray,
             lam_prop_cv, lam_out_cv, lam_und, threshold,
         )
-    # --- 5. Point estimate + (1 - alpha) Wald CI --------------------------
-    psi = float(np.mean(mu1_und - mu0_und))
-    sigma_und = float(np.std(eif_und, ddof=0))
+    # --- 5. Doubly robust point estimate + (1 - alpha) Wald CI --------------
+    psi = _psi_dr(mu1_und, mu0_und)
+    eif_dr = _eif_dr(mu1_und, mu0_und, psi)
+    sigma_und = float(np.std(eif_dr, ddof=0))
     z = float(_normal.ppf(1.0 - alpha / 2.0))
     half = z * sigma_und / np.sqrt(n)

{hapc-2.0.2 → hapc-2.3.0}/python/hapc/cv.py RENAMED Viewed

@@ -18,7 +18,11 @@ import numpy as np
 from . import hapc_core
 from .core import _C, cross_kernel_hapc, design_hapc
-from .single import single_pcghal_classification_lasso
+from .single import (
+    _check_binomial_labels,
+    _to_soft01,
+    single_pcghal_classification_lasso,
+)
 class CVResult(NamedTuple):
@@ -376,6 +380,9 @@ def pcghal_cv_classi_lasso(X: np.ndarray, Y: np.ndarray,
     if not np.all(lams > 0):
         raise ValueError("All lambdas must be > 0 for logistic LASSO.")
+    # Soft target in [0,1] used for the held-out cross-entropy deviance
+    # (accepts hard {0,1}/{-1,+1} or fractional EM-HAL posteriors).
+    q = _to_soft01(Y)
     folds = _native_folds(n, int(nfolds))
     L = lams.size
     fold_dev = np.full((int(nfolds), L), np.nan)
@@ -386,7 +393,7 @@ def pcghal_cv_classi_lasso(X: np.ndarray, Y: np.ndarray,
         if te.size == 0 or tr.size == 0:
             continue
         Xtr, Ytr = X[tr], Y[tr]
-        Xte, Yte = X[te], Y[te]
+        Xte, Yte = X[te], q[te]
         for j, lam in enumerate(lams):
             res = single_pcghal_classification_lasso(
@@ -395,9 +402,7 @@ def pcghal_cv_classi_lasso(X: np.ndarray, Y: np.ndarray,
                 verbose=bool(verbose), max_iter=int(max_iter),
             )
             probs = np.clip(res.probabilities, 1e-15, 1 - 1e-15)
-            yte01 = (Yte == 1).astype(np.float64) if set(np.unique(Yte).tolist()).issubset({0.0, 1.0}) \
-                else (Yte > 0).astype(np.float64)
-            dev = -(yte01 * np.log(probs) + (1 - yte01) * np.log(1 - probs))
+            dev = -(Yte * np.log(probs) + (1 - Yte) * np.log(1 - probs))
             fold_dev[k - 1, j] = float(dev.mean())
     deviances = np.nanmean(fold_dev, axis=0)
@@ -500,6 +505,8 @@ def cv_hapc(X: np.ndarray, Y: np.ndarray,
     lams = _grid(None, log_lambda_min, log_lambda_max, grid_length)
     if family == "binomial":
+        # Validate labels; allow soft labels in [0,1] only for norm in {"1","2"}.
+        _check_binomial_labels(Y, norm)
         if norm in {"sv", "2"}:
             return pcghal_cv_classi(
                 X, Y, max_degree=max_degree, npcs=npcs,

{hapc-2.0.2 → hapc-2.3.0}/python/hapc/single.py RENAMED Viewed

@@ -95,6 +95,79 @@ def _to_pm1(Y: np.ndarray, *, verbose: bool = False) -> np.ndarray:
     )
+def _label_kind(Y: np.ndarray) -> str:
+    """Classify a binomial response vector.
+    Returns ``"01"`` (hard labels in ``{0,1}``), ``"pm1"`` (hard labels in
+    ``{-1,+1}``), or ``"soft"`` (fractional labels in ``[0,1]``, e.g. EM-HAL
+    E-step posteriors). Raises ``ValueError`` if any value falls outside
+    ``[0,1]`` and the set is not exactly ``{-1,+1}``.
+    """
+    Y = np.asarray(Y, dtype=np.float64).ravel()
+    u = np.unique(Y[~np.isnan(Y)])
+    s = set(u.tolist())
+    if s.issubset({0.0, 1.0}):
+        return "01"
+    if s == {-1.0, 1.0}:
+        return "pm1"
+    if u.size and u.min() >= 0.0 and u.max() <= 1.0:
+        return "soft"
+    raise ValueError(
+        "family='binomial' requires Y in {0,1}, {-1,+1}, or soft labels in "
+        "[0,1]; found values outside [0,1]."
+    )
+def _to_soft01(Y: np.ndarray) -> np.ndarray:
+    """Map a binomial response to a soft cross-entropy target in ``[0,1]``."""
+    Y = np.asarray(Y, dtype=np.float64).ravel()
+    return (Y + 1.0) / 2.0 if _label_kind(Y) == "pm1" else Y
+def _check_binomial_labels(Y: np.ndarray, norm: str) -> str:
+    """Validate labels and enforce the soft-label norm restriction.
+    Soft labels (any value strictly inside ``(0,1)``) are supported only for
+    ``norm`` in ``{"1","2"}``; ``norm="sv"`` raises ``NotImplementedError``.
+    A warning is emitted whenever soft labels are detected. Returns the label
+    kind from :func:`_label_kind`.
+    """
+    import warnings
+    kind = _label_kind(Y)
+    if kind == "soft":
+        if norm == "sv":
+            raise NotImplementedError(
+                "Soft labels (Y in (0,1)) are not implemented for norm='sv'; "
+                "use norm='1' or norm='2'."
+            )
+        warnings.warn(
+            "Non-binary labels detected in Y: treating them as soft labels in "
+            "[0,1] (cross-entropy target). Supported only for norm='1' and "
+            "norm='2'.",
+            stacklevel=2,
+        )
+    return kind
+def _calibrate_logistic_intercept(y01: np.ndarray, eta: np.ndarray) -> float:
+    """Newton calibration for intercept with fixed linear predictor ``eta``."""
+    y01 = np.asarray(y01, dtype=np.float64).ravel()
+    eta = np.asarray(eta, dtype=np.float64).ravel()
+    if y01.shape != eta.shape:
+        raise ValueError("y01 and eta must have the same shape")
+    b0 = 0.0
+    for _ in range(50):
+        z = eta + b0
+        p = 1.0 / (1.0 + np.exp(-z))
+        g = float(np.sum(p - y01))
+        h = float(np.sum(p * (1.0 - p)))
+        if abs(g) < 1e-10 or h < 1e-12:
+            break
+        b0 -= g / h
+    return float(b0)
 # ---------------------------------------------------------------------------
 # Single λ — gaussian, norm in {"1", "2"} (closed-form)
 # ---------------------------------------------------------------------------
@@ -299,6 +372,14 @@ def single_pcghal_classification(
     res = pcghal_classification(Y_pm1, Xtilde, ENn, alpha0,
                                 max_iter=max_iter, tol=tol,
                                 step_factor=step_factor, verbose=verbose)
+    y01 = (Y_pm1 > 0).astype(np.float64)
+    eta_train = Xtilde @ np.asarray(res.alpha).ravel()
+    b0 = _calibrate_logistic_intercept(y01, eta_train)
+    ymu = Y_pm1 * (eta_train + b0)
+    risk = float(
+        np.where(ymu > 0, np.log1p(np.exp(-ymu)), -ymu + np.log1p(np.exp(ymu)))
+        .mean()
+    )
     predictions = probabilities = predicted_classes = None
     if predict is not None:
@@ -307,7 +388,7 @@ def single_pcghal_classification(
             raise ValueError(f"predict must have {p} columns")
         Ktest = cross_kernel_hapc(X, Xte, max_degree, center=center)
         v = des.U[:, :final_npc] @ ((1.0 / (des.d[:final_npc] + 1e-12)) * res.alpha)
-        log_odds = Ktest @ v
+        log_odds = Ktest @ v + b0
         predictions = log_odds
         probabilities = 1.0 / (1.0 + np.exp(-log_odds))
         predicted_classes = np.where(probabilities > 0.5, 1.0, -1.0)
@@ -315,7 +396,7 @@ def single_pcghal_classification(
     return SinglePcghalClassificationResult(
         alpha=res.alpha, predictions=predictions,
         probabilities=probabilities, predicted_classes=predicted_classes,
-        lambda_=float(lambda_), risk=res.risk, iter=res.iter,
+        lambda_=float(lambda_), risk=risk, iter=res.iter,
     )
@@ -341,22 +422,21 @@ def single_pcghal_classification_ridge_only(
     SinglePcghalClassificationResult
     """
     X, Y, n, p = _check_xy(X, Y)
-    Y_pm1 = _to_pm1(Y, verbose=verbose)
+    # Accept hard {0,1}/{-1,+1} or soft [0,1] labels (cross-entropy target).
+    y01 = _to_soft01(Y)
     des = design_hapc(X, max_degree, npcs, center=center)
     final_npc = des.d.shape[0]
     Xtilde = des.U[:, :final_npc] * des.d[:final_npc]
     alpha = np.asarray(
-        hapc_core.logistic_ridge_init(_C(Y_pm1), _C(Xtilde), float(lambda_))
+        hapc_core.logistic_ridge_init_y01(_C(y01), _C(Xtilde), float(lambda_))
     ).ravel()
     eta = Xtilde @ alpha
-    ymu = Y_pm1 * eta
-    risk = float(
-        np.where(ymu > 0, np.log1p(np.exp(-ymu)), -ymu + np.log1p(np.exp(ymu)))
-        .mean()
-    )
+    b0 = _calibrate_logistic_intercept(y01, eta)
+    phat = np.clip(1.0 / (1.0 + np.exp(-(eta + b0))), 1e-15, 1 - 1e-15)
+    risk = float((-(y01 * np.log(phat) + (1 - y01) * np.log(1 - phat))).mean())
     predictions = probabilities = predicted_classes = None
     if predict is not None:
@@ -365,7 +445,7 @@ def single_pcghal_classification_ridge_only(
             raise ValueError(f"predict must have {p} columns")
         Ktest = cross_kernel_hapc(X, Xte, max_degree, center=center)
         v = des.U[:, :final_npc] @ ((1.0 / (des.d[:final_npc] + 1e-12)) * alpha)
-        log_odds = Ktest @ v
+        log_odds = Ktest @ v + b0
         predictions = log_odds
         probabilities = 1.0 / (1.0 + np.exp(-log_odds))
         predicted_classes = np.where(probabilities > 0.5, 1.0, -1.0)
@@ -452,13 +532,26 @@ def single_pcghal_classification_lasso(
         raise ValueError(f"lambda_ must be > 0 for LASSO; got {lambda_}")
     X, Y, n, p = _check_xy(X, Y)
-    Y_pm1 = _to_pm1(Y, verbose=verbose)
-    Y_01 = (Y_pm1 > 0).astype(np.int64)
+    # Accept hard {0,1}/{-1,+1} or soft [0,1] labels (cross-entropy target).
+    q = _to_soft01(Y)
     des = design_hapc(X, max_degree, npcs, center=center)
     final_npc = des.d.shape[0]
     Xtilde = des.U[:, :final_npc] * des.d[:final_npc]
+    # For soft labels, replicate each row as a (label=1, weight=q) and
+    # (label=0, weight=1-q) pair so the sample-weighted logistic loss equals
+    # the soft cross-entropy. On hard labels this reduces to the plain fit.
+    is_soft = bool(np.any((q > 1e-12) & (q < 1.0 - 1e-12)))
+    if is_soft:
+        Xfit = _C(np.vstack([Xtilde, Xtilde]))
+        yfit = np.concatenate([np.ones(n), np.zeros(n)]).astype(np.int64)
+        wfit = np.concatenate([q, 1.0 - q]).astype(np.float64)
+    else:
+        Xfit = _C(Xtilde)
+        yfit = (q > 0.5).astype(np.int64)
+        wfit = None
     C = 1.0 / (n * float(lambda_))
     # sklearn>=1.8 deprecated penalty="l1" in favour of l1_ratio=1 with the
     # liblinear solver; older versions still need penalty="l1". Try the new
@@ -467,23 +560,28 @@ def single_pcghal_classification_lasso(
     sig_params = inspect.signature(LogisticRegression).parameters
     common_kw = dict(solver="liblinear", C=C, fit_intercept=False,
                      max_iter=int(max_iter))
+    def _fit(**ctor):
+        m = LogisticRegression(**ctor, **common_kw)
+        if wfit is None:
+            m.fit(Xfit, yfit)
+        else:
+            m.fit(Xfit, yfit, sample_weight=wfit)
+        return m
     if "l1_ratio" in sig_params and "penalty" in sig_params:
         try:
-            model = LogisticRegression(l1_ratio=1.0, **common_kw)
-            model.fit(_C(Xtilde), Y_01)
+            model = _fit(l1_ratio=1.0)
         except (TypeError, ValueError):
-            model = LogisticRegression(penalty="l1", **common_kw)
-            model.fit(_C(Xtilde), Y_01)
+            model = _fit(penalty="l1")
     else:  # pragma: no cover  (very old sklearn)
-        model = LogisticRegression(penalty="l1", **common_kw)
-        model.fit(_C(Xtilde), Y_01)
+        model = _fit(penalty="l1")
     alpha = np.asarray(model.coef_, dtype=np.float64).ravel()
+    b0 = _calibrate_logistic_intercept(q, Xtilde @ alpha)
-    eta = Xtilde @ alpha
-    ymu = Y_pm1 * eta
-    risk = float(
-        np.where(ymu > 0, np.log1p(np.exp(-ymu)), -ymu + np.log1p(np.exp(ymu))).mean()
-    )
+    eta = Xtilde @ alpha + b0
+    phat = np.clip(1.0 / (1.0 + np.exp(-eta)), 1e-15, 1 - 1e-15)
+    risk = float((-(q * np.log(phat) + (1 - q) * np.log(1 - phat))).mean())
     predictions = probabilities = predicted_classes = None
     if predict is not None:
@@ -492,7 +590,7 @@ def single_pcghal_classification_lasso(
             raise ValueError(f"predict must have {p} columns")
         Ktest = cross_kernel_hapc(X, Xte, max_degree, center=center)
         v = des.U[:, :final_npc] @ ((1.0 / (des.d[:final_npc] + 1e-12)) * alpha)
-        log_odds = Ktest @ v
+        log_odds = Ktest @ v + b0
         predictions = log_odds
         probabilities = 1.0 / (1.0 + np.exp(-log_odds))
         predicted_classes = np.where(probabilities > 0.5, 1.0, -1.0)
@@ -531,8 +629,10 @@ def hapc(X: np.ndarray, Y: np.ndarray,
     X : np.ndarray, shape (n, p)
         Features.
     Y : np.ndarray, shape (n,)
-        Response. For ``family="binomial"`` must contain only ``{0,1}`` or
-        ``{-1,+1}``.
+        Response. For ``family="binomial"``: hard labels in ``{0,1}`` or
+        ``{-1,+1}``, or soft labels in ``[0,1]`` (e.g. EM-HAL E-step
+        posteriors). Soft labels are supported only for ``norm`` in
+        ``{"1","2"}``; ``norm="sv"`` requires hard labels.
     family : {"gaussian", "binomial"}, default "gaussian"
         Loss family.
     max_degree : int, default 1
@@ -588,6 +688,8 @@ def hapc(X: np.ndarray, Y: np.ndarray,
         npcs = int(X.shape[0])
     if family == "binomial":
+        # Validate labels; allow soft labels in [0,1] only for norm in {"1","2"}.
+        _check_binomial_labels(Y, norm)
         if norm == "sv":
             return single_pcghal_classification(
                 X, Y, max_degree, npcs, lambda_,

{hapc-2.0.2 → hapc-2.3.0/python/hapc.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hapc
-Version: 2.0.2
+Version: 2.3.0
 Summary: Highly Adaptive Principal Components
 Home-page: https://github.com/meixide/hapc
 Author: Carlos García Meixide

{hapc-2.0.2 → hapc-2.3.0}/src/bindings.cpp RENAMED Viewed

@@ -117,4 +117,9 @@ PYBIND11_MODULE(hapc_core, m) {
     m.def("logistic_ridge_init", &logistic_ridge_init,
           py::arg("Y"), py::arg("X"), py::arg("lambda"));
+    // Soft-label logistic ridge initialiser: target Y may be any value in
+    // [0,1] (hard {0,1} labels or fractional EM-HAL E-step posteriors).
+    m.def("logistic_ridge_init_y01", &logistic_ridge_init_y01,
+          py::arg("Y"), py::arg("X"), py::arg("lambda"));
 }

{hapc-2.0.2 → hapc-2.3.0}/src/hapc_core.hpp RENAMED Viewed

@@ -91,6 +91,11 @@ FastCVOutput fasthal_cv_python(const MatrixXd& X, const VectorXd& Y, int npc,
 // (internally multiplied by n, matching logistic_call).
 VectorXd logistic_ridge_init(const VectorXd& Y_pm1, const MatrixXd& X, double lambda);
+// Soft-label variant: target `y01` may take any value in [0, 1] (hard {0,1}
+// labels or fractional EM-HAL E-step posteriors). On hard {0,1} inputs the
+// result is identical to logistic_ridge_init. lambda has the same scaling.
+VectorXd logistic_ridge_init_y01(const VectorXd& y01, const MatrixXd& X, double lambda);
 // Cross-validation output for binomial (logistic) HAPC.
 struct CVClassiOutput {
     std::vector<double> deviances;
@@ -101,7 +106,9 @@ struct CVClassiOutput {
 };
 // Python-friendly binomial CV (mirrors R `pchal_cv_classi_call`).
-// Y must contain only 0 or 1 values.
+// Y must lie in [0,1]: hard {0,1} labels or soft EM-HAL posteriors. Soft
+// labels are supported only when with_pgd == false (norm="2"); with_pgd ==
+// true (norm="sv") rejects soft labels.
 //
 // When `with_pgd == true` (default): per fold runs logistic-ridge initialiser
 // followed by projected gradient descent on logistic loss (norm="sv").

{hapc-2.0.2 → hapc-2.3.0}/src/pcghal_cv_classi_cpp.cpp RENAMED Viewed

@@ -28,10 +28,15 @@
 // rule `beta := delta_beta` (i.e. solving the full normal equation each
 // iteration, treating the IRLS working response as the regression target).
 // ---------------------------------------------------------------------------
-VectorXd logistic_ridge_init(const VectorXd& Y_pm1, const MatrixXd& X, double lambda) {
+// Soft-label logistic ridge.  The target `y01` may take any value in [0, 1]:
+// hard {0,1} labels or fractional EM-HAL E-step posteriors.  The IRLS update
+// is unchanged; fractional targets are standard for cross-entropy
+// minimisation, so on hard {0,1} inputs the result is bit-identical to the
+// former {-1,+1} implementation.
+VectorXd logistic_ridge_init_y01(const VectorXd& y01, const MatrixXd& X, double lambda) {
     const int n = X.rows();
     const int p = X.cols();
-    if (Y_pm1.size() != n) {
+    if (y01.size() != n) {
         throw std::runtime_error("logistic_ridge_init: Y length must match nrow(X).");
     }
     // Match logistic_call: lambda is multiplied by n internally.
@@ -39,12 +44,6 @@ VectorXd logistic_ridge_init(const VectorXd& Y_pm1, const MatrixXd& X, double la
     const int max_iter = 100;
     const double tol = 1e-8;
-    // logistic_call expects Y in {-1,+1} but treats it via the GLM update with
-    // the {0,1} working response.  We replicate that behaviour exactly: convert
-    // back to a {0,1} response y01 = (Y_pm1 + 1) / 2 to compute mu/working z.
-    VectorXd y01(n);
-    for (int i = 0; i < n; ++i) y01[i] = (Y_pm1[i] > 0) ? 1.0 : 0.0;
     VectorXd beta = VectorXd::Zero(p);
     for (int iter = 0; iter < max_iter; ++iter) {
         VectorXd eta = X * beta;
@@ -66,6 +65,51 @@ VectorXd logistic_ridge_init(const VectorXd& Y_pm1, const MatrixXd& X, double la
     return beta;
 }
+// Backward-compatible wrapper: accepts Y in {-1,+1} and converts to {0,1}.
+// Used by the PGD (norm="sv") single-fit path, which is hard-label only.
+VectorXd logistic_ridge_init(const VectorXd& Y_pm1, const MatrixXd& X, double lambda) {
+    const int n = X.rows();
+    VectorXd y01(n);
+    for (int i = 0; i < n; ++i) y01[i] = (Y_pm1[i] > 0) ? 1.0 : 0.0;
+    return logistic_ridge_init_y01(y01, X, lambda);
+}
+static double calibrate_logistic_intercept(const VectorXd& Y01,
+                                           const VectorXd& eta) {
+    const int n = (int)Y01.size();
+    if (eta.size() != n) {
+        throw std::runtime_error("calibrate_logistic_intercept: length mismatch");
+    }
+    double b0 = 0.0;
+    for (int it = 0; it < 50; ++it) {
+        const VectorXd z = eta.array() + b0;
+        const VectorXd p = (1.0 + (-z.array()).exp()).inverse();
+        const double g = (p - Y01).sum();
+        const double h = (p.array() * (1.0 - p.array())).sum();
+        if (std::abs(g) < 1e-10 || h < 1e-12) break;
+        b0 -= g / h;
+    }
+    return b0;
+}
+// Soft cross-entropy risk for fractional targets y01 in [0,1], given a linear
+// predictor `eta` (intercept already folded in).  On hard {0,1} labels this
+// equals the former {-1,+1} logistic risk, so behaviour is unchanged on
+// binary inputs.
+static double logistic_risk_y01(const VectorXd& y01, const VectorXd& eta) {
+    const int n = (int)y01.size();
+    if (eta.size() != n) {
+        throw std::runtime_error("logistic_risk_y01: length mismatch");
+    }
+    double risk = 0.0;
+    for (int i = 0; i < n; ++i) {
+        const double pi = 1.0 / (1.0 + std::exp(-eta[i]));
+        const double p = std::min(1.0 - 1e-15, std::max(1e-15, pi));
+        risk += -(y01[i] * std::log(p) + (1.0 - y01[i]) * std::log(1.0 - p));
+    }
+    return risk / n;
+}
 // ---------------------------------------------------------------------------
 // Build the Eigen-friendly "Xtilde = U_top * diag(d_top)" representation,
 // returning final_npc (which may be capped by the design rank).
@@ -104,33 +148,35 @@ static std::vector<int> make_folds(int n, int K) {
 // for the post-CV refit). When `with_pgd == false`, returns the logistic-ridge
 // initialiser α directly with its training logistic risk; otherwise runs the
 // PGD step on top of it (norm="sv").
-static OptimizerOutput logistic_full_fit(const VectorXd& Y_pm1,
+static OptimizerOutput logistic_full_fit(const VectorXd& Y01,
                                           const MatrixXd& Xtilde,
                                           const MatrixXd& E_Nn,
                                           double lambda,
                                           int max_iter, double tol,
                                           double step_factor, bool verbose,
                                           bool with_pgd) {
-    VectorXd alpha0 = logistic_ridge_init(Y_pm1, Xtilde, lambda);
-    if (with_pgd) {
-        return pcghal_classi_call(Y_pm1, Xtilde, E_Nn, alpha0,
-                                  max_iter, tol, step_factor, verbose);
-    }
-    // Logistic-ridge-only path: assemble the same OptimizerOutput shape with
-    // logistic training risk evaluated on (Y_pm1, Xtilde, alpha0).
+    VectorXd alpha0 = logistic_ridge_init_y01(Y01, Xtilde, lambda);
     const int n = Xtilde.rows();
-    VectorXd eta = Xtilde * alpha0;
-    double risk = 0.0;
-    for (int i = 0; i < n; ++i) {
-        const double ymu = Y_pm1[i] * eta[i];
-        risk += (ymu > 0) ? std::log1p(std::exp(-ymu))
-                          : -ymu + std::log1p(std::exp(ymu));
+    VectorXd alpha_fit;
+    if (with_pgd) {
+        // PGD (norm="sv") uses the {-1,+1} logistic loss and is reached only
+        // for hard labels (soft labels are rejected upstream), so thresholding
+        // at 0.5 recovers the exact {-1,+1} encoding.
+        VectorXd Y_pm1(n);
+        for (int i = 0; i < n; ++i) Y_pm1[i] = (Y01[i] > 0.5) ? 1.0 : -1.0;
+        OptimizerOutput out = pcghal_classi_call(Y_pm1, Xtilde, E_Nn, alpha0,
+                                                 max_iter, tol, step_factor, verbose);
+        alpha_fit = out.alpha;
+    } else {
+        alpha_fit = alpha0;  // logistic ridge only (norm="2")
     }
-    risk /= n;
+    VectorXd eta = Xtilde * alpha_fit;
+    const double b0 = calibrate_logistic_intercept(Y01, eta);
+    const double risk = logistic_risk_y01(Y01, eta.array() + b0);
     OptimizerOutput out;
-    out.alpha = alpha0;
-    out.alphaiters = MatrixXd::Zero(0, alpha0.size());
-    out.beta = E_Nn * alpha0;
+    out.alpha = alpha_fit;
+    out.alphaiters = MatrixXd::Zero(0, alpha_fit.size());
+    out.beta = E_Nn * alpha_fit;
     out.risk = risk;
     out.iter = 0;
     return out;
@@ -146,10 +192,21 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
     const int n = X.rows();
     const int p = X.cols();
     if (Y.size() != n) throw std::runtime_error("pcghal_cv_classi: length(Y) != nrow(X)");
+    // Y must lie in [0,1]: hard {0,1} labels or soft EM-HAL posteriors. Soft
+    // labels (any value strictly inside (0,1)) are supported only for the
+    // logistic-ridge path (norm="2"); the PGD path (norm="sv", with_pgd=true)
+    // is not implemented for soft labels.
+    bool soft = false;
     for (int i = 0; i < n; ++i) {
-        if (Y[i] != 0.0 && Y[i] != 1.0) {
-            throw std::runtime_error("pcghal_cv_classi: Y must be 0/1");
+        if (Y[i] < -1e-12 || Y[i] > 1.0 + 1e-12) {
+            throw std::runtime_error("pcghal_cv_classi: Y must be in [0,1]");
         }
+        if (Y[i] > 1e-12 && Y[i] < 1.0 - 1e-12) soft = true;
+    }
+    if (soft && with_pgd) {
+        throw std::runtime_error(
+            "pcghal_cv_classi: soft labels (Y in (0,1)) are not implemented for "
+            "norm='sv'; use norm='1' or norm='2'.");
     }
     const int L = (int)lambdas.size();
     if (L <= 0) throw std::runtime_error("pcghal_cv_classi: lambdas must be non-empty");
@@ -167,9 +224,9 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
     const int final_npc = compute_classi_design(X, maxdeg, npc_eff, center,
                                                  Xtilde, E_Nn, U_top, d_top);
-    // Y in {-1,+1} for the optimiser
-    VectorXd Y_pm1(n);
-    for (int i = 0; i < n; ++i) Y_pm1[i] = (Y[i] == 1.0) ? 1.0 : -1.0;
+    // Soft target in [0,1] used throughout (the ridge/CE machinery works
+    // directly in this space; the PGD branch builds {-1,+1} locally).
+    const VectorXd& Y01 = Y;
     // Degenerate case: R `hapc(family="binomial", …)` passes nfolds=1 with a
     // single λ — there is no proper train/test split.  Fit on full data and
@@ -182,7 +239,7 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
         for (int j = 0; j < L; ++j) {
             const double lam = lambdas[j];
             OptimizerOutput full_out = logistic_full_fit(
-                Y_pm1, Xtilde, E_Nn, lam, max_iter, tol, step_factor,
+                Y01, Xtilde, E_Nn, lam, max_iter, tol, step_factor,
                 verbose, with_pgd);
             deviances[j] = full_out.risk;
             if (full_out.risk < best_val) {
@@ -199,7 +256,11 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
             MatrixXd Ktest = kernel_cross_call(X, predict_data, maxdeg, center);
             VectorXd d_inv = d_top.cwiseInverse();
             VectorXd v = U_top * (d_inv.asDiagonal() * best_alpha);
-            VectorXd eta_pred = Ktest * v;
+            VectorXd eta_full = Xtilde * best_alpha;
+            VectorXd Y01_full(n);
+            for (int i = 0; i < n; ++i) Y01_full[i] = Y[i];
+            const double b0_full = calibrate_logistic_intercept(Y01_full, eta_full);
+            VectorXd eta_pred = (Ktest * v).array() + b0_full;
             predictions = (1.0 + (-eta_pred.array()).exp()).inverse();
         }
         CVClassiOutput out;
@@ -230,19 +291,22 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
             if (ntr == 0 || nte == 0) continue;
             MatrixXd Xtr(ntr, final_npc), Xte(nte, final_npc);
-            VectorXd Ytr_pm1(ntr), Yte01(nte);
+            VectorXd Ytr01(ntr), Yte01(nte);
             for (int i = 0; i < ntr; ++i) {
                 Xtr.row(i) = Xtilde.row(tr_idx[i]);
-                Ytr_pm1[i] = Y_pm1[tr_idx[i]];
+                Ytr01[i] = Y01[tr_idx[i]];
             }
             for (int i = 0; i < nte; ++i) {
                 Xte.row(i) = Xtilde.row(te_idx[i]);
-                Yte01[i] = Y[te_idx[i]];
+                Yte01[i] = Y01[te_idx[i]];
             }
-            VectorXd alpha0 = logistic_ridge_init(Ytr_pm1, Xtr, lambda);
+            VectorXd alpha0 = logistic_ridge_init_y01(Ytr01, Xtr, lambda);
             VectorXd alpha_fold;
             if (with_pgd) {
+                // Hard-label only path (soft labels rejected upstream).
+                VectorXd Ytr_pm1(ntr);
+                for (int i = 0; i < ntr; ++i) Ytr_pm1[i] = (Ytr01[i] > 0.5) ? 1.0 : -1.0;
                 OptimizerOutput out = pcghal_classi_call(Ytr_pm1, Xtr, E_Nn, alpha0,
                                                           max_iter, tol, step_factor,
                                                           verbose);
@@ -251,12 +315,14 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
                 alpha_fold = alpha0;  // logistic ridge only (norm="2")
             }
-            VectorXd eta = Xte * alpha_fold;
+            VectorXd eta_tr = Xtr * alpha_fold;
+            const double b0_fold = calibrate_logistic_intercept(Ytr01, eta_tr);
+            VectorXd eta = (Xte * alpha_fold).array() + b0_fold;
             VectorXd probs = (1.0 + (-eta.array()).exp()).inverse();
             double dev = 0.0;
             for (int i = 0; i < nte; ++i) {
                 double pi = std::max(1e-15, std::min(1.0 - 1e-15, probs[i]));
-                dev += (Yte01[i] == 1.0) ? -std::log(pi) : -std::log(1.0 - pi);
+                dev += -(Yte01[i] * std::log(pi) + (1.0 - Yte01[i]) * std::log(1.0 - pi));
             }
             fold_error(k - 1, j) = dev / nte;
         }
@@ -286,7 +352,7 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
     // Refit on full data at best_lambda (logistic ridge ± PGD).
     OptimizerOutput full_out = logistic_full_fit(
-        Y_pm1, Xtilde, E_Nn, best_lambda,
+        Y01, Xtilde, E_Nn, best_lambda,
         max_iter, tol, step_factor, verbose, with_pgd);
     // Predict on `predict_data` if supplied (else empty vector).
@@ -298,7 +364,11 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
         MatrixXd Ktest = kernel_cross_call(X, predict_data, maxdeg, center);
         VectorXd d_inv = d_top.cwiseInverse();
         VectorXd v = U_top * (d_inv.asDiagonal() * full_out.alpha);
-        VectorXd eta_pred = Ktest * v;
+        VectorXd eta_full = Xtilde * full_out.alpha;
+        VectorXd Y01_full(n);
+        for (int i = 0; i < n; ++i) Y01_full[i] = Y[i];
+        const double b0_full = calibrate_logistic_intercept(Y01_full, eta_full);
+        VectorXd eta_pred = (Ktest * v).array() + b0_full;
         predictions = (1.0 + (-eta_pred.array()).exp()).inverse();
     }

{hapc-2.0.2 → hapc-2.3.0}/src/r_bindings.cpp RENAMED Viewed

@@ -347,8 +347,11 @@ extern "C" SEXP single_pcghal_classi_ridge_call(SEXP X_, SEXP Y_, SEXP maxdeg_,
     if (Rf_length(Y_) != n) Rf_error("length(Y) must equal nrow(X).");
     Map<const MatrixXd> X(REAL(X_), n, p);
     Map<const VectorXd> Y01(REAL(Y_), n);
+    // Y must lie in [0,1]: hard {0,1} labels or soft EM-HAL posteriors. The
+    // logistic-ridge fit (norm="2") supports both.
     for (int i = 0; i < n; ++i) {
-        if (Y01[i] != 0.0 && Y01[i] != 1.0) Rf_error("Y must contain only 0 and 1");
+        if (Y01[i] < -1e-12 || Y01[i] > 1.0 + 1e-12)
+            Rf_error("Y must be in [0,1]");
     }
     int maxdeg = Rf_isInteger(maxdeg_) ? INTEGER(maxdeg_)[0] : (int)REAL(maxdeg_)[0];
     int npc = Rf_isInteger(npc_) ? INTEGER(npc_)[0] : (int)REAL(npc_)[0];
@@ -365,19 +368,28 @@ extern "C" SEXP single_pcghal_classi_ridge_call(SEXP X_, SEXP Y_, SEXP maxdeg_,
     const int final_npc = (int)des.d.size();
     MatrixXd Xtilde = des.U * des.d.asDiagonal();
-    VectorXd Y_pm1(n);
-    for (int i = 0; i < n; ++i) Y_pm1[i] = (Y01[i] == 1.0) ? 1.0 : -1.0;
-    VectorXd alpha = logistic_ridge_init(Y_pm1, Xtilde, lambda);
+    auto calibrate_b0 = [](const VectorXd& y01, const VectorXd& eta) {
+        double b0 = 0.0;
+        for (int it = 0; it < 50; ++it) {
+            VectorXd z = eta.array() + b0;
+            VectorXd p = (1.0 + (-z.array()).exp()).inverse();
+            double g = (p - y01).sum();
+            double h = (p.array() * (1.0 - p.array())).sum();
+            if (std::abs(g) < 1e-10 || h < 1e-12) break;
+            b0 -= g / h;
+        }
+        return b0;
+    };
+    VectorXd alpha = logistic_ridge_init_y01(Y01, Xtilde, lambda);
     VectorXd eta = Xtilde * alpha;
+    const double b0 = calibrate_b0(Y01, eta);
+    // Soft cross-entropy risk at eta + b0 (equals the {-1,+1} logistic risk on hard labels, up to the 1e-15 clamp).
     double risk = 0.0;
     for (int i = 0; i < n; ++i) {
-        double ymu = Y_pm1[i] * eta[i];
-        if (ymu > 0)
-            risk += std::log1p(std::exp(-ymu));
-        else
-            risk += -ymu + std::log1p(std::exp(ymu));
+        const double pi = 1.0 / (1.0 + std::exp(-(eta[i] + b0)));
+        const double pp = std::min(1.0 - 1e-15, std::max(1e-15, pi));
+        risk += -(Y01[i] * std::log(pp) + (1.0 - Y01[i]) * std::log(1.0 - pp));
     }
     risk /= n;
@@ -392,7 +404,7 @@ extern "C" SEXP single_pcghal_classi_ridge_call(SEXP X_, SEXP Y_, SEXP maxdeg_,
         MatrixXd Ktest = kernel_cross_call(X, Xtest, maxdeg, center);
         VectorXd d_inv = des.d.array().cwiseInverse();
         VectorXd v = des.U * (d_inv.asDiagonal() * alpha);
-        VectorXd log_odds = Ktest * v;
+        VectorXd log_odds = (Ktest * v).array() + b0;
         predictions = PROTECT(Rf_allocVector(REALSXP, m_pred)); prot++;
         std::copy(log_odds.data(), log_odds.data() + m_pred, REAL(predictions));
     }

{hapc-2.0.2 → hapc-2.3.0}/tests/test_ate_hapc_diagnostics_example.py RENAMED Viewed

@@ -8,7 +8,7 @@ can be regenerated from the package root::
 This uses ``alpha=0.05`` with the **moderate** DGP from the original
 ``ate/simulate_data.py`` script (vendored below — exact same draws thanks to
 ``np.random.seed`` + the same ``np.random.uniform`` / ``normal`` /
-``binomial`` call order):
+``binomial`` call order).  ``ate_hapc`` is run with ``npcs = n - 1``.
 * ``W1 ~ Uniform(-2, 2)``
 * ``W2 ~ Normal(0, 0.5)``
@@ -37,7 +37,6 @@ DEMO_SEED = 456
 DEMO_N = 300
 DEMO_ALPHA = 0.05
 DEMO_MAX_DEGREE = 2
-DEMO_NPCS = 40
 DEMO_NFOLDS = 4
 DEMO_NORM = "1"
@@ -51,10 +50,10 @@ GRID_LENGTH_OUT = 8
 FIGURE_NAME = "ate_hapc_diagnostics_demo.png"
-# Pinned outputs (``alpha=0.05``, current C++/Python stack)
-_EXPECTED_ESTIMATE = 0.09213745592304026
-_EXPECTED_LOWER = -0.03604174118365536
-_EXPECTED_UPPER = 0.22031665302973588
+# Pinned outputs (``alpha=0.05``, ``npcs = n - 1``, current C++/Python stack)
+_EXPECTED_ESTIMATE = 0.07790009282426053
+_EXPECTED_LOWER = -0.050705979103681936
+_EXPECTED_UPPER = 0.206506164752203
 def _expit(x: np.ndarray) -> np.ndarray:
@@ -104,17 +103,22 @@ def run_ate_hapc_demo(
     *,
     plot_diagnostics: bool = False,
 ) -> "ATEResult":
-    """Run ``ate_hapc`` with the pinned demo hyperparameters."""
+    """Run ``ate_hapc`` with the pinned demo hyperparameters.
+    Uses ``npcs = n - 1`` (sample size from ``load_demo_data``) for both
+    propensity and outcome stages, matching the usual HAL rank cap.
+    """
     from hapc import ate_hapc
     W, A, Y = load_demo_data()
+    npcs = int(W.shape[0]) - 1
     return ate_hapc(
         W,
         Y,
         A,
         alpha=DEMO_ALPHA,
         max_degree=DEMO_MAX_DEGREE,
-        npcs=DEMO_NPCS,
+        npcs=npcs,
         log_lambda_prop_min=LOG_LAMBDA_PROP_MIN,
         log_lambda_prop_max=LOG_LAMBDA_PROP_MAX,
         grid_length_prop=GRID_LENGTH_PROP,