hapc 0.2.0__tar.gz → 2.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. {hapc-0.2.0 → hapc-2.0.2}/CMakeLists.txt +14 -2
  2. {hapc-0.2.0/python/hapc.egg-info → hapc-2.0.2}/PKG-INFO +2 -3
  3. {hapc-0.2.0 → hapc-2.0.2}/pyproject.toml +2 -3
  4. hapc-2.0.2/python/hapc/__init__.py +99 -0
  5. hapc-2.0.2/python/hapc/ate.py +391 -0
  6. hapc-2.0.2/python/hapc/core.py +318 -0
  7. hapc-2.0.2/python/hapc/cv.py +545 -0
  8. hapc-2.0.2/python/hapc/single.py +636 -0
  9. {hapc-0.2.0 → hapc-2.0.2/python/hapc.egg-info}/PKG-INFO +2 -3
  10. {hapc-0.2.0 → hapc-2.0.2}/python/hapc.egg-info/SOURCES.txt +5 -3
  11. {hapc-0.2.0 → hapc-2.0.2}/python/hapc.egg-info/requires.txt +1 -2
  12. {hapc-0.2.0 → hapc-2.0.2}/setup.py +52 -3
  13. {hapc-0.2.0 → hapc-2.0.2}/src/bindings.cpp +52 -15
  14. hapc-2.0.2/src/cv_classi.cpp +113 -0
  15. {hapc-0.2.0 → hapc-2.0.2}/src/cv_fast_pchal.cpp +0 -6
  16. {hapc-0.2.0 → hapc-2.0.2}/src/fast_pchal.cpp +10 -4
  17. {hapc-0.2.0 → hapc-2.0.2}/src/hapc_core.hpp +32 -2
  18. {hapc-0.2.0 → hapc-2.0.2}/src/logistic_call.cpp +4 -8
  19. {hapc-0.2.0 → hapc-2.0.2}/src/pcghal_call.cpp +43 -8
  20. {hapc-0.2.0 → hapc-2.0.2}/src/pcghal_classi_call.cpp +43 -8
  21. {hapc-0.2.0 → hapc-2.0.2}/src/pcghal_cv.cpp +23 -4
  22. hapc-2.0.2/src/pcghal_cv_classi_cpp.cpp +312 -0
  23. {hapc-0.2.0 → hapc-2.0.2}/src/pcghal_cv_cpp.cpp +56 -19
  24. {hapc-0.2.0 → hapc-2.0.2}/src/r_bindings.cpp +115 -17
  25. {hapc-0.2.0 → hapc-2.0.2}/src/single_pcghal_cpp.cpp +23 -8
  26. {hapc-0.2.0 → hapc-2.0.2}/src/single_pchar.cpp +0 -11
  27. hapc-2.0.2/tests/test_api.py +76 -0
  28. hapc-2.0.2/tests/test_ate.py +162 -0
  29. hapc-2.0.2/tests/test_ate_hapc_diagnostics_example.py +180 -0
  30. hapc-2.0.2/tests/test_core.py +126 -0
  31. hapc-2.0.2/tests/test_logistic_regression.py +137 -0
  32. hapc-2.0.2/tests/test_r_vs_python_alpha.py +146 -0
  33. hapc-0.2.0/python/demo_single.py +0 -39
  34. hapc-0.2.0/python/hapc/__init__.py +0 -33
  35. hapc-0.2.0/python/hapc/core.py +0 -112
  36. hapc-0.2.0/python/hapc/cv.py +0 -340
  37. hapc-0.2.0/python/hapc/single.py +0 -259
  38. hapc-0.2.0/python/test_install.py +0 -65
  39. hapc-0.2.0/src/cv_classi.cpp +0 -314
  40. hapc-0.2.0/src/single_pcghal.cpp +0 -204
  41. hapc-0.2.0/tests/test_api.py +0 -68
  42. hapc-0.2.0/tests/test_core.py +0 -140
  43. hapc-0.2.0/tests/test_r_vs_python_alpha.py +0 -363
  44. {hapc-0.2.0 → hapc-2.0.2}/LICENSE +0 -0
  45. {hapc-0.2.0 → hapc-2.0.2}/MANIFEST.in +0 -0
  46. {hapc-0.2.0 → hapc-2.0.2}/README.md +0 -0
  47. {hapc-0.2.0 → hapc-2.0.2}/python/hapc.egg-info/dependency_links.txt +0 -0
  48. {hapc-0.2.0 → hapc-2.0.2}/python/hapc.egg-info/not-zip-safe +0 -0
  49. {hapc-0.2.0 → hapc-2.0.2}/python/hapc.egg-info/top_level.txt +0 -0
  50. {hapc-0.2.0 → hapc-2.0.2}/setup.cfg +0 -0
  51. {hapc-0.2.0 → hapc-2.0.2}/src/cross_kernel.cpp +0 -0
  52. {hapc-0.2.0 → hapc-2.0.2}/src/cv_fast_pchal_python.cpp +0 -0
  53. {hapc-0.2.0 → hapc-2.0.2}/src/mkernel.cpp +0 -0
  54. {hapc-0.2.0 → hapc-2.0.2}/src/pchal_design.cpp +0 -0
  55. {hapc-0.2.0 → hapc-2.0.2}/src/ridge_wrappers.cpp +0 -0
@@ -11,17 +11,28 @@ if(WIN32 AND EXISTS "C:/vcpkg")
11
11
  list(APPEND CMAKE_PREFIX_PATH "C:/vcpkg/installed/x64-windows")
12
12
  endif()
13
13
 
14
- # Find Python
14
+ # Find Python. We intentionally use the modern FindPython3 module and pass
15
+ # Python3_EXECUTABLE from setup.py so the build always targets the *same*
16
+ # interpreter that pip is using. Without this CMake may discover a newer/
17
+ # older system Python and produce a .so tagged for the wrong ABI.
15
18
  find_package(Python3 COMPONENTS Interpreter Development REQUIRED)
19
+ message(STATUS "Python3_EXECUTABLE: ${Python3_EXECUTABLE}")
20
+ message(STATUS "Python3_VERSION: ${Python3_VERSION}")
16
21
 
17
- # Find Eigen3 with fallback to FetchContent for Windows
22
+ # Find Eigen3 with fallback to FetchContent for Windows.
23
+ # Disabling Eigen's tests/docs avoids long Windows configuration hangs and
24
+ # matches the working CI configuration in v0.2.x wheels.
18
25
find_package(Eigen3 QUIET NO_MODULE)
if(NOT Eigen3_FOUND)
  message(STATUS "Eigen3 not found, downloading via FetchContent...")

  # Keep Eigen's own test and documentation targets out of the configure step.
  # These are cache entries so that the option() calls inside the fetched
  # Eigen project see them as already defined.
  set(EIGEN_BUILD_TESTING OFF CACHE BOOL "" FORCE)
  set(EIGEN_BUILD_DOC OFF CACHE BOOL "" FORCE)
  set(BUILD_TESTING OFF CACHE BOOL "" FORCE)

  # DOWNLOAD_EXTRACT_TIMESTAMP was introduced in CMake 3.24. Older releases do
  # not recognise the keyword and would append it to the URL_HASH value, which
  # makes the download step fail, so only pass it where it is understood.
  set(hapc_eigen_extract_args "")
  if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.24")
    set(hapc_eigen_extract_args DOWNLOAD_EXTRACT_TIMESTAMP TRUE)
  endif()

  FetchContent_Declare(
    Eigen3
    URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz
    URL_HASH SHA256=8586084f71f9bde545ee7fa6d00288b264a2b7ac3607b974e54d13e7162c1c72
    ${hapc_eigen_extract_args}
  )
  FetchContent_MakeAvailable(Eigen3)
endif()
@@ -48,6 +59,7 @@ set(HAPC_CORE_SOURCES
48
59
  src/cv_fast_pchal_python.cpp
49
60
  src/single_pcghal_cpp.cpp
50
61
  src/pcghal_cv_cpp.cpp
62
+ src/pcghal_cv_classi_cpp.cpp
51
63
  )
52
64
 
53
65
  # Python module
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hapc
3
- Version: 0.2.0
3
+ Version: 2.0.2
4
4
  Summary: Highly Adaptive Principal Components
5
5
  Home-page: https://github.com/meixide/hapc
6
6
  Author: Carlos García Meixide
@@ -21,8 +21,7 @@ Requires-Python: >=3.8
21
21
  Description-Content-Type: text/markdown
22
22
  License-File: LICENSE
23
23
  Requires-Dist: numpy<2.3,>=1.24
24
- Requires-Dist: scipy>=1.7
25
- Requires-Dist: scikit-learn>=0.24
24
+ Requires-Dist: scikit-learn>=1.0
26
25
  Provides-Extra: dev
27
26
  Requires-Dist: pytest; extra == "dev"
28
27
  Requires-Dist: pytest-cov; extra == "dev"
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "hapc"
7
- version = "0.2.0"
7
+ version = "2.0.2"
8
8
  description = "Highly Adaptive Principal Components"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -23,8 +23,7 @@ classifiers = [
23
23
  ]
24
24
  dependencies = [
25
25
  "numpy>=1.24,<2.3",
26
- "scipy>=1.7",
27
- "scikit-learn>=0.24",
26
+ "scikit-learn>=1.0",
28
27
  ]
29
28
 
30
29
  [project.optional-dependencies]
@@ -0,0 +1,99 @@
1
+ """HAPC: Highly Adaptive Principal Components.
2
+
3
+ Public API
4
+ ----------
5
+ High-level entry points (mirror the R package):
6
+
7
+ - :func:`hapc` — single-λ fit (gaussian / binomial; norm in {"sv","1","2"}).
8
+ - :func:`cv_hapc` — k-fold cross-validated fit.
9
+
10
+ Lower-level building blocks:
11
+
12
+ - :func:`design_hapc`, :func:`kernel_hapc`, :func:`cross_kernel_hapc`
13
+ - :func:`ridge_regression`, :func:`fast_pchal`
14
+ - :func:`pcghal`, :func:`pcghal_classification`, :func:`pc_hal_classi`
15
+ - :func:`single_pcghal`, :func:`single_lambda_fit`,
16
+ :func:`single_pcghal_classification`,
17
+ :func:`single_pcghal_classification_ridge_only`, :func:`single_pcghal_classification_lasso`
18
+ - :func:`pcghal_cv`, :func:`pcghal_cv_classi`, :func:`pcghal_cv_classi_lasso`, :func:`fasthal_cv`
19
+ - :func:`ate_hapc` — ATE estimate + Wald CI via HAPC + outcome undersmoothing.
20
+ """
21
+
22
+ __version__ = "2.0.2"
23
+
24
+ from .core import (
25
+ DesignOutput,
26
+ OptimizerOutput,
27
+ cross_kernel_hapc,
28
+ design_hapc,
29
+ fast_pchal,
30
+ kernel_cross, # alias for cross_kernel_hapc (backward compat)
31
+ kernel_hapc,
32
+ mkernel, # alias for kernel_hapc (backward compat)
33
+ pchal_design, # alias for design_hapc (backward compat)
34
+ pcghal,
35
+ pcghal_classification,
36
+ pc_hal_classi,
37
+ ridge_regression,
38
+ )
39
+ from .single import (
40
+ SingleLambdaResult,
41
+ SinglePcghalClassificationResult,
42
+ SinglePcghalResult,
43
+ hapc,
44
+ single_lambda_fit,
45
+ single_pcghal,
46
+ single_pcghal_classification,
47
+ single_pcghal_classification_lasso,
48
+ single_pcghal_classification_ridge_only,
49
+ )
50
+ from .cv import (
51
+ CVResult,
52
+ cv_hapc,
53
+ fasthal_cv,
54
+ pcghal_cv,
55
+ pcghal_cv_classi,
56
+ pcghal_cv_classi_lasso,
57
+ )
58
+ from .ate import ATEResult, ate_hapc
59
+
60
+ __all__ = [
61
+ "__version__",
62
+ # high level
63
+ "hapc",
64
+ "cv_hapc",
65
+ "ate_hapc",
66
+ # design & kernels
67
+ "design_hapc",
68
+ "kernel_hapc",
69
+ "cross_kernel_hapc",
70
+ # solvers
71
+ "ridge_regression",
72
+ "fast_pchal",
73
+ "pcghal",
74
+ "pcghal_classification",
75
+ "pc_hal_classi",
76
+ # single-λ
77
+ "single_pcghal",
78
+ "single_lambda_fit",
79
+ "single_pcghal_classification",
80
+ "single_pcghal_classification_ridge_only",
81
+ "single_pcghal_classification_lasso",
82
+ # CV
83
+ "pcghal_cv",
84
+ "pcghal_cv_classi",
85
+ "pcghal_cv_classi_lasso",
86
+ "fasthal_cv",
87
+ # result types
88
+ "ATEResult",
89
+ "CVResult",
90
+ "DesignOutput",
91
+ "OptimizerOutput",
92
+ "SingleLambdaResult",
93
+ "SinglePcghalResult",
94
+ "SinglePcghalClassificationResult",
95
+ # backward-compat aliases
96
+ "pchal_design",
97
+ "mkernel",
98
+ "kernel_cross",
99
+ ]
@@ -0,0 +1,391 @@
1
+ """Average Treatment Effect estimation with HAPC + undersmoothing.
2
+
3
+ Provides :func:`ate_hapc`, a high-level convenience wrapper that:
4
+
5
+ 1. Cross-validates the **propensity** model (binomial, ``A ~ W``) on a
6
+ log-spaced grid
7
+ ``(log_lambda_prop_min, log_lambda_prop_max, grid_length_prop)``
8
+ and the **outcome** model (gaussian, ``Y ~ (A, W)``) on a separate grid
9
+ ``(log_lambda_out_min, log_lambda_out_max, grid_length_out)``
10
+ (each built like :func:`hapc.cv_hapc`).
11
+ 2. Fixes the propensity score at its CV-best λ.
12
+ 3. Computes σ = std of the ATE efficient influence function (EIF) at the
13
+ CV configuration ``(π̂_CV, μ̂_CV)``.
14
+ 4. Sweeps the **outcome** λ grid in **decreasing**
15
+ order (most smoothing → least smoothing) and stops at the first λ for
16
+ which ``|mean(EIF)| ≤ σ / (√n · log n)``. This is the **undersmoothed**
17
+ outcome model. If no λ in the grid meets the threshold, the smallest λ
18
+ is used.
19
+ 5. Returns the plug-in ATE point estimate at the undersmoothed model and a
20
+ ``(1 - alpha)`` Wald confidence interval based on the σ of the EIF at
21
+ that undersmoothed model.
22
+
23
+ The function does not implement sample splitting / cross-fitting:
24
+ nuisances are fit on the full sample and the EIF is evaluated on the same
25
+ sample. Bias control is provided by the undersmoothing step instead.
26
+ """
27
+
28
+ from typing import NamedTuple, Optional
29
+
30
+ import numpy as np
31
+
32
+ try:
33
+ from scipy.stats import norm as _normal
34
+ except ImportError as _e: # pragma: no cover
35
+ raise ImportError(
36
+ "scipy is required for ate_hapc (used for normal quantiles). "
37
+ "It ships transitively with scikit-learn; run `pip install scipy`."
38
+ ) from _e
39
+
40
+ from .cv import CVResult, cv_hapc
41
+ from .single import hapc as _hapc
42
+
43
+
44
class ATEResult(NamedTuple):
    """Result triple returned by :func:`ate_hapc`.

    Fields, in tuple order
    ----------------------
    estimate : float
        Plug-in ATE evaluated at the undersmoothed outcome model, i.e.
        ``mean(μ̂_1(W) - μ̂_0(W))``.
    lower : float
        Left end of the ``(1 - alpha)`` Wald confidence interval.
    upper : float
        Right end of the ``(1 - alpha)`` Wald confidence interval.
    """

    estimate: float  # ATE point estimate
    lower: float     # lower CI endpoint
    upper: float     # upper CI endpoint
61
+
62
+
63
def _plot_ate_diagnostics(
    cv_prop: CVResult,
    cv_out: CVResult,
    traj_lambdas: np.ndarray,
    traj_abs_mean_eif: np.ndarray,
    lam_prop_cv: float,
    lam_out_cv: float,
    lam_undersmooth: float,
    threshold: float,
) -> None:
    """Draw the three-panel diagnostic figure for :func:`ate_hapc` and show it.

    Top row: the propensity and outcome cross-validation curves (``.mses``
    plotted against ``.lambdas`` of each CV result) with the CV-selected λ
    marked. Bottom row: ``|mean(EIF)|`` along the outcome λ grid, with the
    threshold line and vertical markers for the CV λ and the undersmoothed λ.

    Raises
    ------
    ImportError
        If matplotlib cannot be imported.
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError as exc:  # pragma: no cover
        raise ImportError(
            "plot_diagnostics=True requires matplotlib. "
            "Install with: pip install matplotlib"
        ) from exc

    def _cv_panel(ax, lams, scores, best_lam, color, xlabel, ylabel, title):
        # One CV curve on a log-λ axis plus a dashed marker at the chosen λ.
        ax.semilogx(lams, scores, "o-", color=color, lw=1.5, ms=5)
        ax.axvline(best_lam, color="C3", ls="--", lw=1.5,
                   label=f"CV λ = {best_lam:.4g}")
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.legend(loc="best", fontsize=8)
        ax.grid(True, alpha=0.3)

    figure = plt.figure(figsize=(11.0, 7.5))
    layout = figure.add_gridspec(
        2, 2, height_ratios=[1.0, 1.1], hspace=0.35, wspace=0.3
    )
    ax_prop = figure.add_subplot(layout[0, 0])
    ax_out = figure.add_subplot(layout[0, 1])
    ax_traj = figure.add_subplot(layout[1, :])

    # Convert all CV curves up front, before anything is drawn.
    prop_lams = np.asarray(cv_prop.lambdas, dtype=float)
    prop_scores = np.asarray(cv_prop.mses, dtype=float)
    out_lams = np.asarray(cv_out.lambdas, dtype=float)
    out_scores = np.asarray(cv_out.mses, dtype=float)

    _cv_panel(
        ax_prop, prop_lams, prop_scores, lam_prop_cv, "C1",
        "λ (propensity)", "Mean CV logistic deviance",
        "Propensity CV (A ~ W, binomial)",
    )
    _cv_panel(
        ax_out, out_lams, out_scores, lam_out_cv, "C2",
        "λ (outcome)", "Mean CV MSE",
        "Outcome CV (Y ~ (A,W), gaussian)",
    )

    # Trajectory: keep finite points with positive λ (log axis), sorted by λ.
    traj_x = np.asarray(traj_lambdas, dtype=float)
    traj_y = np.asarray(traj_abs_mean_eif, dtype=float)
    usable = np.isfinite(traj_x) & np.isfinite(traj_y) & (traj_x > 0)
    traj_x, traj_y = traj_x[usable], traj_y[usable]
    by_lambda = np.argsort(traj_x)
    traj_x, traj_y = traj_x[by_lambda], traj_y[by_lambda]

    if traj_x.size == 0:
        ax_traj.text(
            0.5, 0.5, "No valid outcome fits on λ grid",
            transform=ax_traj.transAxes, ha="center", va="center",
        )
    else:
        ax_traj.semilogx(traj_x, traj_y, "o-", color="C0", lw=2, ms=6,
                         label=r"$|\mathrm{mean}(\mathrm{EIF}_{\mathrm{ATE}})|$")
        ax_traj.fill_between(traj_x, 0, threshold, alpha=0.12, color="gray")

    ax_traj.axhline(threshold, color="gray", lw=2, alpha=0.85,
                    label=r"Threshold $\sigma_{\mathrm{CV}}/(\sqrt{n}\log n)$")
    ax_traj.axvline(lam_out_cv, color="C3", ls="--", lw=1.8,
                    label=f"Outcome CV λ = {lam_out_cv:.4g}")
    ax_traj.axvline(lam_undersmooth, color="C4", ls="-", lw=2.0,
                    label=f"Undersmoothed λ = {lam_undersmooth:.4g}")
    ax_traj.set_xlabel("Outcome λ (undersmoothing grid)")
    ax_traj.set_ylabel(r"$|\mathrm{mean}(\mathrm{EIF})|$")
    ax_traj.set_title("Undersmoothing trajectory (fixed propensity at its CV-λ)")
    ax_traj.legend(loc="best", fontsize=9, ncol=2)
    ax_traj.grid(True, alpha=0.3)

    figure.suptitle("ate_hapc diagnostics", fontsize=12, y=0.98)
    figure.subplots_adjust(top=0.92, bottom=0.08, hspace=0.4, wspace=0.3)
    plt.show()
142
+
143
+
144
+ def _coerce_binary(A: np.ndarray) -> np.ndarray:
145
+ """Return ``A`` re-encoded as floats in ``{0,1}``.
146
+
147
+ Accepts ``{0,1}``, ``{-1,+1}`` (or any pair where one value is non-positive
148
+ and one positive — falls back to the sign).
149
+ """
150
+ A = np.asarray(A).ravel()
151
+ u = set(np.unique(A).tolist())
152
+ if u.issubset({0, 1, 0.0, 1.0}):
153
+ return A.astype(np.float64)
154
+ if u.issubset({-1, 1, -1.0, 1.0}):
155
+ return ((A > 0).astype(np.float64))
156
+ raise ValueError(
157
+ f"A must be binary in {{0,1}} or {{-1,+1}}; found {sorted(u)}"
158
+ )
159
+
160
+
161
def ate_hapc(X: np.ndarray, Y: np.ndarray, A: np.ndarray,
             alpha: float = 0.05,
             max_degree: int = 1,
             npcs: Optional[int] = None,
             log_lambda_prop_min: float = -5,
             log_lambda_prop_max: float = -3,
             grid_length_prop: int = 10,
             log_lambda_out_min: float = -5,
             log_lambda_out_max: float = -3,
             grid_length_out: int = 10,
             nfolds: int = 5,
             norm: str = "sv",
             predict: Optional[np.ndarray] = None,
             max_iter: int = 5000,
             tol: float = 1e-3,
             step_factor: float = 0.8,
             verbose: bool = False,
             crit: str = "grad",
             center: bool = True,
             approx: bool = False,
             ini: str = "1",
             plot_diagnostics: bool = False) -> ATEResult:
    """ATE estimate with HAPC nuisances and outcome undersmoothing.

    Parameters
    ----------
    X : np.ndarray, shape (n, p)
        Covariate matrix ``W`` (do NOT include the treatment column).
    Y : np.ndarray, shape (n,)
        Continuous outcome.
    A : np.ndarray, shape (n,)
        Binary treatment in ``{0,1}`` or ``{-1,+1}``.
    alpha : float, default 0.05
        Significance level. The returned interval has confidence
        ``1 - alpha``.
    max_degree, npcs, nfolds, norm, predict, max_iter, tol, step_factor, \
verbose, crit, center, approx, ini :
        Same meaning and defaults as in :func:`hapc.cv_hapc` (except λ grids,
        see below). ``npcs=None`` is replaced by ``n``.
        ``predict`` is accepted for signature parity with :func:`cv_hapc` and
        is currently ignored (``ate_hapc`` always evaluates the EIF on the
        training sample).
    log_lambda_prop_min, log_lambda_prop_max, grid_length_prop :
        Equally spaced log-λ grid for **propensity** cross-validation
        (``A ~ W``, binomial), same rule as :func:`cv_hapc`.
    log_lambda_out_min, log_lambda_out_max, grid_length_out :
        Log-λ grid for **outcome** cross-validation ``Y ~ (A, W)`` (gaussian)
        and for the **undersmoothing** scan (same points, evaluated in
        decreasing λ order until ``|mean(EIF)| ≤ τ``).
    plot_diagnostics : bool, default False
        If True, open a matplotlib figure with (1) propensity CV curve
        (logistic deviance vs λ), (2) outcome CV curve (MSE vs λ), and (3)
        the undersmoothing path: ``|mean(EIF)|`` vs outcome λ with the
        threshold line and vertical markers for the CV and selected
        undersmoothed λ. Requires ``matplotlib`` (``pip install matplotlib``).

    Returns
    -------
    ATEResult
        Named tuple with three fields ``(estimate, lower, upper)``.

    Raises
    ------
    ValueError
        If ``alpha`` is outside ``(0, 1)``, ``X`` is not 2-D, the inputs do
        not share the same number of rows, fewer than two observations are
        supplied, or ``A`` is not a recognised binary coding.
    RuntimeError
        If the outcome fit does not return one prediction per stacked
        evaluation row.

    Notes
    -----
    The procedure is:

    1. Cross-validate the propensity ``A ~ W`` (binomial) on its grid and the
       outcome ``Y ~ (A, W)`` (gaussian) on the outcome grid (independently
       specified).
    2. Fix the propensity at its CV-best λ; refit on the full sample to
       obtain ``π̂(W_i) = P(A=1 | W_i)`` (clipped to ``[1e-8, 1 - 1e-8]``).
    3. At the CV-best outcome λ, compute the ATE EIF
       ``φ̂_diff = φ̂_1 - φ̂_0`` with
       ``φ̂_a = 1{A=a}/P̂(A=a|W) · (Y - μ̂_a(W)) + μ̂_a(W) - mean(μ̂_a)``
       and let ``σ = std(φ̂_diff)``.
    4. Threshold ``τ = σ / (√n · log n)``.
    5. Walk the **outcome** λ grid in **decreasing**
       order; pick the first (largest) λ for which
       ``|mean(EIF_diff)| ≤ τ`` — call it ``λ_u``. Grid points whose fit
       raises are skipped; if no λ qualifies, the smallest λ is used.
    6. Plug-in estimate: ``ψ̂ = mean(μ̂_1(W; λ_u) - μ̂_0(W; λ_u))``.
       CI: ``ψ̂ ± z_{1 - α/2} · σ_u / √n`` where ``σ_u = std(EIF_diff)``
       at ``λ_u``.

    Examples
    --------
    >>> import numpy as np
    >>> from hapc import ate_hapc
    >>> rng = np.random.default_rng(0)
    >>> n = 200
    >>> W = np.column_stack([rng.uniform(-2, 2, n), rng.normal(0, 0.5, n)])
    >>> p = 1.0 / (1.0 + np.exp(-(W[:, 0] + 0.5 * W[:, 1])))
    >>> A = rng.binomial(1, p, n)
    >>> Y = 2 * W[:, 0] + 0.5 + rng.normal(0, 0.5, n)   # truth: ATE=0
    >>> res = ate_hapc(W, Y, A, alpha=0.05, max_degree=2, npcs=50,
    ...                grid_length_prop=4, grid_length_out=4, nfolds=3,
    ...                norm="2")
    >>> bool(res.lower <= res.estimate <= res.upper)
    True
    """
    if not (0.0 < alpha < 1.0):
        raise ValueError(f"alpha must be in (0,1); got {alpha}")

    # --- Coerce inputs ------------------------------------------------------
    X = np.ascontiguousarray(np.asarray(X, dtype=np.float64))
    if X.ndim != 2:
        raise ValueError(f"X must be 2-D; got shape {X.shape}")
    Y = np.asarray(Y, dtype=np.float64).ravel()
    A01 = _coerce_binary(A)
    n = X.shape[0]
    if Y.size != n or A01.size != n:
        raise ValueError("X, Y, A must all have the same number of rows.")
    if n < 2:
        # τ divides by log(n), which is 0 for n == 1 (and undefined for n == 0).
        raise ValueError(f"ate_hapc needs at least 2 observations; got n={n}")

    if npcs is None:
        npcs = int(n)

    lambdas_out = np.exp(
        np.linspace(log_lambda_out_min, log_lambda_out_max, grid_length_out))

    cv_kwargs_base = dict(
        max_degree=max_degree, npcs=npcs, nfolds=nfolds, norm=norm,
        max_iter=max_iter, tol=tol, step_factor=step_factor,
        verbose=verbose, crit=crit, center=center, approx=approx, ini=ini,
    )

    # --- 1. CV propensity (binomial) ---------------------------------------
    cv_prop = cv_hapc(
        X, A01, family="binomial",
        log_lambda_min=log_lambda_prop_min,
        log_lambda_max=log_lambda_prop_max,
        grid_length=grid_length_prop,
        **cv_kwargs_base,
    )
    lam_prop_cv = float(cv_prop.best_lambda)

    # Refit propensity at CV λ on full data, predict in-sample probabilities.
    prop = _hapc(
        X, A01, family="binomial", max_degree=max_degree, npcs=npcs,
        lambda_=lam_prop_cv, norm=norm, predict=X,
        max_iter=max_iter, tol=tol, step_factor=step_factor,
        verbose=verbose, crit=crit, center=center, approx=approx, ini=ini,
    )
    # Clip away from 0/1 so the inverse-probability weights stay finite.
    pi1 = np.clip(np.asarray(prop.probabilities).ravel(), 1e-8, 1 - 1e-8)

    # --- 2. CV outcome (gaussian on [A,W]) ---------------------------------
    Xout = np.column_stack([A01, X])
    cv_out = cv_hapc(
        Xout, Y, family="gaussian",
        log_lambda_min=log_lambda_out_min,
        log_lambda_max=log_lambda_out_max,
        grid_length=grid_length_out,
        **cv_kwargs_base,
    )
    lam_out_cv = float(cv_out.best_lambda)

    # Stacked design for one-shot prediction at both arms.
    Xmu1 = np.column_stack([np.ones(n), X])
    Xmu0 = np.column_stack([np.zeros(n), X])
    Xeval = np.vstack([Xmu1, Xmu0])

    # Successful outcome fits keyed by λ. The fallback and the diagnostics
    # pass revisit grid points already fitted during the sweep; caching
    # avoids refitting them. Failed fits are never stored, so a retry
    # still raises.
    fitted_pairs = {}

    def _mu_pair(lam: float):
        """Refit outcome at λ on full data, return (μ̂_1, μ̂_0) on training W."""
        key = float(lam)
        if key not in fitted_pairs:
            res = _hapc(
                Xout, Y, family="gaussian", max_degree=max_degree, npcs=npcs,
                lambda_=key, norm=norm, predict=Xeval,
                max_iter=max_iter, tol=tol, step_factor=step_factor,
                verbose=verbose, crit=crit, center=center, approx=approx,
                ini=ini,
            )
            p = np.asarray(res.predictions).ravel()
            if p.size != 2 * n:
                raise RuntimeError(
                    f"Outcome predict returned {p.size} values, expected {2 * n}."
                )
            fitted_pairs[key] = (p[:n], p[n:])
        return fitted_pairs[key]

    def _eif_diff(mu1: np.ndarray, mu0: np.ndarray) -> np.ndarray:
        """Per-observation ATE efficient influence function φ̂_1 - φ̂_0."""
        # The centred plug-in term enters with a PLUS sign:
        #   φ̂_a = 1{A=a}/P̂(A=a|W) · (Y - μ̂_a) + (μ̂_a - mean μ̂_a).
        # Its sample mean is zero, so the sign does not affect mean(EIF),
        # but it does affect std(EIF), which drives τ and the CI width.
        eif1 = (A01 / pi1) * (Y - mu1) + (mu1 - mu1.mean())
        eif0 = ((1.0 - A01) / (1.0 - pi1)) * (Y - mu0) + (mu0 - mu0.mean())
        return eif1 - eif0

    # --- 3. σ at CV configuration → threshold τ ----------------------------
    mu1_cv, mu0_cv = _mu_pair(lam_out_cv)
    eif_cv = _eif_diff(mu1_cv, mu0_cv)
    sigma_cv = float(np.std(eif_cv, ddof=0))
    threshold = sigma_cv / (np.sqrt(n) * np.log(n))

    # --- 4. Undersmoothing sweep: largest λ → smallest --------------------
    lam_und: Optional[float] = None
    eif_und: Optional[np.ndarray] = None
    mu1_und = mu0_und = None
    for lam in np.sort(lambdas_out)[::-1]:
        try:
            mu1, mu0 = _mu_pair(float(lam))
        except Exception:
            # Best effort: a grid point whose fit fails is skipped.
            continue
        eif = _eif_diff(mu1, mu0)
        if abs(eif.mean()) <= threshold:
            lam_und = float(lam)
            mu1_und, mu0_und = mu1, mu0
            eif_und = eif
            break

    if eif_und is None:
        # Threshold never met → fall back to the smallest λ in the grid.
        # If that fit failed during the sweep it is retried here and any
        # error propagates.
        lam_und = float(lambdas_out.min())
        mu1_und, mu0_und = _mu_pair(lam_und)
        eif_und = _eif_diff(mu1_und, mu0_und)

    if plot_diagnostics:
        # Full |mean(EIF)| trajectory over the grid, in increasing λ.
        t_lams = []
        t_abs = []
        for lam in np.sort(lambdas_out):
            try:
                mu1, mu0 = _mu_pair(float(lam))
            except Exception:
                continue
            eif = _eif_diff(mu1, mu0)
            t_lams.append(float(lam))
            t_abs.append(float(np.abs(eif.mean())))
        _plot_ate_diagnostics(
            cv_prop, cv_out,
            np.asarray(t_lams), np.asarray(t_abs),
            lam_prop_cv, lam_out_cv, lam_und, threshold,
        )

    # --- 5. Point estimate + (1 - alpha) Wald CI --------------------------
    psi = float(np.mean(mu1_und - mu0_und))
    sigma_und = float(np.std(eif_und, ddof=0))
    z = float(_normal.ppf(1.0 - alpha / 2.0))
    half = z * sigma_und / np.sqrt(n)

    return ATEResult(estimate=psi, lower=psi - half, upper=psi + half)
389
+
390
+
391
+ __all__ = ["ATEResult", "ate_hapc"]