hapc 0.2.0__tar.gz → 2.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hapc-0.2.0 → hapc-2.0.2}/CMakeLists.txt +14 -2
- {hapc-0.2.0/python/hapc.egg-info → hapc-2.0.2}/PKG-INFO +2 -3
- {hapc-0.2.0 → hapc-2.0.2}/pyproject.toml +2 -3
- hapc-2.0.2/python/hapc/__init__.py +99 -0
- hapc-2.0.2/python/hapc/ate.py +391 -0
- hapc-2.0.2/python/hapc/core.py +318 -0
- hapc-2.0.2/python/hapc/cv.py +545 -0
- hapc-2.0.2/python/hapc/single.py +636 -0
- {hapc-0.2.0 → hapc-2.0.2/python/hapc.egg-info}/PKG-INFO +2 -3
- {hapc-0.2.0 → hapc-2.0.2}/python/hapc.egg-info/SOURCES.txt +5 -3
- {hapc-0.2.0 → hapc-2.0.2}/python/hapc.egg-info/requires.txt +1 -2
- {hapc-0.2.0 → hapc-2.0.2}/setup.py +52 -3
- {hapc-0.2.0 → hapc-2.0.2}/src/bindings.cpp +52 -15
- hapc-2.0.2/src/cv_classi.cpp +113 -0
- {hapc-0.2.0 → hapc-2.0.2}/src/cv_fast_pchal.cpp +0 -6
- {hapc-0.2.0 → hapc-2.0.2}/src/fast_pchal.cpp +10 -4
- {hapc-0.2.0 → hapc-2.0.2}/src/hapc_core.hpp +32 -2
- {hapc-0.2.0 → hapc-2.0.2}/src/logistic_call.cpp +4 -8
- {hapc-0.2.0 → hapc-2.0.2}/src/pcghal_call.cpp +43 -8
- {hapc-0.2.0 → hapc-2.0.2}/src/pcghal_classi_call.cpp +43 -8
- {hapc-0.2.0 → hapc-2.0.2}/src/pcghal_cv.cpp +23 -4
- hapc-2.0.2/src/pcghal_cv_classi_cpp.cpp +312 -0
- {hapc-0.2.0 → hapc-2.0.2}/src/pcghal_cv_cpp.cpp +56 -19
- {hapc-0.2.0 → hapc-2.0.2}/src/r_bindings.cpp +115 -17
- {hapc-0.2.0 → hapc-2.0.2}/src/single_pcghal_cpp.cpp +23 -8
- {hapc-0.2.0 → hapc-2.0.2}/src/single_pchar.cpp +0 -11
- hapc-2.0.2/tests/test_api.py +76 -0
- hapc-2.0.2/tests/test_ate.py +162 -0
- hapc-2.0.2/tests/test_ate_hapc_diagnostics_example.py +180 -0
- hapc-2.0.2/tests/test_core.py +126 -0
- hapc-2.0.2/tests/test_logistic_regression.py +137 -0
- hapc-2.0.2/tests/test_r_vs_python_alpha.py +146 -0
- hapc-0.2.0/python/demo_single.py +0 -39
- hapc-0.2.0/python/hapc/__init__.py +0 -33
- hapc-0.2.0/python/hapc/core.py +0 -112
- hapc-0.2.0/python/hapc/cv.py +0 -340
- hapc-0.2.0/python/hapc/single.py +0 -259
- hapc-0.2.0/python/test_install.py +0 -65
- hapc-0.2.0/src/cv_classi.cpp +0 -314
- hapc-0.2.0/src/single_pcghal.cpp +0 -204
- hapc-0.2.0/tests/test_api.py +0 -68
- hapc-0.2.0/tests/test_core.py +0 -140
- hapc-0.2.0/tests/test_r_vs_python_alpha.py +0 -363
- {hapc-0.2.0 → hapc-2.0.2}/LICENSE +0 -0
- {hapc-0.2.0 → hapc-2.0.2}/MANIFEST.in +0 -0
- {hapc-0.2.0 → hapc-2.0.2}/README.md +0 -0
- {hapc-0.2.0 → hapc-2.0.2}/python/hapc.egg-info/dependency_links.txt +0 -0
- {hapc-0.2.0 → hapc-2.0.2}/python/hapc.egg-info/not-zip-safe +0 -0
- {hapc-0.2.0 → hapc-2.0.2}/python/hapc.egg-info/top_level.txt +0 -0
- {hapc-0.2.0 → hapc-2.0.2}/setup.cfg +0 -0
- {hapc-0.2.0 → hapc-2.0.2}/src/cross_kernel.cpp +0 -0
- {hapc-0.2.0 → hapc-2.0.2}/src/cv_fast_pchal_python.cpp +0 -0
- {hapc-0.2.0 → hapc-2.0.2}/src/mkernel.cpp +0 -0
- {hapc-0.2.0 → hapc-2.0.2}/src/pchal_design.cpp +0 -0
- {hapc-0.2.0 → hapc-2.0.2}/src/ridge_wrappers.cpp +0 -0
|
@@ -11,17 +11,28 @@ if(WIN32 AND EXISTS "C:/vcpkg")
|
|
|
11
11
|
list(APPEND CMAKE_PREFIX_PATH "C:/vcpkg/installed/x64-windows")
|
|
12
12
|
endif()
|
|
13
13
|
|
|
14
|
-
# Find Python
|
|
14
|
+
# Find Python. We intentionally use the modern FindPython3 module and pass
|
|
15
|
+
# Python3_EXECUTABLE from setup.py so the build always targets the *same*
|
|
16
|
+
# interpreter that pip is using. Without this CMake may discover a newer/
|
|
17
|
+
# older system Python and produce a .so tagged for the wrong ABI.
|
|
15
18
|
find_package(Python3 COMPONENTS Interpreter Development REQUIRED)
|
|
19
|
+
message(STATUS "Python3_EXECUTABLE: ${Python3_EXECUTABLE}")
|
|
20
|
+
message(STATUS "Python3_VERSION: ${Python3_VERSION}")
|
|
16
21
|
|
|
17
|
-
# Find Eigen3 with fallback to FetchContent for Windows
|
|
22
|
+
# Find Eigen3 with fallback to FetchContent for Windows.
# Disabling Eigen's tests/docs avoids long Windows configuration hangs and
# matches the working CI configuration in v0.2.x wheels.
find_package(Eigen3 QUIET NO_MODULE)
if(NOT Eigen3_FOUND)
    message(STATUS "Eigen3 not found, downloading via FetchContent...")

    # These must be set before FetchContent_MakeAvailable() so they are already
    # in the cache when Eigen's own CMakeLists.txt is processed.
    # NOTE(review): FORCE also overwrites any BUILD_TESTING value the user
    # passed on the command line, for the whole build tree. This looks
    # intentional (see the comment above) - confirm before relaxing it.
    set(EIGEN_BUILD_TESTING OFF CACHE BOOL "" FORCE)
    set(EIGEN_BUILD_DOC OFF CACHE BOOL "" FORCE)
    set(BUILD_TESTING OFF CACHE BOOL "" FORCE)

    # DOWNLOAD_EXTRACT_TIMESTAMP was introduced in CMake 3.24 (policy CMP0135).
    # Older releases do not recognise the keyword, so only pass it when the
    # running CMake supports it.
    set(hapc_eigen_extra_args "")
    if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.24")
        list(APPEND hapc_eigen_extra_args DOWNLOAD_EXTRACT_TIMESTAMP TRUE)
    endif()

    FetchContent_Declare(
        Eigen3
        URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz
        URL_HASH SHA256=8586084f71f9bde545ee7fa6d00288b264a2b7ac3607b974e54d13e7162c1c72
        ${hapc_eigen_extra_args}
    )
    FetchContent_MakeAvailable(Eigen3)
endif()
|
|
@@ -48,6 +59,7 @@ set(HAPC_CORE_SOURCES
|
|
|
48
59
|
src/cv_fast_pchal_python.cpp
|
|
49
60
|
src/single_pcghal_cpp.cpp
|
|
50
61
|
src/pcghal_cv_cpp.cpp
|
|
62
|
+
src/pcghal_cv_classi_cpp.cpp
|
|
51
63
|
)
|
|
52
64
|
|
|
53
65
|
# Python module
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hapc
|
|
3
|
-
Version: 0.2
|
|
3
|
+
Version: 2.0.2
|
|
4
4
|
Summary: Highly Adaptive Principal Components
|
|
5
5
|
Home-page: https://github.com/meixide/hapc
|
|
6
6
|
Author: Carlos García Meixide
|
|
@@ -21,8 +21,7 @@ Requires-Python: >=3.8
|
|
|
21
21
|
Description-Content-Type: text/markdown
|
|
22
22
|
License-File: LICENSE
|
|
23
23
|
Requires-Dist: numpy<2.3,>=1.24
|
|
24
|
-
Requires-Dist:
|
|
25
|
-
Requires-Dist: scikit-learn>=0.24
|
|
24
|
+
Requires-Dist: scikit-learn>=1.0
|
|
26
25
|
Provides-Extra: dev
|
|
27
26
|
Requires-Dist: pytest; extra == "dev"
|
|
28
27
|
Requires-Dist: pytest-cov; extra == "dev"
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "hapc"
|
|
7
|
-
version = "0.2
|
|
7
|
+
version = "2.0.2"
|
|
8
8
|
description = "Highly Adaptive Principal Components"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.8"
|
|
@@ -23,8 +23,7 @@ classifiers = [
|
|
|
23
23
|
]
|
|
24
24
|
dependencies = [
|
|
25
25
|
"numpy>=1.24,<2.3",
|
|
26
|
-
"
|
|
27
|
-
"scikit-learn>=0.24",
|
|
26
|
+
"scikit-learn>=1.0",
|
|
28
27
|
]
|
|
29
28
|
|
|
30
29
|
[project.optional-dependencies]
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""HAPC: Highly Adaptive Principal Components.
|
|
2
|
+
|
|
3
|
+
Public API
|
|
4
|
+
----------
|
|
5
|
+
High-level entry points (mirror the R package):
|
|
6
|
+
|
|
7
|
+
- :func:`hapc` — single-λ fit (gaussian / binomial; norm in {"sv","1","2"}).
|
|
8
|
+
- :func:`cv_hapc` — k-fold cross-validated fit.
|
|
9
|
+
|
|
10
|
+
Lower-level building blocks:
|
|
11
|
+
|
|
12
|
+
- :func:`design_hapc`, :func:`kernel_hapc`, :func:`cross_kernel_hapc`
|
|
13
|
+
- :func:`ridge_regression`, :func:`fast_pchal`
|
|
14
|
+
- :func:`pcghal`, :func:`pcghal_classification`, :func:`pc_hal_classi`
|
|
15
|
+
- :func:`single_pcghal`, :func:`single_lambda_fit`,
|
|
16
|
+
:func:`single_pcghal_classification`,
|
|
17
|
+
:func:`single_pcghal_classification_ridge_only`, :func:`single_pcghal_classification_lasso`
|
|
18
|
+
- :func:`pcghal_cv`, :func:`pcghal_cv_classi`, :func:`pcghal_cv_classi_lasso`, :func:`fasthal_cv`
|
|
19
|
+
- :func:`ate_hapc` — ATE estimate + Wald CI via HAPC + outcome undersmoothing.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
__version__ = "2.0.2"
|
|
23
|
+
|
|
24
|
+
from .core import (
|
|
25
|
+
DesignOutput,
|
|
26
|
+
OptimizerOutput,
|
|
27
|
+
cross_kernel_hapc,
|
|
28
|
+
design_hapc,
|
|
29
|
+
fast_pchal,
|
|
30
|
+
kernel_cross, # alias for cross_kernel_hapc (backward compat)
|
|
31
|
+
kernel_hapc,
|
|
32
|
+
mkernel, # alias for kernel_hapc (backward compat)
|
|
33
|
+
pchal_design, # alias for design_hapc (backward compat)
|
|
34
|
+
pcghal,
|
|
35
|
+
pcghal_classification,
|
|
36
|
+
pc_hal_classi,
|
|
37
|
+
ridge_regression,
|
|
38
|
+
)
|
|
39
|
+
from .single import (
|
|
40
|
+
SingleLambdaResult,
|
|
41
|
+
SinglePcghalClassificationResult,
|
|
42
|
+
SinglePcghalResult,
|
|
43
|
+
hapc,
|
|
44
|
+
single_lambda_fit,
|
|
45
|
+
single_pcghal,
|
|
46
|
+
single_pcghal_classification,
|
|
47
|
+
single_pcghal_classification_lasso,
|
|
48
|
+
single_pcghal_classification_ridge_only,
|
|
49
|
+
)
|
|
50
|
+
from .cv import (
|
|
51
|
+
CVResult,
|
|
52
|
+
cv_hapc,
|
|
53
|
+
fasthal_cv,
|
|
54
|
+
pcghal_cv,
|
|
55
|
+
pcghal_cv_classi,
|
|
56
|
+
pcghal_cv_classi_lasso,
|
|
57
|
+
)
|
|
58
|
+
from .ate import ATEResult, ate_hapc
|
|
59
|
+
|
|
60
|
+
__all__ = [
|
|
61
|
+
"__version__",
|
|
62
|
+
# high level
|
|
63
|
+
"hapc",
|
|
64
|
+
"cv_hapc",
|
|
65
|
+
"ate_hapc",
|
|
66
|
+
# design & kernels
|
|
67
|
+
"design_hapc",
|
|
68
|
+
"kernel_hapc",
|
|
69
|
+
"cross_kernel_hapc",
|
|
70
|
+
# solvers
|
|
71
|
+
"ridge_regression",
|
|
72
|
+
"fast_pchal",
|
|
73
|
+
"pcghal",
|
|
74
|
+
"pcghal_classification",
|
|
75
|
+
"pc_hal_classi",
|
|
76
|
+
# single-λ
|
|
77
|
+
"single_pcghal",
|
|
78
|
+
"single_lambda_fit",
|
|
79
|
+
"single_pcghal_classification",
|
|
80
|
+
"single_pcghal_classification_ridge_only",
|
|
81
|
+
"single_pcghal_classification_lasso",
|
|
82
|
+
# CV
|
|
83
|
+
"pcghal_cv",
|
|
84
|
+
"pcghal_cv_classi",
|
|
85
|
+
"pcghal_cv_classi_lasso",
|
|
86
|
+
"fasthal_cv",
|
|
87
|
+
# result types
|
|
88
|
+
"ATEResult",
|
|
89
|
+
"CVResult",
|
|
90
|
+
"DesignOutput",
|
|
91
|
+
"OptimizerOutput",
|
|
92
|
+
"SingleLambdaResult",
|
|
93
|
+
"SinglePcghalResult",
|
|
94
|
+
"SinglePcghalClassificationResult",
|
|
95
|
+
# backward-compat aliases
|
|
96
|
+
"pchal_design",
|
|
97
|
+
"mkernel",
|
|
98
|
+
"kernel_cross",
|
|
99
|
+
]
|
|
@@ -0,0 +1,391 @@
|
|
|
1
|
+
"""Average Treatment Effect estimation with HAPC + undersmoothing.
|
|
2
|
+
|
|
3
|
+
Provides :func:`ate_hapc`, a high-level convenience wrapper that:
|
|
4
|
+
|
|
5
|
+
1. Cross-validates the **propensity** model (binomial, ``A ~ W``) on a
|
|
6
|
+
log-spaced grid
|
|
7
|
+
``(log_lambda_prop_min, log_lambda_prop_max, grid_length_prop)``
|
|
8
|
+
and the **outcome** model (gaussian, ``Y ~ (A, W)``) on a separate grid
|
|
9
|
+
``(log_lambda_out_min, log_lambda_out_max, grid_length_out)``
|
|
10
|
+
(each built like :func:`hapc.cv_hapc`).
|
|
11
|
+
2. Fixes the propensity score at its CV-best λ.
|
|
12
|
+
3. Computes σ = std of the ATE efficient influence function (EIF) at the
|
|
13
|
+
CV configuration ``(π̂_CV, μ̂_CV)``.
|
|
14
|
+
4. Sweeps the **outcome** λ grid in **decreasing**
|
|
15
|
+
order (most smoothing → least smoothing) and stops at the first λ for
|
|
16
|
+
which ``|mean(EIF)| ≤ σ / (√n · log n)``. This is the **undersmoothed**
|
|
17
|
+
outcome model. If no λ in the grid meets the threshold, the smallest λ
|
|
18
|
+
is used.
|
|
19
|
+
5. Returns the plug-in ATE point estimate at the undersmoothed model and a
|
|
20
|
+
``(1 - alpha)`` Wald confidence interval based on the σ of the EIF at
|
|
21
|
+
that undersmoothed model.
|
|
22
|
+
|
|
23
|
+
The function does not implement sample splitting / cross-fitting:
|
|
24
|
+
nuisances are fit on the full sample and the EIF is evaluated on the same
|
|
25
|
+
sample. Bias control is provided by the undersmoothing step instead.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from typing import NamedTuple, Optional
|
|
29
|
+
|
|
30
|
+
import numpy as np
|
|
31
|
+
|
|
32
|
+
try:
|
|
33
|
+
from scipy.stats import norm as _normal
|
|
34
|
+
except ImportError as _e: # pragma: no cover
|
|
35
|
+
raise ImportError(
|
|
36
|
+
"scipy is required for ate_hapc (used for normal quantiles). "
|
|
37
|
+
"It ships transitively with scikit-learn; run `pip install scipy`."
|
|
38
|
+
) from _e
|
|
39
|
+
|
|
40
|
+
from .cv import CVResult, cv_hapc
|
|
41
|
+
from .single import hapc as _hapc
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class ATEResult(NamedTuple):
    """Point estimate and Wald interval produced by :func:`ate_hapc`.

    The tuple unpacks positionally as ``(estimate, lower, upper)``.

    Fields
    ------
    estimate : float
        Plug-in average treatment effect evaluated at the undersmoothed
        outcome model, i.e. ``mean(μ̂_1(W) - μ̂_0(W))``.
    lower : float
        Left end of the ``(1 - alpha)`` Wald confidence interval.
    upper : float
        Right end of the ``(1 - alpha)`` Wald confidence interval.
    """

    # Plug-in ATE at the selected (undersmoothed) outcome λ.
    estimate: float
    # estimate - half-width of the Wald interval.
    lower: float
    # estimate + half-width of the Wald interval.
    upper: float
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _plot_ate_diagnostics(
    cv_prop: CVResult,
    cv_out: CVResult,
    traj_lambdas: np.ndarray,
    traj_abs_mean_eif: np.ndarray,
    lam_prop_cv: float,
    lam_out_cv: float,
    lam_undersmooth: float,
    threshold: float,
) -> None:
    """Draw and show the three-panel ``ate_hapc`` diagnostic figure.

    Layout: the top row holds the propensity CV curve (left) and the outcome
    CV curve (right), both read from the ``lambdas`` / ``mses`` attributes of
    the respective CV result; the bottom row spans both columns and shows
    ``|mean(EIF)|`` against the outcome λ together with the threshold line and
    vertical markers for the CV λ and the undersmoothed λ.

    Parameters
    ----------
    cv_prop, cv_out : CVResult
        Cross-validation results for the propensity and outcome models.
    traj_lambdas, traj_abs_mean_eif : np.ndarray
        Outcome λ values and the matching ``|mean(EIF)|`` values. Non-finite
        pairs and non-positive λ are dropped, and the rest is sorted by λ.
    lam_prop_cv, lam_out_cv, lam_undersmooth : float
        λ values marked with vertical lines.
    threshold : float
        Height of the horizontal threshold line / shaded band.

    Raises
    ------
    ImportError
        If matplotlib cannot be imported.
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError as e:  # pragma: no cover
        raise ImportError(
            "plot_diagnostics=True requires matplotlib. "
            "Install with: pip install matplotlib"
        ) from e

    figure = plt.figure(figsize=(11.0, 7.5))
    layout = figure.add_gridspec(
        2, 2, height_ratios=[1.0, 1.1], hspace=0.35, wspace=0.3
    )
    panel_prop = figure.add_subplot(layout[0, 0])
    panel_out = figure.add_subplot(layout[0, 1])
    panel_traj = figure.add_subplot(layout[1, :])

    # Convert all CV curves up front, before anything is drawn.
    prop_lams = np.asarray(cv_prop.lambdas, dtype=float)
    prop_scores = np.asarray(cv_prop.mses, dtype=float)
    out_lams = np.asarray(cv_out.lambdas, dtype=float)
    out_scores = np.asarray(cv_out.mses, dtype=float)

    def _draw_cv_panel(ax, lams, scores, curve_color, cv_lam,
                       xlabel, ylabel, title):
        # One CV curve on a log-λ axis plus a dashed marker at the CV-best λ.
        ax.semilogx(lams, scores, "o-", color=curve_color, lw=1.5, ms=5)
        ax.axvline(cv_lam, color="C3", ls="--", lw=1.5,
                   label=f"CV λ = {cv_lam:.4g}")
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.legend(loc="best", fontsize=8)
        ax.grid(True, alpha=0.3)

    _draw_cv_panel(
        panel_prop, prop_lams, prop_scores, "C1", lam_prop_cv,
        "λ (propensity)", "Mean CV logistic deviance",
        "Propensity CV (A ~ W, binomial)",
    )
    _draw_cv_panel(
        panel_out, out_lams, out_scores, "C2", lam_out_cv,
        "λ (outcome)", "Mean CV MSE",
        "Outcome CV (Y ~ (A,W), gaussian)",
    )

    # Trajectory: keep finite points with positive λ, ordered by increasing λ.
    traj_x = np.asarray(traj_lambdas, dtype=float)
    traj_y = np.asarray(traj_abs_mean_eif, dtype=float)
    usable = np.isfinite(traj_x) & np.isfinite(traj_y) & (traj_x > 0)
    traj_x = traj_x[usable]
    traj_y = traj_y[usable]
    ascending = np.argsort(traj_x)
    traj_x = traj_x[ascending]
    traj_y = traj_y[ascending]

    if traj_x.size == 0:
        panel_traj.text(
            0.5, 0.5, "No valid outcome fits on λ grid",
            transform=panel_traj.transAxes, ha="center", va="center",
        )
    else:
        panel_traj.semilogx(
            traj_x, traj_y, "o-", color="C0", lw=2, ms=6,
            label=r"$|\mathrm{mean}(\mathrm{EIF}_{\mathrm{ATE}})|$",
        )
        panel_traj.fill_between(traj_x, 0, threshold, alpha=0.12, color="gray")

    panel_traj.axhline(
        threshold, color="gray", lw=2, alpha=0.85,
        label=r"Threshold $\sigma_{\mathrm{CV}}/(\sqrt{n}\log n)$",
    )
    panel_traj.axvline(
        lam_out_cv, color="C3", ls="--", lw=1.8,
        label=f"Outcome CV λ = {lam_out_cv:.4g}",
    )
    panel_traj.axvline(
        lam_undersmooth, color="C4", ls="-", lw=2.0,
        label=f"Undersmoothed λ = {lam_undersmooth:.4g}",
    )
    panel_traj.set_xlabel("Outcome λ (undersmoothing grid)")
    panel_traj.set_ylabel(r"$|\mathrm{mean}(\mathrm{EIF})|$")
    panel_traj.set_title("Undersmoothing trajectory (fixed propensity at its CV-λ)")
    panel_traj.legend(loc="best", fontsize=9, ncol=2)
    panel_traj.grid(True, alpha=0.3)

    figure.suptitle("ate_hapc diagnostics", fontsize=12, y=0.98)
    figure.subplots_adjust(top=0.92, bottom=0.08, hspace=0.4, wspace=0.3)
    plt.show()
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _coerce_binary(A: np.ndarray) -> np.ndarray:
|
|
145
|
+
"""Return ``A`` re-encoded as floats in ``{0,1}``.
|
|
146
|
+
|
|
147
|
+
Accepts ``{0,1}``, ``{-1,+1}`` (or any pair where one value is non-positive
|
|
148
|
+
and one positive — falls back to the sign).
|
|
149
|
+
"""
|
|
150
|
+
A = np.asarray(A).ravel()
|
|
151
|
+
u = set(np.unique(A).tolist())
|
|
152
|
+
if u.issubset({0, 1, 0.0, 1.0}):
|
|
153
|
+
return A.astype(np.float64)
|
|
154
|
+
if u.issubset({-1, 1, -1.0, 1.0}):
|
|
155
|
+
return ((A > 0).astype(np.float64))
|
|
156
|
+
raise ValueError(
|
|
157
|
+
f"A must be binary in {{0,1}} or {{-1,+1}}; found {sorted(u)}"
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def ate_hapc(X: np.ndarray, Y: np.ndarray, A: np.ndarray,
             alpha: float = 0.05,
             max_degree: int = 1,
             npcs: Optional[int] = None,
             log_lambda_prop_min: float = -5,
             log_lambda_prop_max: float = -3,
             grid_length_prop: int = 10,
             log_lambda_out_min: float = -5,
             log_lambda_out_max: float = -3,
             grid_length_out: int = 10,
             nfolds: int = 5,
             norm: str = "sv",
             predict: Optional[np.ndarray] = None,
             max_iter: int = 5000,
             tol: float = 1e-3,
             step_factor: float = 0.8,
             verbose: bool = False,
             crit: str = "grad",
             center: bool = True,
             approx: bool = False,
             ini: str = "1",
             plot_diagnostics: bool = False) -> ATEResult:
    """ATE estimate with HAPC nuisances and outcome undersmoothing.

    Parameters
    ----------
    X : np.ndarray, shape (n, p)
        Covariate matrix ``W`` (do NOT include the treatment column).
    Y : np.ndarray, shape (n,)
        Continuous outcome.
    A : np.ndarray, shape (n,)
        Binary treatment in ``{0,1}`` or ``{-1,+1}``.
    alpha : float, default 0.05
        Significance level. The returned interval has confidence
        ``1 - alpha``.
    max_degree, npcs, nfolds, norm, max_iter, tol, step_factor, verbose, crit, center, approx, ini :
        Forwarded unchanged to :func:`hapc.cv_hapc` and to the single-λ
        refits. ``npcs=None`` is replaced by ``n``.
    predict :
        Accepted for signature parity with :func:`cv_hapc` and currently
        ignored (``ate_hapc`` always evaluates the EIF on the training
        sample).
    log_lambda_prop_min, log_lambda_prop_max, grid_length_prop :
        Log-λ grid passed to the **propensity** cross-validation
        (``A ~ W``, binomial).
    log_lambda_out_min, log_lambda_out_max, grid_length_out :
        Log-λ grid for the **outcome** cross-validation ``Y ~ (A, W)``
        (gaussian) and for the **undersmoothing** scan, which visits
        ``exp(linspace(min, max, grid_length_out))`` in decreasing λ order
        until ``|mean(EIF)| ≤ τ``.
    plot_diagnostics : bool, default False
        If True, open a matplotlib figure with (1) the propensity CV curve,
        (2) the outcome CV curve, and (3) the undersmoothing path:
        ``|mean(EIF)|`` vs outcome λ with the threshold line and vertical
        markers for the CV and selected undersmoothed λ. Requires
        ``matplotlib`` (``pip install matplotlib``).

    Returns
    -------
    ATEResult
        Named tuple with three fields ``(estimate, lower, upper)``.

    Raises
    ------
    ValueError
        If ``alpha`` is outside ``(0, 1)``, ``X`` is not 2-D, the inputs
        disagree on the number of rows, fewer than two observations are
        supplied, ``grid_length_out < 1``, or ``A`` is not binary.

    Warns
    -----
    RuntimeWarning
        When an outcome refit raises during the undersmoothing scan (or the
        diagnostic trajectory); that λ is skipped.

    Notes
    -----
    The procedure is:

    1. Cross-validate the propensity ``A ~ W`` (binomial) on its grid and the
       outcome ``Y ~ (A, W)`` (gaussian) on the outcome grid (independently
       specified).
    2. Fix the propensity at its CV-best λ; refit on the full sample to
       obtain ``π̂(W_i) = P(A=1 | W_i)`` (clipped to ``[1e-8, 1 - 1e-8]``).
    3. At the CV-best outcome λ, compute the ATE EIF
       ``φ̂_diff = φ̂_1 - φ̂_0`` with
       ``φ̂_a = 1{A=a}/P̂(A=a|W) · (Y - μ̂_a) + μ̂_a - mean(μ̂_a)``
       and let ``σ = std(φ̂_diff)``.
    4. Threshold ``τ = σ / (√n · log n)``.
    5. Walk the **outcome** λ grid in **decreasing** order; pick the first
       (largest) λ for which ``|mean(EIF_diff)| ≤ τ`` — call it ``λ_u``.
       If none qualifies, the smallest λ of the grid is used.
    6. Plug-in estimate: ``ψ̂ = mean(μ̂_1(W; λ_u) - μ̂_0(W; λ_u))``.
       CI: ``ψ̂ ± z_{1 - α/2} · σ_u / √n`` where ``σ_u = std(EIF_diff)``
       at ``λ_u``.

    Examples
    --------
    >>> import numpy as np
    >>> from hapc import ate_hapc
    >>> rng = np.random.default_rng(0)
    >>> n = 200
    >>> W = np.column_stack([rng.uniform(-2, 2, n), rng.normal(0, 0.5, n)])
    >>> p = 1.0 / (1.0 + np.exp(-(W[:, 0] + 0.5 * W[:, 1])))
    >>> A = rng.binomial(1, p, n)
    >>> Y = 2 * W[:, 0] + 0.5 + rng.normal(0, 0.5, n)   # truth: ATE=0
    >>> res = ate_hapc(W, Y, A, alpha=0.05, max_degree=2, npcs=50,
    ...                grid_length_prop=4, grid_length_out=4, nfolds=3,
    ...                norm="2")
    >>> bool(res.lower <= res.estimate <= res.upper)
    True
    """
    import warnings  # function-scope import: only needed for skipped-fit notices

    if not (0.0 < alpha < 1.0):
        raise ValueError(f"alpha must be in (0,1); got {alpha}")

    # --- Coerce inputs ------------------------------------------------------
    X = np.ascontiguousarray(np.asarray(X, dtype=np.float64))
    if X.ndim != 2:
        raise ValueError(f"X must be 2-D; got shape {X.shape}")
    Y = np.asarray(Y, dtype=np.float64).ravel()
    A01 = _coerce_binary(A)
    n, _p = X.shape
    if Y.size != n or A01.size != n:
        raise ValueError("X, Y, A must all have the same number of rows.")
    if n < 2:
        # log(1) == 0 would make the threshold sigma / (sqrt(n) * log n) undefined.
        raise ValueError(f"ate_hapc needs at least 2 observations; got {n}")
    if grid_length_out < 1:
        # An empty grid leaves nothing to scan and nothing to fall back to.
        raise ValueError(
            f"grid_length_out must be at least 1; got {grid_length_out}"
        )

    if npcs is None:
        npcs = int(n)

    lambdas_out = np.exp(
        np.linspace(log_lambda_out_min, log_lambda_out_max, grid_length_out))

    # Settings shared by every single-λ refit; the CV calls add ``nfolds``.
    fit_kwargs = dict(
        max_degree=max_degree, npcs=npcs, norm=norm,
        max_iter=max_iter, tol=tol, step_factor=step_factor,
        verbose=verbose, crit=crit, center=center, approx=approx, ini=ini,
    )
    cv_kwargs_base = dict(nfolds=nfolds, **fit_kwargs)

    # --- 1. CV propensity (binomial) ---------------------------------------
    cv_prop = cv_hapc(
        X, A01, family="binomial",
        log_lambda_min=log_lambda_prop_min,
        log_lambda_max=log_lambda_prop_max,
        grid_length=grid_length_prop,
        **cv_kwargs_base,
    )
    lam_prop_cv = float(cv_prop.best_lambda)

    # Refit propensity at CV λ on full data, predict in-sample probabilities.
    prop = _hapc(
        X, A01, family="binomial", lambda_=lam_prop_cv, predict=X,
        **fit_kwargs,
    )
    # Clip away from 0/1 so the inverse-probability weights stay finite.
    pi1 = np.clip(np.asarray(prop.probabilities).ravel(), 1e-8, 1 - 1e-8)

    # --- 2. CV outcome (gaussian on [A,W]) ---------------------------------
    Xout = np.column_stack([A01, X])
    cv_out = cv_hapc(
        Xout, Y, family="gaussian",
        log_lambda_min=log_lambda_out_min,
        log_lambda_max=log_lambda_out_max,
        grid_length=grid_length_out,
        **cv_kwargs_base,
    )
    lam_out_cv = float(cv_out.best_lambda)

    # Stacked design for one-shot prediction at both arms:
    # first n rows have A=1, last n rows have A=0.
    Xmu1 = np.column_stack([np.ones(n), X])
    Xmu0 = np.column_stack([np.zeros(n), X])
    Xeval = np.vstack([Xmu1, Xmu0])

    def _mu_pair(lam: float):
        """Refit outcome at λ on full data, return (μ̂_1, μ̂_0) on training W."""
        res = _hapc(
            Xout, Y, family="gaussian", lambda_=float(lam), predict=Xeval,
            **fit_kwargs,
        )
        p = np.asarray(res.predictions).ravel()
        if p.size != 2 * n:
            raise RuntimeError(
                f"Outcome predict returned {p.size} values, expected {2 * n}."
            )
        return p[:n], p[n:]

    def _eif_diff(mu1: np.ndarray, mu0: np.ndarray) -> np.ndarray:
        """Per-observation ATE efficient influence function φ̂_1 - φ̂_0."""
        # The centred plug-in term is ADDED to the inverse-probability-weighted
        # residual.  Subtracting it leaves mean(EIF) unchanged but distorts
        # std(EIF), and with it the threshold and the confidence interval.
        eif1 = (A01 / pi1) * (Y - mu1) + (mu1 - mu1.mean())
        eif0 = ((1.0 - A01) / (1.0 - pi1)) * (Y - mu0) + (mu0 - mu0.mean())
        return eif1 - eif0

    # Successful full-sample refits, keyed by λ, so no λ is fitted twice.
    # NOTE(review): assumes the single-λ fit is deterministic for identical
    # inputs - confirm.
    fits = {}
    # λ values whose refit raised during a best-effort scan (warned once).
    failed = set()

    def _try_fit(lam):
        """Best-effort ``_mu_pair``: returns the pair, or None after warning."""
        key = float(lam)
        if key in fits:
            return fits[key]
        if key in failed:
            return None
        try:
            pair = _mu_pair(key)
        except Exception as exc:
            failed.add(key)
            warnings.warn(
                f"ate_hapc: outcome fit failed at lambda={key:.6g} and was "
                f"skipped ({exc!r}).",
                RuntimeWarning,
                stacklevel=3,
            )
            return None
        fits[key] = pair
        return pair

    # --- 3. σ at CV configuration → threshold τ ----------------------------
    # A failure at the CV-selected λ is not recoverable, so it propagates.
    mu1_cv, mu0_cv = _mu_pair(lam_out_cv)
    fits[lam_out_cv] = (mu1_cv, mu0_cv)
    eif_cv = _eif_diff(mu1_cv, mu0_cv)
    sigma_cv = float(np.std(eif_cv, ddof=0))
    threshold = sigma_cv / (np.sqrt(n) * np.log(n))

    # --- 4. Undersmoothing sweep: largest λ → smallest --------------------
    lam_und: Optional[float] = None
    eif_und: Optional[np.ndarray] = None
    mu1_und = mu0_und = None
    for lam in np.sort(lambdas_out)[::-1]:
        pair = _try_fit(lam)
        if pair is None:
            continue
        eif = _eif_diff(*pair)
        if abs(eif.mean()) <= threshold:
            lam_und = float(lam)
            mu1_und, mu0_und = pair
            eif_und = eif
            break

    if eif_und is None:
        # Threshold never met → fall back to the smallest λ in the grid.
        # If that fit failed during the sweep it is retried here so that the
        # underlying exception reaches the caller.
        lam_und = float(lambdas_out.min())
        if lam_und not in fits:
            fits[lam_und] = _mu_pair(lam_und)
        mu1_und, mu0_und = fits[lam_und]
        eif_und = _eif_diff(mu1_und, mu0_und)

    if plot_diagnostics:
        # Full trajectory over the grid (ascending λ); fits already computed
        # by the sweep are reused, the remaining ones are fitted now.
        t_lams = []
        t_abs = []
        for lam in np.sort(lambdas_out):
            pair = _try_fit(lam)
            if pair is None:
                continue
            t_lams.append(float(lam))
            t_abs.append(float(np.abs(_eif_diff(*pair).mean())))
        _plot_ate_diagnostics(
            cv_prop, cv_out,
            np.asarray(t_lams), np.asarray(t_abs),
            lam_prop_cv, lam_out_cv, lam_und, threshold,
        )

    # --- 5. Point estimate + (1 - alpha) Wald CI --------------------------
    psi = float(np.mean(mu1_und - mu0_und))
    sigma_und = float(np.std(eif_und, ddof=0))
    z = float(_normal.ppf(1.0 - alpha / 2.0))
    half = z * sigma_und / np.sqrt(n)

    return ATEResult(estimate=psi, lower=psi - half, upper=psi + half)
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
# Names exported by a star-import of this module.
__all__ = ["ATEResult", "ate_hapc"]
|