hapc 2.3.1__tar.gz → 2.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. {hapc-2.3.1/python/hapc.egg-info → hapc-2.5.0}/PKG-INFO +124 -2
  2. hapc-2.5.0/README.md +321 -0
  3. {hapc-2.3.1 → hapc-2.5.0}/pyproject.toml +3 -2
  4. {hapc-2.3.1 → hapc-2.5.0}/python/hapc/__init__.py +10 -3
  5. hapc-2.5.0/python/hapc/ate.py +735 -0
  6. {hapc-2.3.1 → hapc-2.5.0}/python/hapc/cv.py +37 -4
  7. hapc-2.5.0/python/hapc/hazard.py +351 -0
  8. {hapc-2.3.1 → hapc-2.5.0}/python/hapc/single.py +14 -5
  9. {hapc-2.3.1 → hapc-2.5.0/python/hapc.egg-info}/PKG-INFO +124 -2
  10. {hapc-2.3.1 → hapc-2.5.0}/python/hapc.egg-info/SOURCES.txt +1 -0
  11. {hapc-2.3.1 → hapc-2.5.0}/python/hapc.egg-info/requires.txt +5 -0
  12. {hapc-2.3.1 → hapc-2.5.0}/tests/test_ate.py +56 -0
  13. {hapc-2.3.1 → hapc-2.5.0}/tests/test_ate_hapc_diagnostics_example.py +8 -4
  14. hapc-2.3.1/README.md +0 -203
  15. hapc-2.3.1/python/hapc/ate.py +0 -425
  16. {hapc-2.3.1 → hapc-2.5.0}/CMakeLists.txt +0 -0
  17. {hapc-2.3.1 → hapc-2.5.0}/LICENSE +0 -0
  18. {hapc-2.3.1 → hapc-2.5.0}/MANIFEST.in +0 -0
  19. {hapc-2.3.1 → hapc-2.5.0}/python/hapc/core.py +0 -0
  20. {hapc-2.3.1 → hapc-2.5.0}/python/hapc.egg-info/dependency_links.txt +0 -0
  21. {hapc-2.3.1 → hapc-2.5.0}/python/hapc.egg-info/not-zip-safe +0 -0
  22. {hapc-2.3.1 → hapc-2.5.0}/python/hapc.egg-info/top_level.txt +0 -0
  23. {hapc-2.3.1 → hapc-2.5.0}/setup.cfg +0 -0
  24. {hapc-2.3.1 → hapc-2.5.0}/setup.py +0 -0
  25. {hapc-2.3.1 → hapc-2.5.0}/src/bindings.cpp +0 -0
  26. {hapc-2.3.1 → hapc-2.5.0}/src/cross_kernel.cpp +0 -0
  27. {hapc-2.3.1 → hapc-2.5.0}/src/cv_classi.cpp +0 -0
  28. {hapc-2.3.1 → hapc-2.5.0}/src/cv_fast_pchal.cpp +0 -0
  29. {hapc-2.3.1 → hapc-2.5.0}/src/cv_fast_pchal_python.cpp +0 -0
  30. {hapc-2.3.1 → hapc-2.5.0}/src/fast_pchal.cpp +0 -0
  31. {hapc-2.3.1 → hapc-2.5.0}/src/hapc_core.hpp +0 -0
  32. {hapc-2.3.1 → hapc-2.5.0}/src/logistic_call.cpp +0 -0
  33. {hapc-2.3.1 → hapc-2.5.0}/src/mkernel.cpp +0 -0
  34. {hapc-2.3.1 → hapc-2.5.0}/src/pcghal_call.cpp +0 -0
  35. {hapc-2.3.1 → hapc-2.5.0}/src/pcghal_classi_call.cpp +0 -0
  36. {hapc-2.3.1 → hapc-2.5.0}/src/pcghal_cv.cpp +0 -0
  37. {hapc-2.3.1 → hapc-2.5.0}/src/pcghal_cv_classi_cpp.cpp +0 -0
  38. {hapc-2.3.1 → hapc-2.5.0}/src/pcghal_cv_cpp.cpp +0 -0
  39. {hapc-2.3.1 → hapc-2.5.0}/src/pchal_design.cpp +0 -0
  40. {hapc-2.3.1 → hapc-2.5.0}/src/r_bindings.cpp +0 -0
  41. {hapc-2.3.1 → hapc-2.5.0}/src/ridge_wrappers.cpp +0 -0
  42. {hapc-2.3.1 → hapc-2.5.0}/src/single_pcghal_cpp.cpp +0 -0
  43. {hapc-2.3.1 → hapc-2.5.0}/src/single_pchar.cpp +0 -0
  44. {hapc-2.3.1 → hapc-2.5.0}/tests/test_api.py +0 -0
  45. {hapc-2.3.1 → hapc-2.5.0}/tests/test_core.py +0 -0
  46. {hapc-2.3.1 → hapc-2.5.0}/tests/test_logistic_regression.py +0 -0
  47. {hapc-2.3.1 → hapc-2.5.0}/tests/test_r_vs_python_alpha.py +0 -0
@@ -1,13 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hapc
3
- Version: 2.3.1
3
+ Version: 2.5.0
4
4
  Summary: Highly Adaptive Principal Components
5
5
  Home-page: https://github.com/meixide/hapc
6
6
  Author: Carlos García Meixide
7
7
  Author-email: Carlos García Meixide <cgmeixide@gmail.com>
8
8
  License: MIT
9
9
  Project-URL: Homepage, https://github.com/meixide/hapc
10
- Project-URL: Documentation, https://github.com/meixide/hapc#readme
10
+ Project-URL: Documentation, https://hapc.readthedocs.io
11
11
  Project-URL: Repository, https://github.com/meixide/hapc.git
12
12
  Project-URL: Issues, https://github.com/meixide/hapc/issues
13
13
  Classifier: Programming Language :: Python :: 3
@@ -27,6 +27,10 @@ Requires-Dist: pytest; extra == "dev"
27
27
  Requires-Dist: pytest-cov; extra == "dev"
28
28
  Requires-Dist: black; extra == "dev"
29
29
  Requires-Dist: flake8; extra == "dev"
30
+ Provides-Extra: docs
31
+ Requires-Dist: sphinx>=7; extra == "docs"
32
+ Requires-Dist: furo; extra == "docs"
33
+ Requires-Dist: myst-parser; extra == "docs"
30
34
  Dynamic: author
31
35
  Dynamic: home-page
32
36
  Dynamic: license-file
@@ -36,6 +40,17 @@ Dynamic: requires-python
36
40
 
37
41
  A fast and flexible machine learning library for nonparametric high-dimensional regression and classification with guarantees.
38
42
 
43
+ ## Documentation
44
+
45
+ - **Python API** (rendered from docstrings): https://hapc.readthedocs.io —
46
+ configured via [`.readthedocs.yaml`](.readthedocs.yaml) and
47
+ [`docs/`](docs/) (Sphinx + autodoc). Build locally with
48
+ `pip install -e ".[docs]" && sphinx-build -b html docs docs/_build/html`.
49
+ - **R API** (rendered from roxygen): a [pkgdown](https://pkgdown.r-lib.org)
50
+ site built by [`.github/workflows/pkgdown.yaml`](.github/workflows/pkgdown.yaml)
51
+ (config in [`_pkgdown.yml`](_pkgdown.yml)). Build locally with
52
+ `Rscript -e 'pkgdown::build_site()'`.
53
+
39
54
  ## Installation
40
55
 
41
56
  ### Prerequisites
@@ -182,6 +197,113 @@ cv_result = pcghal_cv(
182
197
  print(cv_result.best_lambda)
183
198
  ```
184
199
 
200
+ ### Average Treatment Effect (ATE)
201
+
202
+ Estimate the ATE `E[Y(1)] − E[Y(0)]` with HAPC nuisance models and a
203
+ doubly-robust (AIPW) efficient influence function. `ate_hapc` returns a point
204
+ estimate and a `(1 − alpha)` Wald confidence interval.
205
+
206
+ ```python
207
+ from hapc import ate_hapc
208
+
209
+ # W: covariates (n, p); A: binary treatment in {0,1} or {-1,+1}; Y: outcome
210
+ res = ate_hapc(W, Y, A, alpha=0.05, method="undersmooth")
211
+ print(res.estimate, res.lower, res.upper)
212
+ ```
213
+
214
+ Two bias-control strategies are available through `method`:
215
+
216
+ - **`method="undersmooth"`** (default) — single-sample estimator. The outcome
217
+ model is undersmoothed (λ pushed below the CV-optimal value) until the
218
+ empirical mean of the influence function is within `σ / (√n · log n)` of zero. This requires the
219
+ **full PC basis** (`npcs = n`, the default) and a λ grid that reaches small λ
220
+ (default `log_lambda_out_min = -10`); otherwise the gate never reaches the
221
+ low-bias regime and `ate_hapc` emits a warning. Pass
222
+ `report_undersmoothing=True` to print the `|mean(EIF)|`-vs-λ path.
223
+ - **`method="crossfit"`** — DML-style K-fold cross-fitting (`cf_folds`, default
224
+ 5, stratified by treatment). Both nuisances are fit on the training folds and
225
+ the influence function is evaluated out-of-fold, giving honest point estimates
226
+ and coverage without undersmoothing. Recommended under good overlap.
227
+
228
+ ### Discrete-time survival (`family = "logit-hazard"`)
229
+
230
+ Fit a discrete-time **logistic hazard** model with HAPC. You supply only the
231
+ observed right-censored data — baseline covariates `X`, the observed time
232
+ `T = min(T_event, C)`, and the event indicator `Delta = 1(T_event <= C)` — and
233
+ the wrapper performs the person-period expansion (one row per
234
+ subject-per-interval-at-risk, hazard label = 1 at the event interval), prepends
235
+ the visit time as the first HAL covariate, and cross-validates the binomial fit.
236
+
237
+ **Model.** The discrete hazard is the conditional event probability in interval
238
+ `t` given survival up to `t`, modelled on the logit scale by a HAPC fit `f` of
239
+ the augmented covariate `(t, x)`:
240
+
241
+ ```text
242
+ lambda(t | x) = P(T_event = t | T_event >= t, X = x)
243
+ logit lambda(t | x) = f(t, x)
244
+ ```
245
+
246
+ **Person-period likelihood.** Under independent right-censoring the observed-data
247
+ likelihood factorises over the at-risk intervals,
248
+
249
+ ```text
250
+ prod_i prod_{t <= T_i} lambda(t|x_i)^Y_it * (1 - lambda(t|x_i))^(1 - Y_it),
251
+ with Y_it = 1(T_event_i = t),
252
+ ```
253
+
254
+ which is exactly the Bernoulli (logistic) likelihood of the expanded
255
+ person-period table — so a binomial HAPC fit of `Y_it` on `(t, x_i)` estimates
256
+ the discrete hazard (Cox 1972; Brown 1975; Allison 1982).
257
+
258
+ **Survival.** The conditional survival function follows by the product-limit
259
+ relation `S(t | x) = prod_{s <= t} (1 - lambda(s | x))`, returned for new
260
+ subjects when `predict=` is supplied.
261
+
262
+ ```python
263
+ from hapc import hazard_hapc
264
+ import numpy as np
265
+
266
+ # X: baseline covariates (n, p); T: observed times; Delta: 0/1 event indicator
267
+ fit = hazard_hapc(X, T, Delta, norm="1", max_degree=2, time_grid=np.arange(1, 7))
268
+ fit.hazard # estimated hazard per person-period row (CV predictions)
269
+ fit.best_lambda, fit.interior # CV-selected lambda; is it interior to the grid?
270
+
271
+ # survival curves S(t|x) for new subjects
272
+ fit = hazard_hapc(X, T, Delta, norm="1", predict=X_new)
273
+ fit.predict_survival # (m, K) survival probabilities over the grid
274
+ ```
275
+
276
+ ```r
277
+ library(hapc)
278
+ # equivalent to cv.hapc(X, T, family = "logit-hazard", Delta = Delta, norm = "1")
279
+ fit <- hazard.hapc(X, T, Delta, norm = "1", max_degree = 2, time_grid = 1:6)
280
+ fit$hazard; fit$best_lambda; fit$interior
281
+ ```
282
+
283
+ `norm` must be `"1"` (logistic LASSO) or `"2"` (logistic ridge); `norm = "sv"`
284
+ is **not implemented** for this family and is flagged.
285
+
286
+ **Returns** (Python `HazardResult` / R `hapc_hazard`):
287
+
288
+ - `hazard` — cross-validated discrete hazard for each person-period row
289
+ - `lambdas`, `risk`, `best_lambda` — CV grid, mean logistic deviance, selected λ
290
+ - `interior` — whether `best_lambda` is strictly inside the grid (sanity check)
291
+ - `time_grid`, `ids`/`id`, `Y` — the discrete grid and person-period bookkeeping
292
+ - `predict_hazard`, `predict_survival` — hazard surface and survival curves for
293
+ new subjects (only when `predict=` is given)
294
+ - `cv` — the underlying cross-validation result
295
+
296
+ Worked end-to-end examples (five hazard data-generating processes, with
297
+ true-vs-estimated hazard scatters and CV risk-vs-λ curves verifying an interior
298
+ optimum) are in
299
+ [`examples/hazard_logit_hazard_examples.R`](examples/hazard_logit_hazard_examples.R)
300
+ and
301
+ [`examples/hazard_logit_hazard_examples.py`](examples/hazard_logit_hazard_examples.py).
302
+
303
+ **References.** Cox (1972, *JRSS B*); Brown (1975, *Biometrics*); Allison (1982,
304
+ *Sociological Methodology*); Singer & Willett (2003, *Applied Longitudinal Data
305
+ Analysis*); Benkeser & van der Laan (2016, *IEEE DSAA*).
306
+
185
307
  ## API Reference
186
308
 
187
309
  ### `hapc.single.single_pcghal()`
hapc-2.5.0/README.md ADDED
@@ -0,0 +1,321 @@
1
+ # HAPC: Highly Adaptive Principal Components
2
+
3
+ A fast and flexible machine learning library for nonparametric high-dimensional regression and classification with guarantees.
4
+
5
+ ## Documentation
6
+
7
+ - **Python API** (rendered from docstrings): https://hapc.readthedocs.io —
8
+ configured via [`.readthedocs.yaml`](.readthedocs.yaml) and
9
+ [`docs/`](docs/) (Sphinx + autodoc). Build locally with
10
+ `pip install -e ".[docs]" && sphinx-build -b html docs docs/_build/html`.
11
+ - **R API** (rendered from roxygen): a [pkgdown](https://pkgdown.r-lib.org)
12
+ site built by [`.github/workflows/pkgdown.yaml`](.github/workflows/pkgdown.yaml)
13
+ (config in [`_pkgdown.yml`](_pkgdown.yml)). Build locally with
14
+ `Rscript -e 'pkgdown::build_site()'`.
15
+
16
+ ## Installation
17
+
18
+ ### Prerequisites
19
+
20
+ - Python 3.8+
21
+ - C++ compiler (g++, clang, or MSVC)
22
+ - CMake 3.15+
23
+ - Eigen3
24
+
25
+ ### Quick Install
26
+
27
+ ```bash
28
+ pip install hapc
29
+ ```
30
+
31
+ Prebuilt wheels are published for Linux (manylinux2014, x86_64), macOS
32
+ (Intel + Apple Silicon) and Windows, for CPython 3.8–3.12. No compiler,
33
+ CMake or Eigen is needed when a wheel is available.
34
+
35
+ ### Linux / HPC clusters
36
+
37
+ The Linux wheels use the **manylinux2014** baseline (glibc 2.17), so
38
+ `pip install hapc` works out of the box on HPC login/compute nodes —
39
+ no `conda` toolchain, `devtoolset`, or sysroot setup required:
40
+
41
+ ```bash
42
+ pip install hapc
43
+ ```
44
+
45
+ If you must build from the source distribution (niche architecture, very
46
+ old Python, or an air-gapped node), provide a C++17 compiler and either
47
+ let CMake fetch Eigen automatically (needs network) or install Eigen and
48
+ let `find_package(Eigen3)` find it:
49
+
50
+ ```bash
51
+ # with conda compilers (recommended on HPC)
52
+ conda install -c conda-forge cxx-compiler cmake eigen
53
+ pip install hapc --no-binary hapc
54
+ ```
55
+
56
+ ### Install from GitHub (latest development version)
57
+
58
+ ```bash
59
+ pip install git+https://github.com/meixide/hapc.git
60
+ ```
61
+
62
+ Or with editable install for development:
63
+
64
+ ```bash
65
+ git clone https://github.com/meixide/hapc.git
66
+ cd hapc
67
+ pip install -e .
68
+ ```
69
+
70
+ ### Install build dependencies
71
+
72
+ If installation fails, you may need to install build dependencies:
73
+
74
+ **macOS:**
75
+ ```bash
76
+ brew install cmake eigen
77
+ ```
78
+
79
+ **Ubuntu/Debian:**
80
+ ```bash
81
+ sudo apt-get install cmake libeigen3-dev build-essential
82
+ ```
83
+
84
+ **Windows:**
85
+ ```bash
86
+ pip install cmake
87
+ # Install Visual Studio Build Tools or use conda
88
+ conda install -c conda-forge eigen
89
+ ```
90
+
91
+ ## Quick Start
92
+
93
+ ```python
94
+ import numpy as np
95
+ from hapc.single import single_pcghal
96
+ from hapc.cv import pcghal_cv
97
+
98
+ # Generate sample data
99
+ X = np.random.randn(100, 5)
100
+ Y = X[:, 0] + 0.5 * X[:, 1] + np.random.randn(100) * 0.1
101
+
102
+ # Single fit with fixed lambda
103
+ result = single_pcghal(X, Y, maxdeg=2, npc=5, single_lambda=0.01)
104
+ print(f"Risk: {result.optimizer_output.risk:.6f}")
105
+
106
+ # Cross-validation to select lambda
107
+ lambdas = np.logspace(-4, 0, 10)
108
+ cv_result = pcghal_cv(X, Y, maxdeg=2, npc=5, lambdas=lambdas, nfolds=5)
109
+ print(f"Best lambda: {cv_result.best_lambda:.6f}")
110
+
111
+ # Make predictions
112
+ X_test = np.random.randn(20, 5)
113
+ result = single_pcghal(X, Y, maxdeg=2, npc=5, single_lambda=0.01, predict=X_test)
114
+ print(f"Predictions: {result.predictions}")
115
+ ```
116
+
117
+ ## Usage
118
+
119
+ ### Regression
120
+
121
+ ```python
122
+ from hapc.single import single_pcghal
123
+
124
+ result = single_pcghal(
125
+ X, Y,
126
+ maxdeg=2, # Maximum degree of interactions
127
+ npc=10, # Number of principal components
128
+ single_lambda=0.01,
129
+ predict=X_test # Optional: test data for predictions
130
+ )
131
+ ```
132
+
133
+ ### Classification
134
+
135
+ ```python
136
+ from hapc.single import single_pcghal
137
+
138
+ result = single_pcghal(
139
+ X, Y_binary,
140
+ maxdeg=2,
141
+ npc=10,
142
+ single_lambda=0.01,
143
+ predict=X_test
144
+ )
145
+ ```
146
+
147
+ ### Cross-Validation
148
+
149
+ ```python
150
+ from hapc.cv import pcghal_cv
151
+
152
+ cv_result = pcghal_cv(
153
+ X, Y,
154
+ maxdeg=2,
155
+ npc=10,
156
+ lambdas=np.logspace(-4, 0, 20),
157
+ nfolds=5
158
+ )
159
+ print(cv_result.best_lambda)
160
+ ```
161
+
162
+ ### Average Treatment Effect (ATE)
163
+
164
+ Estimate the ATE `E[Y(1)] − E[Y(0)]` with HAPC nuisance models and a
165
+ doubly-robust (AIPW) efficient influence function. `ate_hapc` returns a point
166
+ estimate and a `(1 − alpha)` Wald confidence interval.
167
+
168
+ ```python
169
+ from hapc import ate_hapc
170
+
171
+ # W: covariates (n, p); A: binary treatment in {0,1} or {-1,+1}; Y: outcome
172
+ res = ate_hapc(W, Y, A, alpha=0.05, method="undersmooth")
173
+ print(res.estimate, res.lower, res.upper)
174
+ ```
175
+
176
+ Two bias-control strategies are available through `method`:
177
+
178
+ - **`method="undersmooth"`** (default) — single-sample estimator. The outcome
179
+ model is undersmoothed (λ pushed below the CV-optimal value) until the
180
+ empirical mean of the influence function is within `σ / (√n · log n)` of zero. This requires the
181
+ **full PC basis** (`npcs = n`, the default) and a λ grid that reaches small λ
182
+ (default `log_lambda_out_min = -10`); otherwise the gate never reaches the
183
+ low-bias regime and `ate_hapc` emits a warning. Pass
184
+ `report_undersmoothing=True` to print the `|mean(EIF)|`-vs-λ path.
185
+ - **`method="crossfit"`** — DML-style K-fold cross-fitting (`cf_folds`, default
186
+ 5, stratified by treatment). Both nuisances are fit on the training folds and
187
+ the influence function is evaluated out-of-fold, giving honest point estimates
188
+ and coverage without undersmoothing. Recommended under good overlap.
189
+
190
+ ### Discrete-time survival (`family = "logit-hazard"`)
191
+
192
+ Fit a discrete-time **logistic hazard** model with HAPC. You supply only the
193
+ observed right-censored data — baseline covariates `X`, the observed time
194
+ `T = min(T_event, C)`, and the event indicator `Delta = 1(T_event <= C)` — and
195
+ the wrapper performs the person-period expansion (one row per
196
+ subject-per-interval-at-risk, hazard label = 1 at the event interval), prepends
197
+ the visit time as the first HAL covariate, and cross-validates the binomial fit.
198
+
199
+ **Model.** The discrete hazard is the conditional event probability in interval
200
+ `t` given survival up to `t`, modelled on the logit scale by a HAPC fit `f` of
201
+ the augmented covariate `(t, x)`:
202
+
203
+ ```text
204
+ lambda(t | x) = P(T_event = t | T_event >= t, X = x)
205
+ logit lambda(t | x) = f(t, x)
206
+ ```
207
+
208
+ **Person-period likelihood.** Under independent right-censoring the observed-data
209
+ likelihood factorises over the at-risk intervals,
210
+
211
+ ```text
212
+ prod_i prod_{t <= T_i} lambda(t|x_i)^Y_it * (1 - lambda(t|x_i))^(1 - Y_it),
213
+ with Y_it = 1(T_event_i = t),
214
+ ```
215
+
216
+ which is exactly the Bernoulli (logistic) likelihood of the expanded
217
+ person-period table — so a binomial HAPC fit of `Y_it` on `(t, x_i)` estimates
218
+ the discrete hazard (Cox 1972; Brown 1975; Allison 1982).
219
+
220
+ **Survival.** The conditional survival function follows by the product-limit
221
+ relation `S(t | x) = prod_{s <= t} (1 - lambda(s | x))`, returned for new
222
+ subjects when `predict=` is supplied.
223
+
224
+ ```python
225
+ from hapc import hazard_hapc
226
+ import numpy as np
227
+
228
+ # X: baseline covariates (n, p); T: observed times; Delta: 0/1 event indicator
229
+ fit = hazard_hapc(X, T, Delta, norm="1", max_degree=2, time_grid=np.arange(1, 7))
230
+ fit.hazard # estimated hazard per person-period row (CV predictions)
231
+ fit.best_lambda, fit.interior # CV-selected lambda; is it interior to the grid?
232
+
233
+ # survival curves S(t|x) for new subjects
234
+ fit = hazard_hapc(X, T, Delta, norm="1", predict=X_new)
235
+ fit.predict_survival # (m, K) survival probabilities over the grid
236
+ ```
237
+
238
+ ```r
239
+ library(hapc)
240
+ # equivalent to cv.hapc(X, T, family = "logit-hazard", Delta = Delta, norm = "1")
241
+ fit <- hazard.hapc(X, T, Delta, norm = "1", max_degree = 2, time_grid = 1:6)
242
+ fit$hazard; fit$best_lambda; fit$interior
243
+ ```
244
+
245
+ `norm` must be `"1"` (logistic LASSO) or `"2"` (logistic ridge); `norm = "sv"`
246
+ is **not implemented** for this family and is flagged.
247
+
248
+ **Returns** (Python `HazardResult` / R `hapc_hazard`):
249
+
250
+ - `hazard` — cross-validated discrete hazard for each person-period row
251
+ - `lambdas`, `risk`, `best_lambda` — CV grid, mean logistic deviance, selected λ
252
+ - `interior` — whether `best_lambda` is strictly inside the grid (sanity check)
253
+ - `time_grid`, `ids`/`id`, `Y` — the discrete grid and person-period bookkeeping
254
+ - `predict_hazard`, `predict_survival` — hazard surface and survival curves for
255
+ new subjects (only when `predict=` is given)
256
+ - `cv` — the underlying cross-validation result
257
+
258
+ Worked end-to-end examples (five hazard data-generating processes, with
259
+ true-vs-estimated hazard scatters and CV risk-vs-λ curves verifying an interior
260
+ optimum) are in
261
+ [`examples/hazard_logit_hazard_examples.R`](examples/hazard_logit_hazard_examples.R)
262
+ and
263
+ [`examples/hazard_logit_hazard_examples.py`](examples/hazard_logit_hazard_examples.py).
264
+
265
+ **References.** Cox (1972, *JRSS B*); Brown (1975, *Biometrics*); Allison (1982,
266
+ *Sociological Methodology*); Singer & Willett (2003, *Applied Longitudinal Data
267
+ Analysis*); Benkeser & van der Laan (2016, *IEEE DSAA*).
268
+
269
+ ## API Reference
270
+
271
+ ### `hapc.single.single_pcghal()`
272
+
273
+ Fit PC-GHAL with a single lambda value.
274
+
275
+ **Parameters:**
276
+ - `X` (ndarray, shape (n, p)): Input features
277
+ - `Y` (ndarray, shape (n,)): Response variable
278
+ - `maxdeg` (int): Maximum degree of interactions
279
+ - `npc` (int): Number of principal components
280
+ - `single_lambda` (float): Regularization parameter
281
+ - `max_iter` (int, default=100): Maximum iterations
282
+ - `tol` (float, default=1e-6): Convergence tolerance
283
+ - `verbose` (bool, default=False): Print progress
284
+ - `predict` (ndarray, optional): Test data for predictions
285
+ - `center` (bool, default=True): Center the design matrix
286
+
287
+ **Returns:**
288
+ - `result.optimizer_output.alpha`: Coefficients
289
+ - `result.optimizer_output.risk`: Final risk
290
+ - `result.optimizer_output.iter`: Iterations until convergence
291
+ - `result.predictions`: Predictions on test data (if provided)
292
+
293
+ ### `hapc.cv.pcghal_cv()`
294
+
295
+ Cross-validation to select lambda.
296
+
297
+ **Parameters:**
298
+ - `lambdas` (ndarray): Grid of lambda values to test
299
+ - `nfolds` (int, default=5): Number of CV folds
300
+ - ...other parameters same as `single_pcghal`
301
+
302
+ **Returns:**
303
+ - `cv_result.best_lambda`: Optimal lambda
304
+ - `cv_result.mses`: CV errors for each lambda
305
+ - `cv_result.best_model`: Fitted model with best lambda
306
+ - `cv_result.predictions`: Predictions on test data (if provided)
307
+
308
+ ## Contributing
309
+
310
+ Contributions welcome! The C++ core is shared between the R and Python packages.
311
+
312
+ ```bash
313
+ git clone https://github.com/meixide/hapc.git
314
+ cd hapc
315
+ pip install -e .
316
+ pytest
317
+ ```
318
+
319
+ ## License
320
+
321
+ MIT License - see LICENSE file
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "hapc"
7
- version = "2.3.1"
7
+ version = "2.5.0"
8
8
  description = "Highly Adaptive Principal Components"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -28,10 +28,11 @@ dependencies = [
28
28
 
29
29
  [project.optional-dependencies]
30
30
  dev = ["pytest", "pytest-cov", "black", "flake8"]
31
+ docs = ["sphinx>=7", "furo", "myst-parser"]
31
32
 
32
33
  [project.urls]
33
34
  Homepage = "https://github.com/meixide/hapc"
34
- Documentation = "https://github.com/meixide/hapc#readme"
35
+ Documentation = "https://hapc.readthedocs.io"
35
36
  Repository = "https://github.com/meixide/hapc.git"
36
37
  Issues = "https://github.com/meixide/hapc/issues"
37
38
 
@@ -5,7 +5,10 @@ Public API
5
5
  High-level entry points (mirror the R package):
6
6
 
7
7
  - :func:`hapc` — single-λ fit (gaussian / binomial; norm in {"sv","1","2"}).
8
- - :func:`cv_hapc` — k-fold cross-validated fit.
8
+ - :func:`cv_hapc` — k-fold cross-validated fit (also dispatches
9
+ ``family="logit-hazard"``).
10
+ - :func:`hazard_hapc` — discrete-time logistic hazard from right-censored
11
+ survival data ``(X, T, Delta)`` (norm in {"1","2"}).
9
12
 
10
13
  Lower-level building blocks:
11
14
 
@@ -16,10 +19,11 @@ Lower-level building blocks:
16
19
  :func:`single_pcghal_classification`,
17
20
  :func:`single_pcghal_classification_ridge_only`
18
21
  - :func:`pcghal_cv`, :func:`pcghal_cv_classi`, :func:`fasthal_cv`
19
- - :func:`ate_hapc` — ATE estimate + Wald CI via HAPC + outcome undersmoothing.
22
+ - :func:`ate_hapc` — doubly-robust ATE estimate + Wald CI via HAPC nuisances,
23
+ with ``method="undersmooth"`` (default) or ``method="crossfit"`` (DML-style).
20
24
  """
21
25
 
22
- __version__ = "2.3.1"
26
+ __version__ = "2.5.0"
23
27
 
24
28
  from .core import (
25
29
  DesignOutput,
@@ -56,6 +60,7 @@ from .cv import (
56
60
  pcghal_cv_classi_lasso,
57
61
  )
58
62
  from .ate import ATEResult, ate_hapc
63
+ from .hazard import HazardResult, hazard_hapc
59
64
 
60
65
  __all__ = [
61
66
  "__version__",
@@ -63,6 +68,7 @@ __all__ = [
63
68
  "hapc",
64
69
  "cv_hapc",
65
70
  "ate_hapc",
71
+ "hazard_hapc",
66
72
  # design & kernels
67
73
  "design_hapc",
68
74
  "kernel_hapc",
@@ -87,6 +93,7 @@ __all__ = [
87
93
  # result types
88
94
  "ATEResult",
89
95
  "CVResult",
96
+ "HazardResult",
90
97
  "DesignOutput",
91
98
  "OptimizerOutput",
92
99
  "SingleLambdaResult",