hapc 0.1.2__cp38-cp38-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hapc/__init__.py ADDED
@@ -0,0 +1,33 @@
1
+ """HAPC: Highly Adaptive Principal Components."""
2
+
3
+ __version__ = "0.1.2"
4
+
5
+ from .core import (
6
+ pchal_design,
7
+ ridge_regression,
8
+ mkernel,
9
+ kernel_cross,
10
+ pcghal,
11
+ pcghal_classification,
12
+ fast_pchal,
13
+ )
14
+ from .single import single_lambda_fit, single_pcghal, hapc, SinglePcghalResult
15
+ from .cv import pcghal_cv, CVResult, fasthal_cv, cv_hapc
16
+
17
+ __all__ = [
18
+ "pchal_design",
19
+ "ridge_regression",
20
+ "mkernel",
21
+ "kernel_cross",
22
+ "pcghal",
23
+ "pcghal_classification",
24
+ "fast_pchal",
25
+ "single_lambda_fit",
26
+ "single_pcghal",
27
+ "hapc",
28
+ "SinglePcghalResult",
29
+ "pcghal_cv",
30
+ "cv_hapc",
31
+ "fasthal_cv",
32
+ "CVResult",
33
+ ]
hapc/core.py ADDED
@@ -0,0 +1,148 @@
1
+ """Python interface to shared HAPC C++ library."""
2
+
3
+ import numpy as np
4
+ from typing import NamedTuple, Optional
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ # Try to import hapc_core module
9
+ hapc_core = None
10
+
11
+ # Try direct import
12
+ try:
13
+ import hapc_core
14
+ except ImportError:
15
+ pass
16
+
17
+ # Try relative import
18
+ if hapc_core is None:
19
+ try:
20
+ from .. import hapc_core
21
+ except ImportError:
22
+ pass
23
+
24
+ # Try to find it in known locations
25
+ if hapc_core is None:
26
+ search_paths = [
27
+ Path(__file__).parent, # Same directory as this file
28
+ Path(__file__).parent.parent.parent / "build", # Build directory
29
+ ]
30
+
31
+ for path in search_paths:
32
+ if path.exists():
33
+ sys.path.insert(0, str(path))
34
+ try:
35
+ import hapc_core
36
+ break
37
+ except ImportError:
38
+ continue
39
+
40
+ if hapc_core is None:
41
+ raise ImportError(
42
+ "hapc_core module not found. The C++ extension may not be built.\n"
43
+ "Try: pip install -e . --force-reinstall --no-cache-dir"
44
+ )
45
+
46
+ class DesignOutput(NamedTuple):
47
+ """Output from pchal_design."""
48
+ H: np.ndarray
49
+ U: np.ndarray
50
+ d: np.ndarray
51
+ V: np.ndarray
52
+
53
+ class OptimizerOutput(NamedTuple):
54
+ """Output from optimizer functions."""
55
+ alpha: np.ndarray
56
+ alphaiters: np.ndarray
57
+ beta: np.ndarray
58
+ risk: float
59
+ iter: int
60
+
61
+ def _ensure_c_contiguous(arr: np.ndarray, dtype=np.float64) -> np.ndarray:
62
+ """Ensure array is C-contiguous double."""
63
+ arr = np.asarray(arr, dtype=dtype)
64
+ return np.ascontiguousarray(arr) if not arr.flags['C_CONTIGUOUS'] else arr
65
+
66
+ def pchal_design(X: np.ndarray, maxdeg: int, npc: int, center: bool = True) -> DesignOutput:
67
+ """Generate PC-HAL design components."""
68
+ X = _ensure_c_contiguous(X)
69
+ n = X.shape[0]
70
+
71
+ # Cap npc at n-1 when center=True (rank reduction due to centering)
72
+ # Cap npc at n when center=False
73
+ max_npc = n - 1 if center else n
74
+ npc = min(npc, max_npc)
75
+ npc = max(1, npc) # At least 1
76
+
77
+ result = hapc_core.pchal_des(X, int(maxdeg), int(npc), bool(center))
78
+ return DesignOutput(result.H, result.U, result.d, result.V)
79
+
80
+ def ridge_regression(Y: np.ndarray, U: np.ndarray, D2: np.ndarray,
81
+ lambda_: float) -> np.ndarray:
82
+ """Ridge regression solver."""
83
+ Y = _ensure_c_contiguous(Y, np.float64)
84
+ U = _ensure_c_contiguous(U, np.float64)
85
+ D2 = _ensure_c_contiguous(D2, np.float64)
86
+ return hapc_core.ridge_call(Y, U, D2, float(lambda_))
87
+
88
+ def mkernel(X: np.ndarray, m: int, center: bool = True) -> np.ndarray:
89
+ """Compute Haar-like kernel matrix."""
90
+ X = _ensure_c_contiguous(X)
91
+ return hapc_core.mkernel_call(X, int(m), bool(center))
92
+
93
+ def kernel_cross(Xtr: np.ndarray, Xte: np.ndarray, m: int,
94
+ center: bool = True) -> np.ndarray:
95
+ """Compute cross-kernel between training and test data."""
96
+ Xtr = _ensure_c_contiguous(Xtr)
97
+ Xte = _ensure_c_contiguous(Xte)
98
+ return hapc_core.kernel_cross_call(Xtr, Xte, int(m), bool(center))
99
+
100
+ def pcghal(Y: np.ndarray, Xtilde: np.ndarray, ENn: np.ndarray,
101
+ alpha0: np.ndarray, max_iter: int = 100, tol: float = 1e-6,
102
+ step_factor: float = 1.0, verbose: bool = False,
103
+ crit: str = "grad") -> OptimizerOutput:
104
+ """PC-GHAL optimizer (regression)."""
105
+ Y = _ensure_c_contiguous(Y)
106
+ Xtilde = _ensure_c_contiguous(Xtilde)
107
+ ENn = _ensure_c_contiguous(ENn)
108
+ alpha0 = _ensure_c_contiguous(alpha0)
109
+
110
+ result = hapc_core.pcghal_call(Y, Xtilde, ENn, alpha0, int(max_iter),
111
+ float(tol), float(step_factor), bool(verbose), str(crit))
112
+ return OptimizerOutput(result.alpha, result.alphaiters, result.beta,
113
+ result.risk, result.iter)
114
+
115
+ def pcghal_classification(Y: np.ndarray, Xtilde: np.ndarray, ENn: np.ndarray,
116
+ alpha0: np.ndarray, max_iter: int = 100,
117
+ tol: float = 1e-6, step_factor: float = 1.0,
118
+ verbose: bool = False) -> OptimizerOutput:
119
+ """PC-GHAL optimizer (classification)."""
120
+ Y = _ensure_c_contiguous(Y)
121
+ Xtilde = _ensure_c_contiguous(Xtilde)
122
+ ENn = _ensure_c_contiguous(ENn)
123
+ alpha0 = _ensure_c_contiguous(alpha0)
124
+
125
+ result = hapc_core.pcghal_classi_call(Y, Xtilde, ENn, alpha0, int(max_iter),
126
+ float(tol), float(step_factor), bool(verbose))
127
+ return OptimizerOutput(result.alpha, result.alphaiters, result.beta,
128
+ result.risk, result.iter)
129
+
130
+ def fast_pchal(U: np.ndarray, D2: np.ndarray, Y: np.ndarray,
131
+ lambda_: float) -> np.ndarray:
132
+ """Fast LASSO-type solver."""
133
+ U = _ensure_c_contiguous(U)
134
+ D2 = _ensure_c_contiguous(D2)
135
+ Y = _ensure_c_contiguous(Y)
136
+ return hapc_core.fast_pchal_call(U, D2, Y, float(lambda_))
137
+
138
+ __all__ = [
139
+ "DesignOutput",
140
+ "OptimizerOutput",
141
+ "pchal_design",
142
+ "ridge_regression",
143
+ "mkernel",
144
+ "kernel_cross",
145
+ "pcghal",
146
+ "pcghal_classification",
147
+ "fast_pchal",
148
+ ]
hapc/cv.py ADDED
@@ -0,0 +1,340 @@
1
+ """Cross-validation functions - calls C++ fasthal_cv_call."""
2
+
3
+ import numpy as np
4
+ from typing import Optional, NamedTuple
5
+ import ctypes
6
+ from .core import kernel_cross, mkernel
7
+
8
+ class CVResult(NamedTuple):
9
+ """Cross-validation result."""
10
+ mses: np.ndarray
11
+ lambdas: np.ndarray
12
+ best_lambda: float
13
+ best_model_alpha: np.ndarray
14
+ predictions: Optional[np.ndarray] = None
15
+
16
+ def pcghal_cv(X: np.ndarray, Y: np.ndarray, maxdeg: int, npc: int,
17
+ lambdas: Optional[np.ndarray] = None,
18
+ log_lambda_min: float = -5,
19
+ log_lambda_max: float = -3,
20
+ grid_length: int = 10,
21
+ nfolds: int = 5,
22
+ predict: Optional[np.ndarray] = None,
23
+ center: bool = True, verbose: bool = False,
24
+ max_iter: int = 100, tol: float = 1e-6) -> CVResult:
25
+ """
26
+ Cross-validation for PC-GHAL with gradient descent optimizer.
27
+ Calls C++ pcghal_cv_fit directly (matches R pchal_cv_call).
28
+
29
+ Parameters
30
+ ----------
31
+ X : np.ndarray, shape (n, p)
32
+ Input features
33
+ Y : np.ndarray, shape (n,)
34
+ Response variable
35
+ maxdeg : int
36
+ Maximum degree of interactions
37
+ npc : int
38
+ Number of principal components
39
+ lambdas : np.ndarray, optional
40
+ Array of lambda regularization parameters to test.
41
+ If None, generates grid from log_lambda_min to log_lambda_max.
42
+ log_lambda_min : float, default=-5
43
+ Minimum log(lambda) for grid generation (if lambdas is None)
44
+ log_lambda_max : float, default=-3
45
+ Maximum log(lambda) for grid generation (if lambdas is None)
46
+ grid_length : int, default=10
47
+ Number of lambda values to generate (if lambdas is None)
48
+ nfolds : int, default=5
49
+ Number of folds for cross-validation
50
+ predict : np.ndarray, optional
51
+ Test data for predictions
52
+ center : bool, default=True
53
+ Center the design matrix
54
+ verbose : bool, default=False
55
+ Print progress information
56
+ max_iter : int, default=100
57
+ Maximum iterations for optimizer
58
+ tol : float, default=1e-6
59
+ Convergence tolerance
60
+
61
+ Returns
62
+ -------
63
+ CVResult
64
+ Cross-validation results with best lambda and predictions
65
+ """
66
+ from .core import _ensure_c_contiguous, hapc_core
67
+
68
+ X = _ensure_c_contiguous(X)
69
+ Y = _ensure_c_contiguous(Y)
70
+
71
+ # Generate lambda grid if not provided
72
+ if lambdas is None:
73
+ log_lambdas = np.linspace(log_lambda_min, log_lambda_max, grid_length)
74
+ lambdas = np.exp(log_lambdas)
75
+
76
+ lambdas = np.asarray(lambdas, dtype=np.float64)
77
+ n, p = X.shape
78
+
79
+ if predict is not None:
80
+ predict = _ensure_c_contiguous(predict)
81
+ else:
82
+ predict = np.array([], dtype=np.float64).reshape(0, p)
83
+
84
+ if verbose:
85
+ print("=" * 60)
86
+ print("PC-GHAL Cross-Validation (C++ Implementation)")
87
+ print("=" * 60)
88
+ print(f"Lambda grid: {len(lambdas)} values from {lambdas.min():.6f} to {lambdas.max():.6f}")
89
+
90
+ # Call C++ pcghal_cv_fit directly
91
+ result_cpp = hapc_core.pcghal_cv_fit(
92
+ X, Y, maxdeg, npc, lambdas.tolist(), nfolds,
93
+ predict,
94
+ max_iter, tol, 1.0, verbose, "risk", center, False
95
+ )
96
+
97
+ # Extract predictions
98
+ predictions_out = None
99
+ if predict.shape[0] > 0 and result_cpp.predictions.size > 0:
100
+ predictions_out = result_cpp.predictions
101
+
102
+ return CVResult(
103
+ mses=np.array(result_cpp.mses),
104
+ lambdas=np.array(result_cpp.lambdas),
105
+ best_lambda=result_cpp.best_lambda,
106
+ best_model_alpha=result_cpp.best_alpha,
107
+ predictions=predictions_out
108
+ )
109
+
110
+
111
+ def fasthal_cv(X: np.ndarray, Y: np.ndarray, npc: int,
112
+ lambdas: np.ndarray, nfolds: int = 5,
113
+ predict: Optional[np.ndarray] = None,
114
+ maxdeg: int = 1, center: bool = True,
115
+ approx: bool = False, l1: bool = False) -> CVResult:
116
+ """
117
+ Fast cross-validation with L1 (LASSO) or L2 (Ridge) penalties.
118
+ Matches R cv.hapc with norm="1" or norm="2".
119
+
120
+ Parameters
121
+ ----------
122
+ X : np.ndarray, shape (n, p)
123
+ Input features
124
+ Y : np.ndarray, shape (n,)
125
+ Response variable
126
+ npc : int
127
+ Number of principal components
128
+ lambdas : np.ndarray
129
+ Array of lambda regularization parameters to test
130
+ nfolds : int, default=5
131
+ Number of folds for cross-validation
132
+ predict : np.ndarray, optional
133
+ Test data for predictions
134
+ maxdeg : int, default=1
135
+ Maximum degree of interactions
136
+ center : bool, default=True
137
+ Center the design matrix
138
+ approx : bool, default=False
139
+ Use approximate eigendecomposition
140
+ l1 : bool, default=False
141
+ Use L1 penalty (LASSO), otherwise L2 (Ridge)
142
+
143
+ Returns
144
+ -------
145
+ CVResult
146
+ Cross-validation results with best lambda and predictions
147
+ """
148
+ from .single import single_lambda_fit
149
+ from .core import _ensure_c_contiguous, pchal_design
150
+ from sklearn.model_selection import KFold
151
+
152
+ X = _ensure_c_contiguous(X)
153
+ Y = _ensure_c_contiguous(Y)
154
+ lambdas = np.asarray(lambdas, dtype=np.float64)
155
+ n, p = X.shape
156
+
157
+ if predict is not None:
158
+ predict = _ensure_c_contiguous(predict)
159
+ else:
160
+ predict = np.array([], dtype=np.float64).reshape(0, p)
161
+
162
+ # CV loop
163
+ cv = KFold(n_splits=nfolds, shuffle=True, random_state=42)
164
+ cv_mses = np.zeros((nfolds, len(lambdas)))
165
+
166
+ fold_idx = 0
167
+ for train_idx, test_idx in cv.split(X):
168
+ X_train, X_test = X[train_idx], X[test_idx]
169
+ Y_train, Y_test = Y[train_idx], Y[test_idx]
170
+
171
+ for j, lam in enumerate(lambdas):
172
+ # Fit on train
173
+ result = single_lambda_fit(X_train, Y_train, maxdeg=maxdeg,
174
+ npc=npc, single_lambda=lam,
175
+ center=center, approx=approx, l1=l1)
176
+
177
+ # Predict on test
178
+ if X_test.shape[0] > 0 and result.alpha is not None:
179
+ # Make predictions on test set using predict parameter
180
+ result_test = single_lambda_fit(X_train, Y_train, maxdeg=maxdeg,
181
+ npc=npc, single_lambda=lam,
182
+ predict=X_test,
183
+ center=center, approx=approx, l1=l1)
184
+
185
+ if result_test.predictions is not None:
186
+ y_pred = result_test.predictions
187
+ cv_mses[fold_idx, j] = np.mean((Y_test - y_pred) ** 2)
188
+ else:
189
+ cv_mses[fold_idx, j] = np.inf
190
+ else:
191
+ cv_mses[fold_idx, j] = np.inf
192
+
193
+ fold_idx += 1
194
+
195
+ # Average CV MSE
196
+ mean_mses = np.nanmean(cv_mses, axis=0)
197
+ best_idx = np.nanargmin(mean_mses)
198
+ best_lambda = lambdas[best_idx]
199
+
200
+ # Refit on full data with best lambda
201
+ result_final = single_lambda_fit(X, Y, maxdeg=maxdeg, npc=npc,
202
+ single_lambda=best_lambda, center=center,
203
+ approx=approx, l1=l1)
204
+
205
+ # Predictions on test set if provided
206
+ predictions_out = None
207
+ if predict is not None and predict.shape[0] > 0:
208
+ K_pred = kernel_cross(X, predict, m=maxdeg, center=center)
209
+ K = mkernel(X, m=maxdeg, center=center)
210
+
211
+ evals, evecs = np.linalg.eigh(K)
212
+ des = pchal_design(X, maxdeg=maxdeg, npc=npc, center=center)
213
+ final_npc = des.d.shape[0]
214
+
215
+ idx = np.argsort(-evals)[:final_npc]
216
+ U = evecs[:, idx]
217
+ D = np.sqrt(evals[idx])
218
+ D_inv = np.diag(1.0 / (D + 1e-12))
219
+
220
+ predictions_out = K_pred @ U @ D_inv @ result_final.alpha
221
+
222
+ if center:
223
+ predictions_out += Y.mean()
224
+
225
+ return CVResult(
226
+ mses=mean_mses,
227
+ lambdas=lambdas,
228
+ best_lambda=best_lambda,
229
+ best_model_alpha=result_final.alpha,
230
+ predictions=predictions_out
231
+ )
232
+
233
+
234
+ def cv_hapc(X: np.ndarray, Y: np.ndarray, maxdeg: int, npc: int,
235
+ log_lambda_min: float = -5, log_lambda_max: float = -3,
236
+ grid_length: int = 10, nfolds: int = 5,
237
+ norm: str = "sv", predict: Optional[np.ndarray] = None,
238
+ center: bool = True, approx: bool = False,
239
+ verbose: bool = False, max_iter: int = 100,
240
+ tol: float = 1e-6) -> CVResult:
241
+ """
242
+ High-level cross-validation dispatcher matching R cv.hapc().
243
+
244
+ Automatically generates lambda grid and routes to appropriate solver
245
+ based on norm parameter.
246
+
247
+ Parameters
248
+ ----------
249
+ X : np.ndarray, shape (n, p)
250
+ Input features
251
+ Y : np.ndarray, shape (n,)
252
+ Response variable
253
+ maxdeg : int
254
+ Maximum degree of interactions
255
+ npc : int
256
+ Number of principal components
257
+ log_lambda_min : float, default=-5
258
+ Minimum log(lambda) for grid generation
259
+ log_lambda_max : float, default=-3
260
+ Maximum log(lambda) for grid generation
261
+ grid_length : int, default=10
262
+ Number of lambda values to generate
263
+ nfolds : int, default=5
264
+ Number of CV folds
265
+ norm : str, default="sv"
266
+ Normalization/solver type:
267
+ - "sv": Gradient descent (PC-GHAL) via pcghal_cv
268
+ - "1": L1 penalty (LASSO) via fasthal_cv with l1=True
269
+ - "2": L2 penalty (Ridge) via fasthal_cv with l1=False
270
+ predict : np.ndarray, optional
271
+ Test data for predictions (shape: (m, p))
272
+ center : bool, default=True
273
+ Center the design matrix
274
+ approx : bool, default=False
275
+ Use approximate eigendecomposition (for norm="1" or "2")
276
+ verbose : bool, default=False
277
+ Print progress information
278
+ max_iter : int, default=100
279
+ Maximum iterations for optimizer (norm="sv" only)
280
+ tol : float, default=1e-6
281
+ Convergence tolerance (norm="sv" only)
282
+
283
+ Returns
284
+ -------
285
+ CVResult
286
+ Cross-validation results with fields:
287
+ - mses: MSE for each lambda
288
+ - lambdas: Lambda values tested
289
+ - best_lambda: Optimal lambda
290
+ - best_model_alpha: Coefficients for best model
291
+ - predictions: Predictions on test set (if predict provided)
292
+
293
+ Examples
294
+ --------
295
+ >>> # Gradient descent (PC-GHAL)
296
+ >>> cv_sv = cv_hapc(X, Y, maxdeg=2, npc=10, norm="sv")
297
+
298
+ >>> # Ridge regression
299
+ >>> cv_l2 = cv_hapc(X, Y, maxdeg=2, npc=10, norm="2")
300
+
301
+ >>> # LASSO
302
+ >>> cv_l1 = cv_hapc(X, Y, maxdeg=2, npc=10, norm="1")
303
+
304
+ >>> # With predictions
305
+ >>> cv_sv = cv_hapc(X, Y, maxdeg=2, npc=10, norm="sv", predict=Xnew)
306
+ """
307
+ # Generate lambda grid from log scale
308
+ log_lambdas = np.linspace(log_lambda_min, log_lambda_max, grid_length)
309
+ lambdas = np.exp(log_lambdas)
310
+
311
+ if verbose:
312
+ print(f"CV with norm='{norm}'")
313
+ print(f"Lambda grid: {len(lambdas)} values from {lambdas.min():.6f} to {lambdas.max():.6f}")
314
+
315
+ if norm == "sv":
316
+ # Gradient descent optimizer (PC-GHAL)
317
+ if verbose:
318
+ print("Using PC-GHAL gradient descent optimizer")
319
+ return pcghal_cv(X, Y, maxdeg, npc, lambdas=lambdas, nfolds=nfolds,
320
+ predict=predict, center=center, verbose=verbose,
321
+ max_iter=max_iter, tol=tol)
322
+
323
+ elif norm == "1":
324
+ # L1 penalty (LASSO)
325
+ if verbose:
326
+ print("Using L1 penalty (LASSO soft-thresholding)")
327
+ return fasthal_cv(X, Y, npc, lambdas, nfolds=nfolds,
328
+ predict=predict, maxdeg=maxdeg, center=center,
329
+ approx=approx, l1=True)
330
+
331
+ elif norm == "2":
332
+ # L2 penalty (Ridge)
333
+ if verbose:
334
+ print("Using L2 penalty (Ridge regression)")
335
+ return fasthal_cv(X, Y, npc, lambdas, nfolds=nfolds,
336
+ predict=predict, maxdeg=maxdeg, center=center,
337
+ approx=approx, l1=False)
338
+
339
+ else:
340
+ raise ValueError(f"Unknown norm='{norm}'. Must be 'sv', '1', or '2'")
hapc/single.py ADDED
@@ -0,0 +1,259 @@
1
+ """Single lambda model fitting - wraps C++ single_lambda_pchar."""
2
+
3
+ import numpy as np
4
+ from typing import Optional, NamedTuple
5
+ from .core import (pchal_design, ridge_regression, kernel_cross, mkernel,
6
+ pcghal, pcghal_classification)
7
+
8
+ class SingleLambdaResult(NamedTuple):
9
+ """Result from single_lambda_pchar."""
10
+ alpha: np.ndarray
11
+ predictions: Optional[np.ndarray] = None
12
+ lambda_: float = None
13
+ optimizer_output: Optional[NamedTuple] = None # Full optimizer output for norm="sv"
14
+
15
+ class SinglePcghalResult(NamedTuple):
16
+ """Result from single_pcghal (gradient descent optimizer)."""
17
+ alpha: np.ndarray
18
+ predictions: Optional[np.ndarray] = None
19
+ lambda_: float = None
20
+ optimizer_output: Optional[NamedTuple] = None
21
+ risk: float = None
22
+ iter: int = None
23
+
24
+ def single_lambda_fit(X: np.ndarray, Y: np.ndarray, maxdeg: int, npc: int,
25
+ single_lambda: float, predict: Optional[np.ndarray] = None,
26
+ center: bool = True, approx: bool = False, l1: bool = False) -> SingleLambdaResult:
27
+ """
28
+ Fit model with single lambda using either L1 or L2 penalty.
29
+ Mirrors C++ single_lambda_pchar implementation.
30
+
31
+ Parameters
32
+ ----------
33
+ X : np.ndarray, shape (n, p)
34
+ Input features
35
+ Y : np.ndarray, shape (n,)
36
+ Response variable
37
+ maxdeg : int
38
+ Maximum degree of interactions
39
+ npc : int
40
+ Number of principal components
41
+ single_lambda : float
42
+ Regularization parameter
43
+ predict : np.ndarray, optional
44
+ Test data for predictions
45
+ center : bool, default=True
46
+ Center the design matrix
47
+ approx : bool, default=False
48
+ Use approximate eigendecomposition
49
+ l1 : bool, default=False
50
+ Use L1 penalty (LASSO), otherwise L2 (Ridge)
51
+
52
+ Returns
53
+ -------
54
+ SingleLambdaResult
55
+ Named tuple with alpha coefficients and optional predictions
56
+ """
57
+ X = np.asarray(X, dtype=np.float64, order='C')
58
+ Y = np.asarray(Y, dtype=np.float64, order='C')
59
+ n, p = X.shape
60
+
61
+ # Generate design
62
+ des = pchal_design(X, maxdeg, npc, center=center)
63
+ final_npc = des.d.shape[0]
64
+
65
+ # Kernel matrix
66
+ K = mkernel(X, maxdeg, center=center)
67
+
68
+ # Eigendecomposition
69
+ evals, evecs = np.linalg.eigh(K)
70
+ idx = np.argsort(-evals)[:final_npc]
71
+ U = evecs[:, idx]
72
+ D = np.sqrt(evals[idx]) # Singular values (square root of eigenvalues)
73
+ D2 = evals[idx] # Eigenvalues for ridge_call
74
+
75
+ # Xtilde = U * D (singular values)
76
+ Xtilde = U @ np.diag(D)
77
+
78
+ # Center Y
79
+ ymean = Y.mean() if center else 0.0
80
+ Y_centered = Y - ymean if center else Y
81
+
82
+ # Solve
83
+ if l1:
84
+ # LASSO: use fast_pchal_call logic
85
+ from .core import fast_pchal
86
+ alpha = fast_pchal(U, D2, Y_centered, single_lambda)
87
+ else:
88
+ # Ridge: use ridge_call logic
89
+ from .core import ridge_regression
90
+ alpha = ridge_regression(Y_centered, U, D2, single_lambda)
91
+
92
+ # Predictions
93
+ predictions_out = None
94
+ if predict is not None:
95
+ predict = np.asarray(predict, dtype=np.float64, order='C')
96
+ if predict.shape[1] != p:
97
+ raise ValueError(f"predict must have {p} columns")
98
+
99
+ Ktest = kernel_cross(X, predict, maxdeg, center=center)
100
+ D2_inv_sqrt = np.diag(1.0 / np.sqrt(D2 + 1e-12))
101
+ predictions_out = Ktest @ U @ D2_inv_sqrt @ alpha
102
+
103
+ if center:
104
+ predictions_out += ymean
105
+
106
+ return SingleLambdaResult(alpha=alpha, predictions=predictions_out, lambda_=single_lambda)
107
+
108
+
109
+ def single_pcghal(X: np.ndarray, Y: np.ndarray, maxdeg: int, npc: int,
110
+ single_lambda: float, predict: Optional[np.ndarray] = None,
111
+ center: bool = True, approx: bool = False, verbose: bool = False,
112
+ max_iter: int = 100, tol: float = 1e-6) -> SinglePcghalResult:
113
+ """
114
+ Fit model with single lambda using gradient descent optimizer (PC-GHAL).
115
+ This is the high-level interface matching R's hapc() with norm="sv".
116
+ **Calls the C++ single_pcghal_fit function directly.**
117
+
118
+ Parameters
119
+ ----------
120
+ X : np.ndarray, shape (n, p)
121
+ Input features
122
+ Y : np.ndarray, shape (n,)
123
+ Response variable
124
+ maxdeg : int
125
+ Maximum degree of interactions
126
+ npc : int
127
+ Number of principal components
128
+ single_lambda : float
129
+ Regularization parameter
130
+ predict : np.ndarray, optional
131
+ Test data for predictions
132
+ center : bool, default=True
133
+ Center the design matrix
134
+ approx : bool, default=False
135
+ Use approximate eigendecomposition
136
+ verbose : bool, default=False
137
+ Print iteration details
138
+ max_iter : int, default=100
139
+ Maximum iterations for gradient descent
140
+ tol : float, default=1e-6
141
+ Convergence tolerance
142
+
143
+ Returns
144
+ -------
145
+ SinglePcghalResult
146
+ Named tuple with alpha, predictions, optimizer output, and convergence info
147
+ """
148
+ from .core import _ensure_c_contiguous, hapc_core
149
+
150
+ X = _ensure_c_contiguous(X)
151
+ Y = _ensure_c_contiguous(Y)
152
+ n, p = X.shape
153
+
154
+ # Prepare prediction data
155
+ if predict is not None:
156
+ predict = _ensure_c_contiguous(predict)
157
+ if predict.shape[1] != p:
158
+ raise ValueError(f"predict must have {p} columns")
159
+ predict_data = predict
160
+ else:
161
+ # Empty matrix for no predictions
162
+ predict_data = np.array([], dtype=np.float64).reshape(0, p)
163
+
164
+ if verbose:
165
+ print("=" * 60)
166
+ print("PC-GHAL Single Lambda Optimization (C++ Implementation)")
167
+ print("=" * 60)
168
+ print()
169
+
170
+ # Call C++ single_pcghal_fit directly
171
+ result_cpp = hapc_core.single_pcghal_fit(
172
+ X, Y, maxdeg, npc, single_lambda, predict_data,
173
+ max_iter, tol, 1.0, verbose, "grad", center, approx
174
+ )
175
+
176
+ # Extract predictions
177
+ predictions_out = None
178
+ if predict is not None and result_cpp.predictions.size > 0:
179
+ predictions_out = result_cpp.predictions
180
+
181
+ return SinglePcghalResult(
182
+ alpha=result_cpp.alpha,
183
+ predictions=predictions_out,
184
+ lambda_=single_lambda,
185
+ optimizer_output=None, # Not available from C++ output
186
+ risk=result_cpp.risk,
187
+ iter=result_cpp.iter
188
+ )
189
+
190
+
191
+
192
+ def hapc(X: np.ndarray, Y: np.ndarray, maxdeg: int, npc: int,
193
+ single_lambda: float, norm: str = "sv", predict: Optional[np.ndarray] = None,
194
+ center: bool = True, approx: bool = False, verbose: bool = False,
195
+ max_iter: int = 100, tol: float = 1e-6) -> SinglePcghalResult:
196
+ """
197
+ High-level interface matching R's hapc() function.
198
+ Dispatches to appropriate solver based on norm parameter.
199
+
200
+ Parameters
201
+ ----------
202
+ X : np.ndarray, shape (n, p)
203
+ Input features
204
+ Y : np.ndarray, shape (n,)
205
+ Response variable
206
+ maxdeg : int
207
+ Maximum degree of interactions
208
+ npc : int
209
+ Number of principal components
210
+ single_lambda : float
211
+ Regularization parameter
212
+ norm : str, default="sv"
213
+ Normalization/solver type:
214
+ - "1": L1 penalty (LASSO soft-thresholding)
215
+ - "2": L2 penalty (Ridge regression, closed-form)
216
+ - "sv": Supervised (gradient descent optimizer PC-GHAL)
217
+ predict : np.ndarray, optional
218
+ Test data for predictions
219
+ center : bool, default=True
220
+ Center the design matrix
221
+ approx : bool, default=False
222
+ Use approximate eigendecomposition
223
+ verbose : bool, default=False
224
+ Print iteration details
225
+ max_iter : int, default=100
226
+ Maximum iterations (for norm="sv")
227
+ tol : float, default=1e-6
228
+ Convergence tolerance (for norm="sv")
229
+
230
+ Returns
231
+ -------
232
+ SinglePcghalResult or SingleLambdaResult
233
+ Fit results with predictions
234
+ """
235
+ if verbose:
236
+ print(f"HAPC with norm='{norm}'")
237
+
238
+ if norm == "1":
239
+ # L1 (LASSO)
240
+ if verbose:
241
+ print("Using L1 penalty (soft-thresholding)")
242
+ return single_lambda_fit(X, Y, maxdeg, npc, single_lambda, predict=predict,
243
+ center=center, approx=approx, l1=True)
244
+ elif norm == "2":
245
+ # L2 (Ridge)
246
+ if verbose:
247
+ print("Using L2 penalty (ridge regression)")
248
+ return single_lambda_fit(X, Y, maxdeg, npc, single_lambda, predict=predict,
249
+ center=center, approx=approx, l1=False)
250
+ elif norm == "sv":
251
+ # Supervised (Gradient Descent)
252
+ if verbose:
253
+ print("Using gradient descent optimizer (PC-GHAL)")
254
+ return single_pcghal(X, Y, maxdeg, npc, single_lambda, predict=predict,
255
+ center=center, approx=approx, verbose=verbose,
256
+ max_iter=max_iter, tol=tol)
257
+ else:
258
+ raise ValueError(f"Unknown norm='{norm}'. Must be '1', '2', or 'sv'")
259
+
@@ -0,0 +1,2 @@
1
+ YEAR: 2025
2
+ COPYRIGHT HOLDER: Carlos García Meixide
@@ -0,0 +1,210 @@
1
+ Metadata-Version: 2.1
2
+ Name: hapc
3
+ Version: 0.1.2
4
+ Summary: Highly Adaptive Principal Components (HAPC) regression and classification
5
+ Home-page: https://github.com/meixide/hapc
6
+ Author: Carlos García Meixide
7
+ Author-email: Carlos García Meixide <cgmeixide@gmail.com>
8
+ License: YEAR: 2025
9
+ COPYRIGHT HOLDER: Carlos García Meixide
10
+ Project-URL: Homepage, https://github.com/meixide/hapc
11
+ Project-URL: Documentation, https://github.com/meixide/hapc#readme
12
+ Project-URL: Repository, https://github.com/meixide/hapc.git
13
+ Project-URL: Issues, https://github.com/meixide/hapc/issues
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.8
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Operating System :: OS Independent
21
+ Requires-Python: >=3.8
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: numpy<2.3,>=1.24
25
+ Requires-Dist: scipy>=1.7
26
+ Requires-Dist: scikit-learn>=0.24
27
+ Provides-Extra: dev
28
+ Requires-Dist: pytest; extra == "dev"
29
+ Requires-Dist: pytest-cov; extra == "dev"
30
+ Requires-Dist: black; extra == "dev"
31
+ Requires-Dist: flake8; extra == "dev"
32
+
33
+ # HAPC: Highly Adaptive Principal Components
34
+
35
+ A fast and flexible machine learning library for nonparametric high-dimensional regression and classification with guarantees.
36
+
37
+ ## Installation
38
+
39
+ ### Prerequisites
40
+
41
+ - Python 3.8+
42
+ - C++ compiler (g++, clang, or MSVC)
43
+ - CMake 3.15+
44
+ - Eigen3
45
+
46
+ ### Quick Install
47
+
48
+ ```bash
49
+ pip install hapc
50
+ ```
51
+
52
+ ### Install from GitHub (latest development version)
53
+
54
+ ```bash
55
+ pip install git+https://github.com/meixide/hapc.git
56
+ ```
57
+
58
+ Or with editable install for development:
59
+
60
+ ```bash
61
+ git clone https://github.com/meixide/hapc.git
62
+ cd hapc
63
+ pip install -e .
64
+ ```
65
+
66
+ ### Install build dependencies
67
+
68
+ If installation fails, you may need to install build dependencies:
69
+
70
+ **macOS:**
71
+ ```bash
72
+ brew install cmake eigen
73
+ ```
74
+
75
+ **Ubuntu/Debian:**
76
+ ```bash
77
+ sudo apt-get install cmake libeigen3-dev build-essential
78
+ ```
79
+
80
+ **Windows:**
81
+ ```bash
82
+ pip install cmake
83
+ # Install Visual Studio Build Tools or use conda
84
+ conda install -c conda-forge eigen
85
+ ```
86
+
87
+ ## Quick Start
88
+
89
+ ```python
90
+ import numpy as np
91
+ from hapc.single import single_pcghal
92
+ from hapc.cv import pcghal_cv
93
+
94
+ # Generate sample data
95
+ X = np.random.randn(100, 5)
96
+ Y = X[:, 0] + 0.5 * X[:, 1] + np.random.randn(100) * 0.1
97
+
98
+ # Single fit with fixed lambda
99
+ result = single_pcghal(X, Y, maxdeg=2, npc=5, single_lambda=0.01)
100
+ print(f"Risk: {result.risk:.6f}")
101
+
102
+ # Cross-validation to select lambda
103
+ lambdas = np.logspace(-4, 0, 10)
104
+ cv_result = pcghal_cv(X, Y, maxdeg=2, npc=5, lambdas=lambdas, nfolds=5)
105
+ print(f"Best lambda: {cv_result.best_lambda:.6f}")
106
+
107
+ # Make predictions
108
+ X_test = np.random.randn(20, 5)
109
+ result = single_pcghal(X, Y, maxdeg=2, npc=5, single_lambda=0.01, predict=X_test)
110
+ print(f"Predictions: {result.predictions}")
111
+ ```
112
+
113
+ ## Usage
114
+
115
+ ### Regression
116
+
117
+ ```python
118
+ from hapc.single import single_pcghal
119
+
120
+ result = single_pcghal(
121
+ X, Y,
122
+ maxdeg=2, # Maximum degree of interactions
123
+ npc=10, # Number of principal components
124
+ single_lambda=0.01,
125
+ predict=X_test # Optional: test data for predictions
126
+ )
127
+ ```
128
+
129
+ ### Classification
130
+
131
+ ```python
132
+ from hapc.single import single_pcghal
133
+
134
+ result = single_pcghal(
135
+ X, Y_binary,
136
+ maxdeg=2,
137
+ npc=10,
138
+ single_lambda=0.01,
139
+ predict=X_test
140
+ )
141
+ ```
142
+
143
+ ### Cross-Validation
144
+
145
+ ```python
146
+ from hapc.cv import pcghal_cv
147
+
148
+ cv_result = pcghal_cv(
149
+ X, Y,
150
+ maxdeg=2,
151
+ npc=10,
152
+ lambdas=np.logspace(-4, 0, 20),
153
+ nfolds=5
154
+ )
155
+ print(cv_result.best_lambda)
156
+ ```
157
+
158
+ ## API Reference
159
+
160
+ ### `hapc.single.single_pcghal()`
161
+
162
+ Fit PC-GHAL with a single lambda value.
163
+
164
+ **Parameters:**
165
+ - `X` (ndarray, shape (n, p)): Input features
166
+ - `Y` (ndarray, shape (n,)): Response variable
167
+ - `maxdeg` (int): Maximum degree of interactions
168
+ - `npc` (int): Number of principal components
169
+ - `single_lambda` (float): Regularization parameter
170
+ - `max_iter` (int, default=100): Maximum iterations
171
+ - `tol` (float, default=1e-6): Convergence tolerance
172
+ - `verbose` (bool, default=False): Print progress
173
+ - `predict` (ndarray, optional): Test data for predictions
174
+ - `center` (bool, default=True): Center the design matrix
175
+
176
+ **Returns:**
177
+ - `result.alpha`: Coefficients
178
+ - `result.risk`: Final risk
179
+ - `result.iter`: Iterations until convergence
180
+ - `result.predictions`: Predictions on test data (if provided)
181
+
182
+ ### `hapc.cv.pcghal_cv()`
183
+
184
+ Cross-validation to select lambda.
185
+
186
+ **Parameters:**
187
+ - `lambdas` (ndarray): Grid of lambda values to test
188
+ - `nfolds` (int, default=5): Number of CV folds
189
+ - ...other parameters same as `single_pcghal`
190
+
191
+ **Returns:**
192
+ - `cv_result.best_lambda`: Optimal lambda
193
+ - `cv_result.mses`: CV errors for each lambda
194
+ - `cv_result.best_model_alpha`: Coefficients (alpha) of the model for the best lambda
195
+ - `cv_result.predictions`: Predictions on test data (if provided)
196
+
197
+ ## Contributing
198
+
199
+ Contributions welcome! The C++ core is shared between R and Python packages.
200
+
201
+ ```bash
202
+ git clone https://github.com/meixide/hapc.git
203
+ cd hapc
204
+ pip install -e .
205
+ pytest
206
+ ```
207
+
208
+ ## License
209
+
210
+ MIT License - see LICENSE file
@@ -0,0 +1,9 @@
1
+ hapc/__init__.py,sha256=88Qoy-X9m1I-CyiJaHP0NZaZEok4_dcX9SKTuyQEtzA,696
2
+ hapc/core.py,sha256=Uw75VwMOQ-4RoNjNAAA34TDTQxYtFxeRKOe8ZIToB2c,5184
3
+ hapc/cv.py,sha256=M5_TB7oPyaZBoILY2cf6R2CMoXhHUSnGanNgAl6g3rk,12424
4
+ hapc/single.py,sha256=_KWfTPLEcvMc8hWllyjGARkxYznSDNhhOn8qv0kURVA,9149
5
+ hapc-0.1.2.dist-info/LICENSE,sha256=xsHdXeHSuT_rM0lZucaImzebBKKguXPMxSfXZSv_Tto,52
6
+ hapc-0.1.2.dist-info/METADATA,sha256=9O0ByFtDKJOhi_J83LpOzEkOYG8HCg4obqZBOpb8QM8,5413
7
+ hapc-0.1.2.dist-info/WHEEL,sha256=q-8g9petFnV9NMO9vJ2udYt2PFlnDjFGlAmDhUgQ79c,99
8
+ hapc-0.1.2.dist-info/top_level.txt,sha256=wlXqf3dR7pUrCp1U_cLzGyKzGcVoinK6DjtIMqHWGDA,5
9
+ hapc-0.1.2.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (75.3.3)
3
+ Root-Is-Purelib: false
4
+ Tag: cp38-cp38-win_amd64
5
+
@@ -0,0 +1 @@
1
+ hapc