hapc 0.1.2__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hapc/__init__.py +33 -0
- hapc/core.py +148 -0
- hapc/cv.py +340 -0
- hapc/single.py +259 -0
- hapc-0.1.2.dist-info/METADATA +214 -0
- hapc-0.1.2.dist-info/RECORD +9 -0
- hapc-0.1.2.dist-info/WHEEL +5 -0
- hapc-0.1.2.dist-info/licenses/LICENSE +2 -0
- hapc-0.1.2.dist-info/top_level.txt +1 -0
hapc/__init__.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""HAPC: Highly Adaptive Principal Components."""
|
|
2
|
+
|
|
3
|
+
__version__ = "0.1.2"
|
|
4
|
+
|
|
5
|
+
from .core import (
|
|
6
|
+
pchal_design,
|
|
7
|
+
ridge_regression,
|
|
8
|
+
mkernel,
|
|
9
|
+
kernel_cross,
|
|
10
|
+
pcghal,
|
|
11
|
+
pcghal_classification,
|
|
12
|
+
fast_pchal,
|
|
13
|
+
)
|
|
14
|
+
from .single import single_lambda_fit, single_pcghal, hapc, SinglePcghalResult
|
|
15
|
+
from .cv import pcghal_cv, CVResult, fasthal_cv, cv_hapc
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"pchal_design",
|
|
19
|
+
"ridge_regression",
|
|
20
|
+
"mkernel",
|
|
21
|
+
"kernel_cross",
|
|
22
|
+
"pcghal",
|
|
23
|
+
"pcghal_classification",
|
|
24
|
+
"fast_pchal",
|
|
25
|
+
"single_lambda_fit",
|
|
26
|
+
"single_pcghal",
|
|
27
|
+
"hapc",
|
|
28
|
+
"SinglePcghalResult",
|
|
29
|
+
"pcghal_cv",
|
|
30
|
+
"cv_hapc",
|
|
31
|
+
"fasthal_cv",
|
|
32
|
+
"CVResult",
|
|
33
|
+
]
|
hapc/core.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Python interface to shared HAPC C++ library."""
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from typing import NamedTuple, Optional
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
# Locate the compiled ``hapc_core`` extension module.
# Three strategies are tried in order; ``hapc_core`` stays None until one of
# them succeeds, and an ImportError is raised if none does.
hapc_core = None

# 1. Plain absolute import.
try:
    import hapc_core
except ImportError:
    pass

# 2. Import from the package that contains this package.
if hapc_core is None:
    try:
        from .. import hapc_core
    except ImportError:
        pass

# 3. Probe known directories by putting them at the front of sys.path.
if hapc_core is None:
    search_paths = [
        Path(__file__).parent,  # Same directory as this file
        Path(__file__).parent.parent.parent / "build",  # Build directory
    ]

    for path in search_paths:
        if not path.exists():
            continue
        path_entry = str(path)
        sys.path.insert(0, path_entry)
        try:
            import hapc_core
        except ImportError:
            # The directory did not provide the extension: take it off
            # sys.path again so it cannot shadow unrelated top-level modules.
            if path_entry in sys.path:
                sys.path.remove(path_entry)
            continue
        # Success: the entry is kept on sys.path, as before.
        break

if hapc_core is None:
    raise ImportError(
        "hapc_core module not found. The C++ extension may not be built.\n"
        "Try: pip install -e . --force-reinstall --no-cache-dir"
    )
|
|
45
|
+
|
|
46
|
+
class DesignOutput(NamedTuple):
    """Output from pchal_design.

    The four fields are the ``H``, ``U``, ``d`` and ``V`` attributes of the
    object returned by ``hapc_core.pchal_des``, in that order.
    """
    # NOTE(review): the names suggest a design matrix plus SVD-style factors;
    # confirm the exact meaning and shapes against the C++ implementation.
    H: np.ndarray
    U: np.ndarray
    d: np.ndarray  # callers use d.shape[0] as the effective number of components
    V: np.ndarray
|
|
52
|
+
|
|
53
|
+
class OptimizerOutput(NamedTuple):
    """Output from optimizer functions.

    Built by ``pcghal`` and ``pcghal_classification`` from the attributes of
    the result object returned by the corresponding ``hapc_core`` call.
    """
    alpha: np.ndarray
    alphaiters: np.ndarray  # presumably alpha at each iteration -- TODO confirm
    beta: np.ndarray
    risk: float
    iter: int  # presumably the iteration count -- TODO confirm
|
|
60
|
+
|
|
61
|
+
def _ensure_c_contiguous(arr: np.ndarray, dtype=np.float64) -> np.ndarray:
|
|
62
|
+
"""Ensure array is C-contiguous double."""
|
|
63
|
+
arr = np.asarray(arr, dtype=dtype)
|
|
64
|
+
return np.ascontiguousarray(arr) if not arr.flags['C_CONTIGUOUS'] else arr
|
|
65
|
+
|
|
66
|
+
def pchal_design(X: np.ndarray, maxdeg: int, npc: int, center: bool = True) -> DesignOutput:
    """Generate PC-HAL design components.

    ``X`` is converted to a C-contiguous float64 array and ``npc`` is clamped
    before both are handed to ``hapc_core.pchal_des``. The ``H``, ``U``, ``d``
    and ``V`` attributes of its result are returned as a ``DesignOutput``.
    """
    X = _ensure_c_contiguous(X)
    n_rows = X.shape[0]

    # Upper bound on npc: n - 1 with centering (rank reduction due to
    # centering), n without.  Never request fewer than one component.
    upper = n_rows if not center else n_rows - 1
    npc = max(1, min(npc, upper))

    out = hapc_core.pchal_des(X, int(maxdeg), int(npc), bool(center))
    return DesignOutput(H=out.H, U=out.U, d=out.d, V=out.V)
|
|
79
|
+
|
|
80
|
+
def ridge_regression(Y: np.ndarray, U: np.ndarray, D2: np.ndarray,
                     lambda_: float) -> np.ndarray:
    """Ridge regression solver.

    Converts ``Y``, ``U`` and ``D2`` to C-contiguous float64 arrays and
    returns whatever ``hapc_core.ridge_call`` produces for them with the
    penalty ``lambda_`` coerced to ``float``.
    """
    y_arr, u_arr, d2_arr = (
        _ensure_c_contiguous(item, np.float64) for item in (Y, U, D2)
    )
    return hapc_core.ridge_call(y_arr, u_arr, d2_arr, float(lambda_))
|
|
87
|
+
|
|
88
|
+
def mkernel(X: np.ndarray, m: int, center: bool = True) -> np.ndarray:
    """Compute Haar-like kernel matrix.

    ``X`` is converted to a C-contiguous float64 array; ``m`` and ``center``
    are coerced to ``int`` and ``bool`` before ``hapc_core.mkernel_call`` is
    invoked.  Its return value is passed through unchanged.
    """
    features = _ensure_c_contiguous(X)
    kernel = hapc_core.mkernel_call(features, int(m), bool(center))
    return kernel
|
|
92
|
+
|
|
93
|
+
def kernel_cross(Xtr: np.ndarray, Xte: np.ndarray, m: int,
                 center: bool = True) -> np.ndarray:
    """Compute cross-kernel between training and test data.

    Both inputs are converted to C-contiguous float64 arrays and forwarded,
    together with ``int(m)`` and ``bool(center)``, to
    ``hapc_core.kernel_cross_call``; its result is returned as is.
    """
    train, test = map(_ensure_c_contiguous, (Xtr, Xte))
    return hapc_core.kernel_cross_call(train, test, int(m), bool(center))
|
|
99
|
+
|
|
100
|
+
def pcghal(Y: np.ndarray, Xtilde: np.ndarray, ENn: np.ndarray,
           alpha0: np.ndarray, max_iter: int = 100, tol: float = 1e-6,
           step_factor: float = 1.0, verbose: bool = False,
           crit: str = "grad") -> OptimizerOutput:
    """PC-GHAL optimizer (regression).

    The four array arguments are converted to C-contiguous float64 arrays and
    the scalar options are coerced to ``int``/``float``/``bool``/``str``
    before ``hapc_core.pcghal_call`` is invoked.  The ``alpha``,
    ``alphaiters``, ``beta``, ``risk`` and ``iter`` attributes of its result
    are returned as an ``OptimizerOutput``.
    """
    Y, Xtilde, ENn, alpha0 = map(_ensure_c_contiguous, (Y, Xtilde, ENn, alpha0))

    fit = hapc_core.pcghal_call(
        Y, Xtilde, ENn, alpha0,
        int(max_iter), float(tol), float(step_factor), bool(verbose), str(crit),
    )
    return OptimizerOutput(
        alpha=fit.alpha,
        alphaiters=fit.alphaiters,
        beta=fit.beta,
        risk=fit.risk,
        iter=fit.iter,
    )
|
|
114
|
+
|
|
115
|
+
def pcghal_classification(Y: np.ndarray, Xtilde: np.ndarray, ENn: np.ndarray,
                          alpha0: np.ndarray, max_iter: int = 100,
                          tol: float = 1e-6, step_factor: float = 1.0,
                          verbose: bool = False) -> OptimizerOutput:
    """PC-GHAL optimizer (classification).

    The four array arguments are converted to C-contiguous float64 arrays and
    the scalar options are coerced to ``int``/``float``/``bool`` before
    ``hapc_core.pcghal_classi_call`` is invoked.  The ``alpha``,
    ``alphaiters``, ``beta``, ``risk`` and ``iter`` attributes of its result
    are returned as an ``OptimizerOutput``.
    """
    Y, Xtilde, ENn, alpha0 = map(_ensure_c_contiguous, (Y, Xtilde, ENn, alpha0))

    fit = hapc_core.pcghal_classi_call(
        Y, Xtilde, ENn, alpha0,
        int(max_iter), float(tol), float(step_factor), bool(verbose),
    )
    return OptimizerOutput(
        alpha=fit.alpha,
        alphaiters=fit.alphaiters,
        beta=fit.beta,
        risk=fit.risk,
        iter=fit.iter,
    )
|
|
129
|
+
|
|
130
|
+
def fast_pchal(U: np.ndarray, D2: np.ndarray, Y: np.ndarray,
               lambda_: float) -> np.ndarray:
    """Fast LASSO-type solver.

    Converts ``U``, ``D2`` and ``Y`` to C-contiguous float64 arrays and
    returns the result of ``hapc_core.fast_pchal_call`` for them with the
    penalty ``lambda_`` coerced to ``float``.
    """
    u_arr, d2_arr, y_arr = map(_ensure_c_contiguous, (U, D2, Y))
    return hapc_core.fast_pchal_call(u_arr, d2_arr, y_arr, float(lambda_))
|
|
137
|
+
|
|
138
|
+
__all__ = [
|
|
139
|
+
"DesignOutput",
|
|
140
|
+
"OptimizerOutput",
|
|
141
|
+
"pchal_design",
|
|
142
|
+
"ridge_regression",
|
|
143
|
+
"mkernel",
|
|
144
|
+
"kernel_cross",
|
|
145
|
+
"pcghal",
|
|
146
|
+
"pcghal_classification",
|
|
147
|
+
"fast_pchal",
|
|
148
|
+
]
|
hapc/cv.py
ADDED
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
"""Cross-validation functions - calls C++ fasthal_cv_call."""
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from typing import Optional, NamedTuple
|
|
5
|
+
import ctypes
|
|
6
|
+
from .core import kernel_cross, mkernel
|
|
7
|
+
|
|
8
|
+
class CVResult(NamedTuple):
    """Cross-validation result.

    Returned by ``pcghal_cv``, ``fasthal_cv`` and ``cv_hapc``.
    """
    mses: np.ndarray  # MSE for each lambda
    lambdas: np.ndarray  # lambda values tested
    best_lambda: float  # selected lambda
    best_model_alpha: np.ndarray  # coefficients for the best model
    predictions: Optional[np.ndarray] = None  # only populated when prediction rows were supplied
|
|
15
|
+
|
|
16
|
+
def pcghal_cv(X: np.ndarray, Y: np.ndarray, maxdeg: int, npc: int,
              lambdas: Optional[np.ndarray] = None,
              log_lambda_min: float = -5,
              log_lambda_max: float = -3,
              grid_length: int = 10,
              nfolds: int = 5,
              predict: Optional[np.ndarray] = None,
              center: bool = True, verbose: bool = False,
              max_iter: int = 100, tol: float = 1e-6) -> CVResult:
    """
    Cross-validate PC-GHAL (gradient descent optimizer) over a lambda grid.

    The whole cross-validation is delegated to the C++ routine
    ``hapc_core.pcghal_cv_fit``; this wrapper only prepares the inputs and
    repackages the outputs.

    Parameters
    ----------
    X : np.ndarray, shape (n, p)
        Input features.
    Y : np.ndarray, shape (n,)
        Response variable.
    maxdeg : int
        Maximum degree of interactions.
    npc : int
        Number of principal components.
    lambdas : np.ndarray, optional
        Regularization parameters to test.  When None, a grid of
        ``grid_length`` values equally spaced in log(lambda) between
        ``log_lambda_min`` and ``log_lambda_max`` is generated.
    log_lambda_min : float, default=-5
        Smallest log(lambda) of the generated grid.
    log_lambda_max : float, default=-3
        Largest log(lambda) of the generated grid.
    grid_length : int, default=10
        Number of generated lambda values.
    nfolds : int, default=5
        Number of cross-validation folds.
    predict : np.ndarray, optional
        Test data for predictions.  When None an empty (0, p) array is passed
        to the C++ routine instead.
    center : bool, default=True
        Center the design matrix.
    verbose : bool, default=False
        Print progress information.
    max_iter : int, default=100
        Maximum iterations for the optimizer.
    tol : float, default=1e-6
        Convergence tolerance.

    Returns
    -------
    CVResult
        MSEs, lambdas, best lambda and best alpha as reported by the C++
        routine; ``predictions`` is None unless prediction rows were given
        and the routine returned a non-empty prediction array.
    """
    from .core import _ensure_c_contiguous, hapc_core

    X = _ensure_c_contiguous(X)
    Y = _ensure_c_contiguous(Y)

    # Fall back to an exponentiated, evenly spaced log-grid.
    if lambdas is None:
        lambdas = np.exp(np.linspace(log_lambda_min, log_lambda_max, grid_length))
    lambdas = np.asarray(lambdas, dtype=np.float64)

    _, p = X.shape

    if predict is None:
        # "No predictions requested" is signalled with an empty (0, p) array.
        predict = np.array([], dtype=np.float64).reshape(0, p)
    else:
        predict = _ensure_c_contiguous(predict)

    if verbose:
        rule = "=" * 60
        print(rule)
        print("PC-GHAL Cross-Validation (C++ Implementation)")
        print(rule)
        print(f"Lambda grid: {len(lambdas)} values from {lambdas.min():.6f} to {lambdas.max():.6f}")

    # NOTE(review): the literals 1.0, "risk" and False are presumably the
    # step factor, stopping criterion and approx flag -- confirm against the
    # C++ signature.
    fit = hapc_core.pcghal_cv_fit(
        X, Y, maxdeg, npc, lambdas.tolist(), nfolds,
        predict,
        max_iter, tol, 1.0, verbose, "risk", center, False
    )

    has_predictions = predict.shape[0] > 0 and fit.predictions.size > 0

    return CVResult(
        mses=np.array(fit.mses),
        lambdas=np.array(fit.lambdas),
        best_lambda=fit.best_lambda,
        best_model_alpha=fit.best_alpha,
        predictions=fit.predictions if has_predictions else None
    )
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def fasthal_cv(X: np.ndarray, Y: np.ndarray, npc: int,
               lambdas: np.ndarray, nfolds: int = 5,
               predict: Optional[np.ndarray] = None,
               maxdeg: int = 1, center: bool = True,
               approx: bool = False, l1: bool = False) -> CVResult:
    """
    Fast cross-validation with L1 (LASSO) or L2 (Ridge) penalties.
    Matches R cv.hapc with norm="1" or norm="2".

    For every fold of a shuffled K-fold split (fixed seed 42) and every
    lambda, a model is fitted on the training part with ``single_lambda_fit``
    and scored by mean squared error on the held-out part.  The lambda with
    the smallest fold-averaged MSE is then refitted on the full data.

    Parameters
    ----------
    X : np.ndarray, shape (n, p)
        Input features.
    Y : np.ndarray, shape (n,)
        Response variable.
    npc : int
        Number of principal components.
    lambdas : np.ndarray
        Regularization parameters to test.
    nfolds : int, default=5
        Number of folds for cross-validation.
    predict : np.ndarray, optional
        Test data for predictions.
    maxdeg : int, default=1
        Maximum degree of interactions.
    center : bool, default=True
        Center the design matrix (the mean of ``Y`` is added back to the
        final predictions).
    approx : bool, default=False
        Forwarded unchanged to ``single_lambda_fit``.
    l1 : bool, default=False
        Use L1 penalty (LASSO), otherwise L2 (Ridge).

    Returns
    -------
    CVResult
        Fold-averaged MSE per lambda, the lambdas, the best lambda, the alpha
        of the full-data refit, and predictions for ``predict`` (None when no
        prediction rows were supplied).
    """
    from .single import single_lambda_fit
    from .core import _ensure_c_contiguous, pchal_design
    from sklearn.model_selection import KFold

    X = _ensure_c_contiguous(X)
    Y = _ensure_c_contiguous(Y)
    lambdas = np.asarray(lambdas, dtype=np.float64)
    _, p = X.shape

    if predict is not None:
        predict = _ensure_c_contiguous(predict)
    else:
        predict = np.array([], dtype=np.float64).reshape(0, p)

    # CV loop.  Cells stay at +inf unless a fit yields usable predictions.
    cv = KFold(n_splits=nfolds, shuffle=True, random_state=42)
    cv_mses = np.full((nfolds, len(lambdas)), np.inf)

    for fold_idx, (train_idx, test_idx) in enumerate(cv.split(X)):
        X_train, X_test = X[train_idx], X[test_idx]
        Y_train, Y_test = Y[train_idx], Y[test_idx]

        if X_test.shape[0] == 0:
            continue

        for j, lam in enumerate(lambdas):
            # One fit per (fold, lambda): fitting with predict=X_test yields
            # both the coefficients and the held-out predictions.
            fold_fit = single_lambda_fit(X_train, Y_train, maxdeg=maxdeg,
                                         npc=npc, single_lambda=lam,
                                         predict=X_test,
                                         center=center, approx=approx, l1=l1)

            if fold_fit.alpha is None or fold_fit.predictions is None:
                continue

            # Flatten both sides so an (m, 1) prediction column cannot
            # broadcast against an (m,) response into an (m, m) matrix.
            residuals = np.ravel(Y_test) - np.ravel(fold_fit.predictions)
            cv_mses[fold_idx, j] = np.mean(residuals ** 2)

    # Average CV MSE
    mean_mses = np.nanmean(cv_mses, axis=0)
    best_idx = np.nanargmin(mean_mses)
    best_lambda = lambdas[best_idx]

    # Refit on full data with best lambda
    result_final = single_lambda_fit(X, Y, maxdeg=maxdeg, npc=npc,
                                     single_lambda=best_lambda, center=center,
                                     approx=approx, l1=l1)

    # Predictions on test set if provided
    predictions_out = None
    if predict.shape[0] > 0:
        K_pred = kernel_cross(X, predict, m=maxdeg, center=center)
        K = mkernel(X, m=maxdeg, center=center)

        evals, evecs = np.linalg.eigh(K)
        des = pchal_design(X, maxdeg=maxdeg, npc=npc, center=center)
        final_npc = des.d.shape[0]

        # Keep the final_npc leading eigenpairs of the kernel matrix.
        idx = np.argsort(-evals)[:final_npc]
        U = evecs[:, idx]
        D = np.sqrt(evals[idx])
        # The small offset guards against division by zero.
        D_inv = np.diag(1.0 / (D + 1e-12))

        predictions_out = K_pred @ U @ D_inv @ result_final.alpha

        if center:
            predictions_out += Y.mean()

    return CVResult(
        mses=mean_mses,
        lambdas=lambdas,
        best_lambda=best_lambda,
        best_model_alpha=result_final.alpha,
        predictions=predictions_out
    )
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def cv_hapc(X: np.ndarray, Y: np.ndarray, maxdeg: int, npc: int,
            log_lambda_min: float = -5, log_lambda_max: float = -3,
            grid_length: int = 10, nfolds: int = 5,
            norm: str = "sv", predict: Optional[np.ndarray] = None,
            center: bool = True, approx: bool = False,
            verbose: bool = False, max_iter: int = 100,
            tol: float = 1e-6) -> CVResult:
    """
    High-level cross-validation dispatcher matching R cv.hapc().

    Builds a lambda grid that is evenly spaced on the log scale and hands it
    to the cross-validation routine selected by ``norm``.

    Parameters
    ----------
    X : np.ndarray, shape (n, p)
        Input features.
    Y : np.ndarray, shape (n,)
        Response variable.
    maxdeg : int
        Maximum degree of interactions.
    npc : int
        Number of principal components.
    log_lambda_min : float, default=-5
        Minimum log(lambda) of the grid.
    log_lambda_max : float, default=-3
        Maximum log(lambda) of the grid.
    grid_length : int, default=10
        Number of lambda values in the grid.
    nfolds : int, default=5
        Number of CV folds.
    norm : str, default="sv"
        Solver selector:
        - "sv": gradient descent (PC-GHAL) via pcghal_cv
        - "1": L1 penalty (LASSO) via fasthal_cv with l1=True
        - "2": L2 penalty (Ridge) via fasthal_cv with l1=False
    predict : np.ndarray, optional
        Test data for predictions (shape: (m, p)).
    center : bool, default=True
        Center the design matrix.
    approx : bool, default=False
        Forwarded to fasthal_cv (norm="1" or "2" only).
    verbose : bool, default=False
        Print progress information.
    max_iter : int, default=100
        Maximum optimizer iterations (norm="sv" only).
    tol : float, default=1e-6
        Convergence tolerance (norm="sv" only).

    Returns
    -------
    CVResult
        - mses: MSE for each lambda
        - lambdas: lambda values tested
        - best_lambda: optimal lambda
        - best_model_alpha: coefficients for the best model
        - predictions: predictions on the test set (if predict provided)

    Raises
    ------
    ValueError
        If ``norm`` is not one of "sv", "1" or "2".

    Examples
    --------
    >>> cv_sv = cv_hapc(X, Y, maxdeg=2, npc=10, norm="sv")
    >>> cv_l2 = cv_hapc(X, Y, maxdeg=2, npc=10, norm="2")
    >>> cv_l1 = cv_hapc(X, Y, maxdeg=2, npc=10, norm="1")
    >>> cv_sv = cv_hapc(X, Y, maxdeg=2, npc=10, norm="sv", predict=Xnew)
    """
    # The grid is built (and reported) before norm is checked.
    lambdas = np.exp(np.linspace(log_lambda_min, log_lambda_max, grid_length))

    if verbose:
        print(f"CV with norm='{norm}'")
        print(f"Lambda grid: {len(lambdas)} values from {lambdas.min():.6f} to {lambdas.max():.6f}")

    if norm == "sv":
        if verbose:
            print("Using PC-GHAL gradient descent optimizer")
        return pcghal_cv(X, Y, maxdeg, npc, lambdas=lambdas, nfolds=nfolds,
                         predict=predict, center=center, verbose=verbose,
                         max_iter=max_iter, tol=tol)

    # Both remaining solvers share fasthal_cv and differ only in the l1 flag.
    if norm == "1":
        use_l1 = True
        announcement = "Using L1 penalty (LASSO soft-thresholding)"
    elif norm == "2":
        use_l1 = False
        announcement = "Using L2 penalty (Ridge regression)"
    else:
        raise ValueError(f"Unknown norm='{norm}'. Must be 'sv', '1', or '2'")

    if verbose:
        print(announcement)
    return fasthal_cv(X, Y, npc, lambdas, nfolds=nfolds,
                      predict=predict, maxdeg=maxdeg, center=center,
                      approx=approx, l1=use_l1)
|
hapc/single.py
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
"""Single lambda model fitting - wraps C++ single_lambda_pchar."""
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from typing import Optional, NamedTuple
|
|
5
|
+
from .core import (pchal_design, ridge_regression, kernel_cross, mkernel,
|
|
6
|
+
pcghal, pcghal_classification)
|
|
7
|
+
|
|
8
|
+
class SingleLambdaResult(NamedTuple):
    """Result from single_lambda_pchar.

    Returned by ``single_lambda_fit``.  Only ``alpha`` is required; every
    other field defaults to None.
    """
    alpha: np.ndarray  # fitted coefficients
    predictions: Optional[np.ndarray] = None  # set only when `predict` was given
    lambda_: Optional[float] = None  # regularization parameter used for the fit
    optimizer_output: Optional[NamedTuple] = None  # Full optimizer output for norm="sv"
|
|
14
|
+
|
|
15
|
+
class SinglePcghalResult(NamedTuple):
    """Result from single_pcghal (gradient descent optimizer).

    Only ``alpha`` is required; every other field defaults to None.
    """
    alpha: np.ndarray  # fitted coefficients
    predictions: Optional[np.ndarray] = None  # set only when `predict` was given
    lambda_: Optional[float] = None  # regularization parameter used for the fit
    optimizer_output: Optional[NamedTuple] = None  # single_pcghal always leaves this None
    risk: Optional[float] = None  # `risk` attribute of the C++ result
    iter: Optional[int] = None  # `iter` attribute of the C++ result
|
|
23
|
+
|
|
24
|
+
def single_lambda_fit(X: np.ndarray, Y: np.ndarray, maxdeg: int, npc: int,
                      single_lambda: float, predict: Optional[np.ndarray] = None,
                      center: bool = True, approx: bool = False, l1: bool = False) -> SingleLambdaResult:
    """
    Fit model with single lambda using either L1 or L2 penalty.
    Mirrors C++ single_lambda_pchar implementation.

    The kernel matrix of ``X`` is eigendecomposed, its leading eigenpairs are
    passed to the L1 (``fast_pchal``) or L2 (``ridge_regression``) solver,
    and predictions for ``predict`` are formed from the cross-kernel.

    Parameters
    ----------
    X : np.ndarray, shape (n, p)
        Input features
    Y : np.ndarray, shape (n,)
        Response variable
    maxdeg : int
        Maximum degree of interactions
    npc : int
        Number of principal components (the number actually used is the
        length of ``d`` returned by ``pchal_design``)
    single_lambda : float
        Regularization parameter
    predict : np.ndarray, optional
        Test data for predictions; must be 2-D with ``p`` columns
    center : bool, default=True
        Center the design matrix; ``Y`` is centered as well and its mean is
        added back to the predictions
    approx : bool, default=False
        Accepted for interface compatibility; this function does not use it
    l1 : bool, default=False
        Use L1 penalty (LASSO), otherwise L2 (Ridge)

    Returns
    -------
    SingleLambdaResult
        Named tuple with alpha coefficients and optional predictions

    Raises
    ------
    ValueError
        If ``predict`` is not 2-D with ``p`` columns.
    """
    X = np.asarray(X, dtype=np.float64, order='C')
    Y = np.asarray(Y, dtype=np.float64, order='C')
    _, p = X.shape

    # Generate design; only the number of retained components is needed here.
    des = pchal_design(X, maxdeg, npc, center=center)
    final_npc = des.d.shape[0]

    # Kernel matrix
    K = mkernel(X, maxdeg, center=center)

    # Eigendecomposition: keep the final_npc largest eigenpairs.
    evals, evecs = np.linalg.eigh(K)
    idx = np.argsort(-evals)[:final_npc]
    U = evecs[:, idx]
    D2 = evals[idx]  # Eigenvalues for the solvers

    # Center Y
    ymean = Y.mean() if center else 0.0
    Y_centered = Y - ymean if center else Y

    # Solve
    if l1:
        # LASSO: use fast_pchal_call logic
        from .core import fast_pchal
        alpha = fast_pchal(U, D2, Y_centered, single_lambda)
    else:
        # Ridge: use ridge_call logic
        alpha = ridge_regression(Y_centered, U, D2, single_lambda)

    # Predictions
    predictions_out = None
    if predict is not None:
        predict = np.asarray(predict, dtype=np.float64, order='C')
        # Check ndim first so a 1-D input fails with the intended ValueError
        # rather than an IndexError on shape[1].
        if predict.ndim != 2 or predict.shape[1] != p:
            raise ValueError(f"predict must have {p} columns")

        Ktest = kernel_cross(X, predict, maxdeg, center=center)
        # The small offset guards against division by zero.
        D2_inv_sqrt = np.diag(1.0 / np.sqrt(D2 + 1e-12))
        predictions_out = Ktest @ U @ D2_inv_sqrt @ alpha

        if center:
            predictions_out += ymean

    return SingleLambdaResult(alpha=alpha, predictions=predictions_out, lambda_=single_lambda)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def single_pcghal(X: np.ndarray, Y: np.ndarray, maxdeg: int, npc: int,
                  single_lambda: float, predict: Optional[np.ndarray] = None,
                  center: bool = True, approx: bool = False, verbose: bool = False,
                  max_iter: int = 100, tol: float = 1e-6) -> SinglePcghalResult:
    """
    Fit a single-lambda model with the gradient descent optimizer (PC-GHAL).

    High-level interface matching R's hapc() with norm="sv".  The fit itself
    is performed by the C++ function ``hapc_core.single_pcghal_fit``; this
    wrapper prepares the inputs and repackages the result.

    Parameters
    ----------
    X : np.ndarray, shape (n, p)
        Input features.
    Y : np.ndarray, shape (n,)
        Response variable.
    maxdeg : int
        Maximum degree of interactions.
    npc : int
        Number of principal components.
    single_lambda : float
        Regularization parameter.
    predict : np.ndarray, optional
        Test data for predictions; must have ``p`` columns.  When None an
        empty (0, p) array is passed to the C++ function.
    center : bool, default=True
        Center the design matrix.
    approx : bool, default=False
        Use approximate eigendecomposition (forwarded to the C++ function).
    verbose : bool, default=False
        Print iteration details.
    max_iter : int, default=100
        Maximum iterations for gradient descent.
    tol : float, default=1e-6
        Convergence tolerance.

    Returns
    -------
    SinglePcghalResult
        ``alpha``, ``risk`` and ``iter`` from the C++ result, the lambda
        used, and predictions (None unless ``predict`` was given and the C++
        result holds a non-empty prediction array).  ``optimizer_output`` is
        always None.

    Raises
    ------
    ValueError
        If ``predict`` does not have ``p`` columns.
    """
    from .core import _ensure_c_contiguous, hapc_core

    X = _ensure_c_contiguous(X)
    Y = _ensure_c_contiguous(Y)
    _, p = X.shape

    # Prepare prediction data
    if predict is None:
        # Empty matrix for no predictions
        predict_data = np.array([], dtype=np.float64).reshape(0, p)
    else:
        predict = _ensure_c_contiguous(predict)
        if predict.shape[1] != p:
            raise ValueError(f"predict must have {p} columns")
        predict_data = predict

    if verbose:
        rule = "=" * 60
        print(rule)
        print("PC-GHAL Single Lambda Optimization (C++ Implementation)")
        print(rule)
        print()

    # NOTE(review): the literals 1.0 and "grad" are presumably the step
    # factor and stopping criterion -- confirm against the C++ signature.
    fit = hapc_core.single_pcghal_fit(
        X, Y, maxdeg, npc, single_lambda, predict_data,
        max_iter, tol, 1.0, verbose, "grad", center, approx
    )

    has_predictions = predict is not None and fit.predictions.size > 0

    return SinglePcghalResult(
        alpha=fit.alpha,
        predictions=fit.predictions if has_predictions else None,
        lambda_=single_lambda,
        optimizer_output=None,  # Not available from C++ output
        risk=fit.risk,
        iter=fit.iter
    )
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def hapc(X: np.ndarray, Y: np.ndarray, maxdeg: int, npc: int,
         single_lambda: float, norm: str = "sv", predict: Optional[np.ndarray] = None,
         center: bool = True, approx: bool = False, verbose: bool = False,
         max_iter: int = 100, tol: float = 1e-6) -> SinglePcghalResult:
    """
    High-level interface matching R's hapc() function.

    Selects the solver from ``norm`` and forwards the remaining arguments.

    Parameters
    ----------
    X : np.ndarray, shape (n, p)
        Input features.
    Y : np.ndarray, shape (n,)
        Response variable.
    maxdeg : int
        Maximum degree of interactions.
    npc : int
        Number of principal components.
    single_lambda : float
        Regularization parameter.
    norm : str, default="sv"
        Solver selector:
        - "1": L1 penalty (LASSO soft-thresholding) via single_lambda_fit
        - "2": L2 penalty (Ridge regression, closed-form) via single_lambda_fit
        - "sv": Supervised (gradient descent optimizer PC-GHAL) via single_pcghal
    predict : np.ndarray, optional
        Test data for predictions.
    center : bool, default=True
        Center the design matrix.
    approx : bool, default=False
        Use approximate eigendecomposition (forwarded to the solver).
    verbose : bool, default=False
        Print iteration details.
    max_iter : int, default=100
        Maximum iterations (for norm="sv").
    tol : float, default=1e-6
        Convergence tolerance (for norm="sv").

    Returns
    -------
    SinglePcghalResult or SingleLambdaResult
        ``SinglePcghalResult`` for norm="sv", ``SingleLambdaResult`` for
        norm="1" or "2".

    Raises
    ------
    ValueError
        If ``norm`` is not one of "1", "2" or "sv".
    """
    if verbose:
        print(f"HAPC with norm='{norm}'")

    if norm == "sv":
        # Supervised (Gradient Descent)
        if verbose:
            print("Using gradient descent optimizer (PC-GHAL)")
        return single_pcghal(X, Y, maxdeg, npc, single_lambda, predict=predict,
                             center=center, approx=approx, verbose=verbose,
                             max_iter=max_iter, tol=tol)

    # The two penalized solvers share single_lambda_fit and differ only in
    # the l1 flag and the progress message.
    if norm == "1":
        use_l1 = True
        announcement = "Using L1 penalty (soft-thresholding)"
    elif norm == "2":
        use_l1 = False
        announcement = "Using L2 penalty (ridge regression)"
    else:
        raise ValueError(f"Unknown norm='{norm}'. Must be '1', '2', or 'sv'")

    if verbose:
        print(announcement)
    return single_lambda_fit(X, Y, maxdeg, npc, single_lambda, predict=predict,
                             center=center, approx=approx, l1=use_l1)
|
|
259
|
+
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: hapc
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: Highly Adaptive Principal Components (HAPC) regression
|
|
5
|
+
Home-page: https://github.com/meixide/hapc
|
|
6
|
+
Author: Carlos García Meixide
|
|
7
|
+
Author-email: Carlos García Meixide <cgmeixide@gmail.com>
|
|
8
|
+
License: YEAR: 2025
|
|
9
|
+
COPYRIGHT HOLDER: Carlos García Meixide
|
|
10
|
+
Project-URL: Homepage, https://github.com/meixide/hapc
|
|
11
|
+
Project-URL: Documentation, https://github.com/meixide/hapc#readme
|
|
12
|
+
Project-URL: Repository, https://github.com/meixide/hapc.git
|
|
13
|
+
Project-URL: Issues, https://github.com/meixide/hapc/issues
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Operating System :: OS Independent
|
|
21
|
+
Requires-Python: >=3.8
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: numpy<2.3,>=1.24
|
|
25
|
+
Requires-Dist: scipy>=1.7
|
|
26
|
+
Requires-Dist: scikit-learn>=0.24
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: pytest; extra == "dev"
|
|
29
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
30
|
+
Requires-Dist: black; extra == "dev"
|
|
31
|
+
Requires-Dist: flake8; extra == "dev"
|
|
32
|
+
Dynamic: author
|
|
33
|
+
Dynamic: home-page
|
|
34
|
+
Dynamic: license-file
|
|
35
|
+
Dynamic: requires-python
|
|
36
|
+
|
|
37
|
+
# HAPC: Highly Adaptive Principal Components
|
|
38
|
+
|
|
39
|
+
A fast and flexible machine learning library for nonparametric high-dimensional regression and classification with guarantees.
|
|
40
|
+
|
|
41
|
+
## Installation
|
|
42
|
+
|
|
43
|
+
### Prerequisites
|
|
44
|
+
|
|
45
|
+
- Python 3.8+
|
|
46
|
+
- C++ compiler (g++, clang, or MSVC)
|
|
47
|
+
- CMake 3.15+
|
|
48
|
+
- Eigen3
|
|
49
|
+
|
|
50
|
+
### Quick Install
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pip install hapc
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### Install from GitHub (latest development version)
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
pip install git+https://github.com/meixide/hapc.git
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Or with editable install for development:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
git clone https://github.com/meixide/hapc.git
|
|
66
|
+
cd hapc
|
|
67
|
+
pip install -e .
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### Install build dependencies
|
|
71
|
+
|
|
72
|
+
If installation fails, you may need to install build dependencies:
|
|
73
|
+
|
|
74
|
+
**macOS:**
|
|
75
|
+
```bash
|
|
76
|
+
brew install cmake eigen
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
**Ubuntu/Debian:**
|
|
80
|
+
```bash
|
|
81
|
+
sudo apt-get install cmake libeigen3-dev build-essential
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
**Windows:**
|
|
85
|
+
```bash
|
|
86
|
+
pip install cmake
|
|
87
|
+
# Install Visual Studio Build Tools or use conda
|
|
88
|
+
conda install -c conda-forge eigen
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Quick Start
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
import numpy as np
|
|
95
|
+
from hapc.single import single_pcghal
|
|
96
|
+
from hapc.cv import pcghal_cv
|
|
97
|
+
|
|
98
|
+
# Generate sample data
|
|
99
|
+
X = np.random.randn(100, 5)
|
|
100
|
+
Y = X[:, 0] + 0.5 * X[:, 1] + np.random.randn(100) * 0.1
|
|
101
|
+
|
|
102
|
+
# Single fit with fixed lambda
|
|
103
|
+
result = single_pcghal(X, Y, maxdeg=2, npc=5, single_lambda=0.01)
|
|
104
|
+
print(f"Risk: {result.optimizer_output.risk:.6f}")
|
|
105
|
+
|
|
106
|
+
# Cross-validation to select lambda
|
|
107
|
+
lambdas = np.logspace(-4, 0, 10)
|
|
108
|
+
cv_result = pcghal_cv(X, Y, maxdeg=2, npc=5, lambdas=lambdas, nfolds=5)
|
|
109
|
+
print(f"Best lambda: {cv_result.best_lambda:.6f}")
|
|
110
|
+
|
|
111
|
+
# Make predictions
|
|
112
|
+
X_test = np.random.randn(20, 5)
|
|
113
|
+
result = single_pcghal(X, Y, maxdeg=2, npc=5, single_lambda=0.01, predict=X_test)
|
|
114
|
+
print(f"Predictions: {result.predictions}")
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## Usage
|
|
118
|
+
|
|
119
|
+
### Regression
|
|
120
|
+
|
|
121
|
+
```python
|
|
122
|
+
from hapc.single import single_pcghal
|
|
123
|
+
|
|
124
|
+
result = single_pcghal(
|
|
125
|
+
X, Y,
|
|
126
|
+
maxdeg=2, # Maximum degree of interactions
|
|
127
|
+
npc=10, # Number of principal components
|
|
128
|
+
single_lambda=0.01,
|
|
129
|
+
predict=X_test # Optional: test data for predictions
|
|
130
|
+
)
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### Classification
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
from hapc.single import single_pcghal
|
|
137
|
+
|
|
138
|
+
result = single_pcghal(
|
|
139
|
+
X, Y_binary,
|
|
140
|
+
maxdeg=2,
|
|
141
|
+
npc=10,
|
|
142
|
+
single_lambda=0.01,
|
|
143
|
+
predict=X_test
|
|
144
|
+
)
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### Cross-Validation
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
from hapc.cv import pcghal_cv
|
|
151
|
+
|
|
152
|
+
cv_result = pcghal_cv(
|
|
153
|
+
X, Y,
|
|
154
|
+
maxdeg=2,
|
|
155
|
+
npc=10,
|
|
156
|
+
lambdas=np.logspace(-4, 0, 20),
|
|
157
|
+
nfolds=5
|
|
158
|
+
)
|
|
159
|
+
print(cv_result.best_lambda)
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## API Reference
|
|
163
|
+
|
|
164
|
+
### `hapc.single.single_pcghal()`
|
|
165
|
+
|
|
166
|
+
Fit PC-GHAL with a single lambda value.
|
|
167
|
+
|
|
168
|
+
**Parameters:**
|
|
169
|
+
- `X` (ndarray, shape (n, p)): Input features
|
|
170
|
+
- `Y` (ndarray, shape (n,)): Response variable
|
|
171
|
+
- `maxdeg` (int): Maximum degree of interactions
|
|
172
|
+
- `npc` (int): Number of principal components
|
|
173
|
+
- `single_lambda` (float): Regularization parameter
|
|
174
|
+
- `max_iter` (int, default=100): Maximum iterations
|
|
175
|
+
- `tol` (float, default=1e-6): Convergence tolerance
|
|
176
|
+
- `verbose` (bool, default=False): Print progress
|
|
177
|
+
- `predict` (ndarray, optional): Test data for predictions
|
|
178
|
+
- `center` (bool, default=True): Center the design matrix
|
|
179
|
+
|
|
180
|
+
**Returns:**
|
|
181
|
+
- `result.optimizer_output.alpha`: Coefficients
|
|
182
|
+
- `result.optimizer_output.risk`: Final risk
|
|
183
|
+
- `result.optimizer_output.iter`: Iterations until convergence
|
|
184
|
+
- `result.predictions`: Predictions on test data (if provided)
|
|
185
|
+
|
|
186
|
+
### `hapc.cv.pcghal_cv()`
|
|
187
|
+
|
|
188
|
+
Cross-validation to select lambda.
|
|
189
|
+
|
|
190
|
+
**Parameters:**
|
|
191
|
+
- `lambdas` (ndarray): Grid of lambda values to test
|
|
192
|
+
- `nfolds` (int, default=5): Number of CV folds
|
|
193
|
+
- ...other parameters are the same as for `single_pcghal`
|
|
194
|
+
|
|
195
|
+
**Returns:**
|
|
196
|
+
- `cv_result.best_lambda`: Optimal lambda
|
|
197
|
+
- `cv_result.mses`: CV errors for each lambda
|
|
198
|
+
- `cv_result.best_model`: Fitted model with best lambda
|
|
199
|
+
- `cv_result.predictions`: Predictions on test data (if provided)
|
|
200
|
+
|
|
201
|
+
## Contributing
|
|
202
|
+
|
|
203
|
+
Contributions welcome! The C++ core is shared between R and Python packages.
|
|
204
|
+
|
|
205
|
+
```bash
|
|
206
|
+
git clone https://github.com/meixide/hapc.git
|
|
207
|
+
cd hapc
|
|
208
|
+
pip install -e .
|
|
209
|
+
pytest
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
## License
|
|
213
|
+
|
|
214
|
+
MIT License - see LICENSE file
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
hapc/__init__.py,sha256=88Qoy-X9m1I-CyiJaHP0NZaZEok4_dcX9SKTuyQEtzA,696
|
|
2
|
+
hapc/core.py,sha256=Uw75VwMOQ-4RoNjNAAA34TDTQxYtFxeRKOe8ZIToB2c,5184
|
|
3
|
+
hapc/cv.py,sha256=M5_TB7oPyaZBoILY2cf6R2CMoXhHUSnGanNgAl6g3rk,12424
|
|
4
|
+
hapc/single.py,sha256=_KWfTPLEcvMc8hWllyjGARkxYznSDNhhOn8qv0kURVA,9149
|
|
5
|
+
hapc-0.1.2.dist-info/licenses/LICENSE,sha256=xsHdXeHSuT_rM0lZucaImzebBKKguXPMxSfXZSv_Tto,52
|
|
6
|
+
hapc-0.1.2.dist-info/METADATA,sha256=tLG5p8lpTFPe8a_pdRT3SeV0jmuWhR9Fi8C3ByqBHMM,5499
|
|
7
|
+
hapc-0.1.2.dist-info/WHEEL,sha256=XkFE14KmFh7mutkkb-qn_ueuH2lwfT8rLdfc5xpQ7wE,99
|
|
8
|
+
hapc-0.1.2.dist-info/top_level.txt,sha256=wlXqf3dR7pUrCp1U_cLzGyKzGcVoinK6DjtIMqHWGDA,5
|
|
9
|
+
hapc-0.1.2.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
hapc
|