hapc 2.1.0__tar.gz → 2.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hapc-2.1.0 → hapc-2.3.1}/CMakeLists.txt +8 -2
- {hapc-2.1.0/python/hapc.egg-info → hapc-2.3.1}/PKG-INFO +29 -4
- {hapc-2.1.0 → hapc-2.3.1}/README.md +28 -3
- hapc-2.3.1/pyproject.toml +70 -0
- {hapc-2.1.0 → hapc-2.3.1}/python/hapc/__init__.py +1 -1
- {hapc-2.1.0 → hapc-2.3.1}/python/hapc/cv.py +12 -5
- {hapc-2.1.0 → hapc-2.3.1}/python/hapc/single.py +96 -23
- {hapc-2.1.0 → hapc-2.3.1/python/hapc.egg-info}/PKG-INFO +29 -4
- {hapc-2.1.0 → hapc-2.3.1}/setup.py +10 -0
- {hapc-2.1.0 → hapc-2.3.1}/src/bindings.cpp +5 -0
- {hapc-2.1.0 → hapc-2.3.1}/src/hapc_core.hpp +8 -1
- {hapc-2.1.0 → hapc-2.3.1}/src/pcghal_cv_classi_cpp.cpp +60 -33
- {hapc-2.1.0 → hapc-2.3.1}/src/r_bindings.cpp +9 -10
- hapc-2.1.0/pyproject.toml +0 -36
- {hapc-2.1.0 → hapc-2.3.1}/LICENSE +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/MANIFEST.in +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/python/hapc/ate.py +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/python/hapc/core.py +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/python/hapc.egg-info/SOURCES.txt +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/python/hapc.egg-info/dependency_links.txt +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/python/hapc.egg-info/not-zip-safe +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/python/hapc.egg-info/requires.txt +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/python/hapc.egg-info/top_level.txt +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/setup.cfg +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/src/cross_kernel.cpp +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/src/cv_classi.cpp +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/src/cv_fast_pchal.cpp +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/src/cv_fast_pchal_python.cpp +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/src/fast_pchal.cpp +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/src/logistic_call.cpp +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/src/mkernel.cpp +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/src/pcghal_call.cpp +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/src/pcghal_classi_call.cpp +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/src/pcghal_cv.cpp +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/src/pcghal_cv_cpp.cpp +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/src/pchal_design.cpp +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/src/ridge_wrappers.cpp +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/src/single_pcghal_cpp.cpp +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/src/single_pchar.cpp +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/tests/test_api.py +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/tests/test_ate.py +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/tests/test_ate_hapc_diagnostics_example.py +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/tests/test_core.py +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/tests/test_logistic_regression.py +0 -0
- {hapc-2.1.0 → hapc-2.3.1}/tests/test_r_vs_python_alpha.py +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
cmake_minimum_required(VERSION 3.
|
|
1
|
+
cmake_minimum_required(VERSION 3.18)
|
|
2
2
|
project(hapc)
|
|
3
3
|
|
|
4
4
|
set(CMAKE_CXX_STANDARD 17)
|
|
@@ -15,7 +15,13 @@ endif()
|
|
|
15
15
|
# Python3_EXECUTABLE from setup.py so the build always targets the *same*
|
|
16
16
|
# interpreter that pip is using. Without this CMake may discover a newer/
|
|
17
17
|
# older system Python and produce a .so tagged for the wrong ABI.
|
|
18
|
-
|
|
18
|
+
#
|
|
19
|
+
# Use Development.Module (headers only), NOT the full Development component:
|
|
20
|
+
# the latter also requires Development.Embed -> libpython, which manylinux
|
|
21
|
+
# images deliberately do not ship (extension modules must not link libpython).
|
|
22
|
+
# Requiring full Development makes the manylinux build fail with
|
|
23
|
+
# "Could NOT find Python3 (missing: Python3_LIBRARIES Development.Embed)".
|
|
24
|
+
find_package(Python3 COMPONENTS Interpreter Development.Module REQUIRED)
|
|
19
25
|
message(STATUS "Python3_EXECUTABLE: ${Python3_EXECUTABLE}")
|
|
20
26
|
message(STATUS "Python3_VERSION: ${Python3_VERSION}")
|
|
21
27
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hapc
|
|
3
|
-
Version: 2.1
|
|
3
|
+
Version: 2.3.1
|
|
4
4
|
Summary: Highly Adaptive Principal Components
|
|
5
5
|
Home-page: https://github.com/meixide/hapc
|
|
6
6
|
Author: Carlos García Meixide
|
|
@@ -51,16 +51,41 @@ A fast and flexible machine learning library for nonparametric high-dimensional
|
|
|
51
51
|
pip install hapc
|
|
52
52
|
```
|
|
53
53
|
|
|
54
|
+
Prebuilt wheels are published for Linux (manylinux2014, x86_64), macOS
|
|
55
|
+
(Intel + Apple Silicon) and Windows, for CPython 3.8–3.12. No compiler,
|
|
56
|
+
CMake or Eigen is needed when a wheel is available.
|
|
57
|
+
|
|
58
|
+
### Linux / HPC clusters
|
|
59
|
+
|
|
60
|
+
The Linux wheels use the **manylinux2014** baseline (glibc 2.17), so
|
|
61
|
+
`pip install hapc` works out of the box on HPC login/compute nodes —
|
|
62
|
+
no `conda` toolchain, `devtoolset`, or sysroot setup required:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
pip install hapc
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
If you must build from the source distribution (niche architecture, very
|
|
69
|
+
old Python, or an air-gapped node), provide a C++17 compiler and either
|
|
70
|
+
let CMake fetch Eigen automatically (needs network) or install Eigen and
|
|
71
|
+
let `find_package(Eigen3)` find it:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
# with conda compilers (recommended on HPC)
|
|
75
|
+
conda install -c conda-forge cxx-compiler cmake eigen
|
|
76
|
+
pip install hapc --no-binary hapc
|
|
77
|
+
```
|
|
78
|
+
|
|
54
79
|
### Install from GitHub (latest development version)
|
|
55
80
|
|
|
56
81
|
```bash
|
|
57
|
-
pip install git+https://github.com/
|
|
82
|
+
pip install git+https://github.com/meixide/hapc.git
|
|
58
83
|
```
|
|
59
84
|
|
|
60
85
|
Or with editable install for development:
|
|
61
86
|
|
|
62
87
|
```bash
|
|
63
|
-
git clone https://github.com/
|
|
88
|
+
git clone https://github.com/meixide/hapc.git
|
|
64
89
|
cd hapc
|
|
65
90
|
pip install -e .
|
|
66
91
|
```
|
|
@@ -201,7 +226,7 @@ Cross-validation to select lambda.
|
|
|
201
226
|
Contributions welcome! The C++ core is shared between R and Python packages.
|
|
202
227
|
|
|
203
228
|
```bash
|
|
204
|
-
git clone https://github.com/
|
|
229
|
+
git clone https://github.com/meixide/hapc.git
|
|
205
230
|
cd hapc
|
|
206
231
|
pip install -e .
|
|
207
232
|
pytest
|
|
@@ -17,16 +17,41 @@ A fast and flexible machine learning library for nonparametric high-dimensional
|
|
|
17
17
|
pip install hapc
|
|
18
18
|
```
|
|
19
19
|
|
|
20
|
+
Prebuilt wheels are published for Linux (manylinux2014, x86_64), macOS
|
|
21
|
+
(Intel + Apple Silicon) and Windows, for CPython 3.8–3.12. No compiler,
|
|
22
|
+
CMake or Eigen is needed when a wheel is available.
|
|
23
|
+
|
|
24
|
+
### Linux / HPC clusters
|
|
25
|
+
|
|
26
|
+
The Linux wheels use the **manylinux2014** baseline (glibc 2.17), so
|
|
27
|
+
`pip install hapc` works out of the box on HPC login/compute nodes —
|
|
28
|
+
no `conda` toolchain, `devtoolset`, or sysroot setup required:
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pip install hapc
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
If you must build from the source distribution (niche architecture, very
|
|
35
|
+
old Python, or an air-gapped node), provide a C++17 compiler and either
|
|
36
|
+
let CMake fetch Eigen automatically (needs network) or install Eigen and
|
|
37
|
+
let `find_package(Eigen3)` find it:
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
# with conda compilers (recommended on HPC)
|
|
41
|
+
conda install -c conda-forge cxx-compiler cmake eigen
|
|
42
|
+
pip install hapc --no-binary hapc
|
|
43
|
+
```
|
|
44
|
+
|
|
20
45
|
### Install from GitHub (latest development version)
|
|
21
46
|
|
|
22
47
|
```bash
|
|
23
|
-
pip install git+https://github.com/
|
|
48
|
+
pip install git+https://github.com/meixide/hapc.git
|
|
24
49
|
```
|
|
25
50
|
|
|
26
51
|
Or with editable install for development:
|
|
27
52
|
|
|
28
53
|
```bash
|
|
29
|
-
git clone https://github.com/
|
|
54
|
+
git clone https://github.com/meixide/hapc.git
|
|
30
55
|
cd hapc
|
|
31
56
|
pip install -e .
|
|
32
57
|
```
|
|
@@ -167,7 +192,7 @@ Cross-validation to select lambda.
|
|
|
167
192
|
Contributions welcome! The C++ core is shared between R and Python packages.
|
|
168
193
|
|
|
169
194
|
```bash
|
|
170
|
-
git clone https://github.com/
|
|
195
|
+
git clone https://github.com/meixide/hapc.git
|
|
171
196
|
cd hapc
|
|
172
197
|
pip install -e .
|
|
173
198
|
pytest
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=65", "wheel", "cmake>=3.18", "pybind11>=2.6"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "hapc"
|
|
7
|
+
version = "2.3.1"
|
|
8
|
+
description = "Highly Adaptive Principal Components"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
authors = [
|
|
12
|
+
{name = "Carlos García Meixide", email = "cgmeixide@gmail.com"}
|
|
13
|
+
]
|
|
14
|
+
license = {text = "MIT"}
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Programming Language :: Python :: 3.8",
|
|
18
|
+
"Programming Language :: Python :: 3.9",
|
|
19
|
+
"Programming Language :: Python :: 3.10",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Operating System :: OS Independent",
|
|
23
|
+
]
|
|
24
|
+
dependencies = [
|
|
25
|
+
"numpy>=1.24,<2.3",
|
|
26
|
+
"scikit-learn>=1.0",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
[project.optional-dependencies]
|
|
30
|
+
dev = ["pytest", "pytest-cov", "black", "flake8"]
|
|
31
|
+
|
|
32
|
+
[project.urls]
|
|
33
|
+
Homepage = "https://github.com/meixide/hapc"
|
|
34
|
+
Documentation = "https://github.com/meixide/hapc#readme"
|
|
35
|
+
Repository = "https://github.com/meixide/hapc.git"
|
|
36
|
+
Issues = "https://github.com/meixide/hapc/issues"
|
|
37
|
+
|
|
38
|
+
[tool.cibuildwheel]
|
|
39
|
+
# Build CPython 3.8–3.12 only; skip PyPy and musl (HPC/desktop targets are glibc).
|
|
40
|
+
build = "cp38-* cp39-* cp310-* cp311-* cp312-*"
|
|
41
|
+
skip = ["pp*", "*-musllinux*"]
|
|
42
|
+
build-verbosity = 1
|
|
43
|
+
# Smoke-test every wheel: install it (pulling numpy/scikit-learn) and import the
|
|
44
|
+
# compiled extension. Catches wrong-ABI / unresolved-symbol wheels before publish.
|
|
45
|
+
test-command = "python -c \"import hapc; print(hapc.__version__)\""
|
|
46
|
+
# NOTE: the latest scipy (transitive via scikit-learn) no longer ships a
|
|
47
|
+
# manylinux2014 wheel, so the in-container smoke test would try to compile it
|
|
48
|
+
# from source and fail. We force scipy to resolve from a binary wheel via
|
|
49
|
+
# PIP_ONLY_BINARY=scipy, set in the workflow and passed into the Linux container
|
|
50
|
+
# (see .github/workflows/build-and-publish.yml). It must apply to the wheel
|
|
51
|
+
# install itself, which is why it lives in the env rather than test-requires.
|
|
52
|
+
|
|
53
|
+
[tool.cibuildwheel.linux]
|
|
54
|
+
archs = ["x86_64"]
|
|
55
|
+
# manylinux2014 -> glibc 2.17 baseline + bundled libstdc++ via auditwheel, so the
|
|
56
|
+
# wheel installs and runs unmodified on any HPC cluster (glibc >= 2.17), no
|
|
57
|
+
# compiler / conda toolchain / sysroot required.
|
|
58
|
+
manylinux-x86_64-image = "manylinux2014"
|
|
59
|
+
|
|
60
|
+
[tool.cibuildwheel.macos]
|
|
61
|
+
# Build fat universal2 wheels (x86_64 + arm64) from a single runner. CMake does
|
|
62
|
+
# not honour the interpreter's arch flags, so the arch is forced explicitly via
|
|
63
|
+
# CMAKE_ARGS below (setup.py appends $CMAKE_ARGS to the cmake invocation).
|
|
64
|
+
# delocate then verifies both slices are present, which is what caught the old
|
|
65
|
+
# single-arch-but-universal2-tagged wheels.
|
|
66
|
+
archs = ["universal2"]
|
|
67
|
+
environment = { CMAKE_ARGS = "-DCMAKE_OSX_ARCHITECTURES=arm64;x86_64" }
|
|
68
|
+
|
|
69
|
+
[tool.cibuildwheel.windows]
|
|
70
|
+
archs = ["AMD64"]
|
|
@@ -18,7 +18,11 @@ import numpy as np
|
|
|
18
18
|
|
|
19
19
|
from . import hapc_core
|
|
20
20
|
from .core import _C, cross_kernel_hapc, design_hapc
|
|
21
|
-
from .single import
|
|
21
|
+
from .single import (
|
|
22
|
+
_check_binomial_labels,
|
|
23
|
+
_to_soft01,
|
|
24
|
+
single_pcghal_classification_lasso,
|
|
25
|
+
)
|
|
22
26
|
|
|
23
27
|
|
|
24
28
|
class CVResult(NamedTuple):
|
|
@@ -376,6 +380,9 @@ def pcghal_cv_classi_lasso(X: np.ndarray, Y: np.ndarray,
|
|
|
376
380
|
if not np.all(lams > 0):
|
|
377
381
|
raise ValueError("All lambdas must be > 0 for logistic LASSO.")
|
|
378
382
|
|
|
383
|
+
# Soft target in [0,1] used for the held-out cross-entropy deviance
|
|
384
|
+
# (accepts hard {0,1}/{-1,+1} or fractional EM-HAL posteriors).
|
|
385
|
+
q = _to_soft01(Y)
|
|
379
386
|
folds = _native_folds(n, int(nfolds))
|
|
380
387
|
L = lams.size
|
|
381
388
|
fold_dev = np.full((int(nfolds), L), np.nan)
|
|
@@ -386,7 +393,7 @@ def pcghal_cv_classi_lasso(X: np.ndarray, Y: np.ndarray,
|
|
|
386
393
|
if te.size == 0 or tr.size == 0:
|
|
387
394
|
continue
|
|
388
395
|
Xtr, Ytr = X[tr], Y[tr]
|
|
389
|
-
Xte, Yte = X[te],
|
|
396
|
+
Xte, Yte = X[te], q[te]
|
|
390
397
|
|
|
391
398
|
for j, lam in enumerate(lams):
|
|
392
399
|
res = single_pcghal_classification_lasso(
|
|
@@ -395,9 +402,7 @@ def pcghal_cv_classi_lasso(X: np.ndarray, Y: np.ndarray,
|
|
|
395
402
|
verbose=bool(verbose), max_iter=int(max_iter),
|
|
396
403
|
)
|
|
397
404
|
probs = np.clip(res.probabilities, 1e-15, 1 - 1e-15)
|
|
398
|
-
|
|
399
|
-
else (Yte > 0).astype(np.float64)
|
|
400
|
-
dev = -(yte01 * np.log(probs) + (1 - yte01) * np.log(1 - probs))
|
|
405
|
+
dev = -(Yte * np.log(probs) + (1 - Yte) * np.log(1 - probs))
|
|
401
406
|
fold_dev[k - 1, j] = float(dev.mean())
|
|
402
407
|
|
|
403
408
|
deviances = np.nanmean(fold_dev, axis=0)
|
|
@@ -500,6 +505,8 @@ def cv_hapc(X: np.ndarray, Y: np.ndarray,
|
|
|
500
505
|
lams = _grid(None, log_lambda_min, log_lambda_max, grid_length)
|
|
501
506
|
|
|
502
507
|
if family == "binomial":
|
|
508
|
+
# Validate labels; allow soft labels in [0,1] only for norm in {"1","2"}.
|
|
509
|
+
_check_binomial_labels(Y, norm)
|
|
503
510
|
if norm in {"sv", "2"}:
|
|
504
511
|
return pcghal_cv_classi(
|
|
505
512
|
X, Y, max_degree=max_degree, npcs=npcs,
|
|
@@ -95,6 +95,61 @@ def _to_pm1(Y: np.ndarray, *, verbose: bool = False) -> np.ndarray:
|
|
|
95
95
|
)
|
|
96
96
|
|
|
97
97
|
|
|
98
|
+
def _label_kind(Y: np.ndarray) -> str:
|
|
99
|
+
"""Classify a binomial response vector.
|
|
100
|
+
|
|
101
|
+
Returns ``"01"`` (hard labels in ``{0,1}``), ``"pm1"`` (hard labels in
|
|
102
|
+
``{-1,+1}``), or ``"soft"`` (fractional labels in ``[0,1]``, e.g. EM-HAL
|
|
103
|
+
E-step posteriors). Raises ``ValueError`` if any value falls outside
|
|
104
|
+
``[0,1]`` and the set is not exactly ``{-1,+1}``.
|
|
105
|
+
"""
|
|
106
|
+
Y = np.asarray(Y, dtype=np.float64).ravel()
|
|
107
|
+
u = np.unique(Y[~np.isnan(Y)])
|
|
108
|
+
s = set(u.tolist())
|
|
109
|
+
if s.issubset({0.0, 1.0}):
|
|
110
|
+
return "01"
|
|
111
|
+
if s == {-1.0, 1.0}:
|
|
112
|
+
return "pm1"
|
|
113
|
+
if u.size and u.min() >= 0.0 and u.max() <= 1.0:
|
|
114
|
+
return "soft"
|
|
115
|
+
raise ValueError(
|
|
116
|
+
"family='binomial' requires Y in {0,1}, {-1,+1}, or soft labels in "
|
|
117
|
+
"[0,1]; found values outside [0,1]."
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _to_soft01(Y: np.ndarray) -> np.ndarray:
|
|
122
|
+
"""Map a binomial response to a soft cross-entropy target in ``[0,1]``."""
|
|
123
|
+
Y = np.asarray(Y, dtype=np.float64).ravel()
|
|
124
|
+
return (Y + 1.0) / 2.0 if _label_kind(Y) == "pm1" else Y
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _check_binomial_labels(Y: np.ndarray, norm: str) -> str:
|
|
128
|
+
"""Validate labels and enforce the soft-label norm restriction.
|
|
129
|
+
|
|
130
|
+
Soft labels (any value strictly inside ``(0,1)``) are supported only for
|
|
131
|
+
``norm`` in ``{"1","2"}``; ``norm="sv"`` raises ``NotImplementedError``.
|
|
132
|
+
A warning is emitted whenever soft labels are detected. Returns the label
|
|
133
|
+
kind from :func:`_label_kind`.
|
|
134
|
+
"""
|
|
135
|
+
import warnings
|
|
136
|
+
|
|
137
|
+
kind = _label_kind(Y)
|
|
138
|
+
if kind == "soft":
|
|
139
|
+
if norm == "sv":
|
|
140
|
+
raise NotImplementedError(
|
|
141
|
+
"Soft labels (Y in (0,1)) are not implemented for norm='sv'; "
|
|
142
|
+
"use norm='1' or norm='2'."
|
|
143
|
+
)
|
|
144
|
+
warnings.warn(
|
|
145
|
+
"Non-binary labels detected in Y: treating them as soft labels in "
|
|
146
|
+
"[0,1] (cross-entropy target). Supported only for norm='1' and "
|
|
147
|
+
"norm='2'.",
|
|
148
|
+
stacklevel=2,
|
|
149
|
+
)
|
|
150
|
+
return kind
|
|
151
|
+
|
|
152
|
+
|
|
98
153
|
def _calibrate_logistic_intercept(y01: np.ndarray, eta: np.ndarray) -> float:
|
|
99
154
|
"""Newton calibration for intercept with fixed linear predictor ``eta``."""
|
|
100
155
|
y01 = np.asarray(y01, dtype=np.float64).ravel()
|
|
@@ -367,24 +422,21 @@ def single_pcghal_classification_ridge_only(
|
|
|
367
422
|
SinglePcghalClassificationResult
|
|
368
423
|
"""
|
|
369
424
|
X, Y, n, p = _check_xy(X, Y)
|
|
370
|
-
|
|
425
|
+
# Accept hard {0,1}/{-1,+1} or soft [0,1] labels (cross-entropy target).
|
|
426
|
+
y01 = _to_soft01(Y)
|
|
371
427
|
|
|
372
428
|
des = design_hapc(X, max_degree, npcs, center=center)
|
|
373
429
|
final_npc = des.d.shape[0]
|
|
374
430
|
Xtilde = des.U[:, :final_npc] * des.d[:final_npc]
|
|
375
431
|
|
|
376
432
|
alpha = np.asarray(
|
|
377
|
-
hapc_core.
|
|
433
|
+
hapc_core.logistic_ridge_init_y01(_C(y01), _C(Xtilde), float(lambda_))
|
|
378
434
|
).ravel()
|
|
379
435
|
|
|
380
436
|
eta = Xtilde @ alpha
|
|
381
|
-
y01 = (Y_pm1 > 0).astype(np.float64)
|
|
382
437
|
b0 = _calibrate_logistic_intercept(y01, eta)
|
|
383
|
-
|
|
384
|
-
risk = float(
|
|
385
|
-
np.where(ymu > 0, np.log1p(np.exp(-ymu)), -ymu + np.log1p(np.exp(ymu)))
|
|
386
|
-
.mean()
|
|
387
|
-
)
|
|
438
|
+
phat = np.clip(1.0 / (1.0 + np.exp(-(eta + b0))), 1e-15, 1 - 1e-15)
|
|
439
|
+
risk = float((-(y01 * np.log(phat) + (1 - y01) * np.log(1 - phat))).mean())
|
|
388
440
|
|
|
389
441
|
predictions = probabilities = predicted_classes = None
|
|
390
442
|
if predict is not None:
|
|
@@ -480,13 +532,26 @@ def single_pcghal_classification_lasso(
|
|
|
480
532
|
raise ValueError(f"lambda_ must be > 0 for LASSO; got {lambda_}")
|
|
481
533
|
|
|
482
534
|
X, Y, n, p = _check_xy(X, Y)
|
|
483
|
-
|
|
484
|
-
|
|
535
|
+
# Accept hard {0,1}/{-1,+1} or soft [0,1] labels (cross-entropy target).
|
|
536
|
+
q = _to_soft01(Y)
|
|
485
537
|
|
|
486
538
|
des = design_hapc(X, max_degree, npcs, center=center)
|
|
487
539
|
final_npc = des.d.shape[0]
|
|
488
540
|
Xtilde = des.U[:, :final_npc] * des.d[:final_npc]
|
|
489
541
|
|
|
542
|
+
# For soft labels, replicate each row as a (label=1, weight=q) and
|
|
543
|
+
# (label=0, weight=1-q) pair so the sample-weighted logistic loss equals
|
|
544
|
+
# the soft cross-entropy. On hard labels this reduces to the plain fit.
|
|
545
|
+
is_soft = bool(np.any((q > 1e-12) & (q < 1.0 - 1e-12)))
|
|
546
|
+
if is_soft:
|
|
547
|
+
Xfit = _C(np.vstack([Xtilde, Xtilde]))
|
|
548
|
+
yfit = np.concatenate([np.ones(n), np.zeros(n)]).astype(np.int64)
|
|
549
|
+
wfit = np.concatenate([q, 1.0 - q]).astype(np.float64)
|
|
550
|
+
else:
|
|
551
|
+
Xfit = _C(Xtilde)
|
|
552
|
+
yfit = (q > 0.5).astype(np.int64)
|
|
553
|
+
wfit = None
|
|
554
|
+
|
|
490
555
|
C = 1.0 / (n * float(lambda_))
|
|
491
556
|
# sklearn>=1.8 deprecated penalty="l1" in favour of l1_ratio=1 with the
|
|
492
557
|
# liblinear solver; older versions still need penalty="l1". Try the new
|
|
@@ -495,24 +560,28 @@ def single_pcghal_classification_lasso(
|
|
|
495
560
|
sig_params = inspect.signature(LogisticRegression).parameters
|
|
496
561
|
common_kw = dict(solver="liblinear", C=C, fit_intercept=False,
|
|
497
562
|
max_iter=int(max_iter))
|
|
563
|
+
|
|
564
|
+
def _fit(**ctor):
|
|
565
|
+
m = LogisticRegression(**ctor, **common_kw)
|
|
566
|
+
if wfit is None:
|
|
567
|
+
m.fit(Xfit, yfit)
|
|
568
|
+
else:
|
|
569
|
+
m.fit(Xfit, yfit, sample_weight=wfit)
|
|
570
|
+
return m
|
|
571
|
+
|
|
498
572
|
if "l1_ratio" in sig_params and "penalty" in sig_params:
|
|
499
573
|
try:
|
|
500
|
-
model =
|
|
501
|
-
model.fit(_C(Xtilde), Y_01)
|
|
574
|
+
model = _fit(l1_ratio=1.0)
|
|
502
575
|
except (TypeError, ValueError):
|
|
503
|
-
model =
|
|
504
|
-
model.fit(_C(Xtilde), Y_01)
|
|
576
|
+
model = _fit(penalty="l1")
|
|
505
577
|
else: # pragma: no cover (very old sklearn)
|
|
506
|
-
model =
|
|
507
|
-
model.fit(_C(Xtilde), Y_01)
|
|
578
|
+
model = _fit(penalty="l1")
|
|
508
579
|
alpha = np.asarray(model.coef_, dtype=np.float64).ravel()
|
|
509
|
-
b0 = _calibrate_logistic_intercept(
|
|
580
|
+
b0 = _calibrate_logistic_intercept(q, Xtilde @ alpha)
|
|
510
581
|
|
|
511
582
|
eta = Xtilde @ alpha + b0
|
|
512
|
-
|
|
513
|
-
risk = float(
|
|
514
|
-
np.where(ymu > 0, np.log1p(np.exp(-ymu)), -ymu + np.log1p(np.exp(ymu))).mean()
|
|
515
|
-
)
|
|
583
|
+
phat = np.clip(1.0 / (1.0 + np.exp(-eta)), 1e-15, 1 - 1e-15)
|
|
584
|
+
risk = float((-(q * np.log(phat) + (1 - q) * np.log(1 - phat))).mean())
|
|
516
585
|
|
|
517
586
|
predictions = probabilities = predicted_classes = None
|
|
518
587
|
if predict is not None:
|
|
@@ -560,8 +629,10 @@ def hapc(X: np.ndarray, Y: np.ndarray,
|
|
|
560
629
|
X : np.ndarray, shape (n, p)
|
|
561
630
|
Features.
|
|
562
631
|
Y : np.ndarray, shape (n,)
|
|
563
|
-
Response. For ``family="binomial"
|
|
564
|
-
``{-1,+1}
|
|
632
|
+
Response. For ``family="binomial"``: hard labels in ``{0,1}`` or
|
|
633
|
+
``{-1,+1}``, or soft labels in ``[0,1]`` (e.g. EM-HAL E-step
|
|
634
|
+
posteriors). Soft labels are supported only for ``norm`` in
|
|
635
|
+
``{"1","2"}``; ``norm="sv"`` requires hard labels.
|
|
565
636
|
family : {"gaussian", "binomial"}, default "gaussian"
|
|
566
637
|
Loss family.
|
|
567
638
|
max_degree : int, default 1
|
|
@@ -617,6 +688,8 @@ def hapc(X: np.ndarray, Y: np.ndarray,
|
|
|
617
688
|
npcs = int(X.shape[0])
|
|
618
689
|
|
|
619
690
|
if family == "binomial":
|
|
691
|
+
# Validate labels; allow soft labels in [0,1] only for norm in {"1","2"}.
|
|
692
|
+
_check_binomial_labels(Y, norm)
|
|
620
693
|
if norm == "sv":
|
|
621
694
|
return single_pcghal_classification(
|
|
622
695
|
X, Y, max_degree, npcs, lambda_,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hapc
|
|
3
|
-
Version: 2.1
|
|
3
|
+
Version: 2.3.1
|
|
4
4
|
Summary: Highly Adaptive Principal Components
|
|
5
5
|
Home-page: https://github.com/meixide/hapc
|
|
6
6
|
Author: Carlos García Meixide
|
|
@@ -51,16 +51,41 @@ A fast and flexible machine learning library for nonparametric high-dimensional
|
|
|
51
51
|
pip install hapc
|
|
52
52
|
```
|
|
53
53
|
|
|
54
|
+
Prebuilt wheels are published for Linux (manylinux2014, x86_64), macOS
|
|
55
|
+
(Intel + Apple Silicon) and Windows, for CPython 3.8–3.12. No compiler,
|
|
56
|
+
CMake or Eigen is needed when a wheel is available.
|
|
57
|
+
|
|
58
|
+
### Linux / HPC clusters
|
|
59
|
+
|
|
60
|
+
The Linux wheels use the **manylinux2014** baseline (glibc 2.17), so
|
|
61
|
+
`pip install hapc` works out of the box on HPC login/compute nodes —
|
|
62
|
+
no `conda` toolchain, `devtoolset`, or sysroot setup required:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
pip install hapc
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
If you must build from the source distribution (niche architecture, very
|
|
69
|
+
old Python, or an air-gapped node), provide a C++17 compiler and either
|
|
70
|
+
let CMake fetch Eigen automatically (needs network) or install Eigen and
|
|
71
|
+
let `find_package(Eigen3)` find it:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
# with conda compilers (recommended on HPC)
|
|
75
|
+
conda install -c conda-forge cxx-compiler cmake eigen
|
|
76
|
+
pip install hapc --no-binary hapc
|
|
77
|
+
```
|
|
78
|
+
|
|
54
79
|
### Install from GitHub (latest development version)
|
|
55
80
|
|
|
56
81
|
```bash
|
|
57
|
-
pip install git+https://github.com/
|
|
82
|
+
pip install git+https://github.com/meixide/hapc.git
|
|
58
83
|
```
|
|
59
84
|
|
|
60
85
|
Or with editable install for development:
|
|
61
86
|
|
|
62
87
|
```bash
|
|
63
|
-
git clone https://github.com/
|
|
88
|
+
git clone https://github.com/meixide/hapc.git
|
|
64
89
|
cd hapc
|
|
65
90
|
pip install -e .
|
|
66
91
|
```
|
|
@@ -201,7 +226,7 @@ Cross-validation to select lambda.
|
|
|
201
226
|
Contributions welcome! The C++ core is shared between R and Python packages.
|
|
202
227
|
|
|
203
228
|
```bash
|
|
204
|
-
git clone https://github.com/
|
|
229
|
+
git clone https://github.com/meixide/hapc.git
|
|
205
230
|
cd hapc
|
|
206
231
|
pip install -e .
|
|
207
232
|
pytest
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
from setuptools import setup, find_packages, Extension
|
|
4
4
|
from setuptools.command.build_ext import build_ext
|
|
5
5
|
import os
|
|
6
|
+
import shlex
|
|
6
7
|
import subprocess
|
|
7
8
|
import sys
|
|
8
9
|
from pathlib import Path
|
|
@@ -44,6 +45,15 @@ class CMakeBuild(build_ext):
|
|
|
44
45
|
build_args = ['--config', cfg]
|
|
45
46
|
|
|
46
47
|
cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg]
|
|
48
|
+
|
|
49
|
+
# Honour the conventional CMAKE_ARGS env var (set by cibuildwheel/conda).
|
|
50
|
+
# Used to force universal2 macOS builds via
|
|
51
|
+
# CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64;x86_64", which CMake cannot
|
|
52
|
+
# infer from the (single-arch) build interpreter on its own.
|
|
53
|
+
extra_cmake_args = os.environ.get('CMAKE_ARGS')
|
|
54
|
+
if extra_cmake_args:
|
|
55
|
+
cmake_args += shlex.split(extra_cmake_args)
|
|
56
|
+
|
|
47
57
|
# Add parallel build flag only on non-Windows platforms
|
|
48
58
|
# On Windows, MSBuild doesn't support -j flag and handles parallelization automatically
|
|
49
59
|
if sys.platform != 'win32':
|
|
@@ -117,4 +117,9 @@ PYBIND11_MODULE(hapc_core, m) {
|
|
|
117
117
|
|
|
118
118
|
m.def("logistic_ridge_init", &logistic_ridge_init,
|
|
119
119
|
py::arg("Y"), py::arg("X"), py::arg("lambda"));
|
|
120
|
+
|
|
121
|
+
// Soft-label logistic ridge initialiser: target Y may be any value in
|
|
122
|
+
// [0,1] (hard {0,1} labels or fractional EM-HAL E-step posteriors).
|
|
123
|
+
m.def("logistic_ridge_init_y01", &logistic_ridge_init_y01,
|
|
124
|
+
py::arg("Y"), py::arg("X"), py::arg("lambda"));
|
|
120
125
|
}
|
|
@@ -91,6 +91,11 @@ FastCVOutput fasthal_cv_python(const MatrixXd& X, const VectorXd& Y, int npc,
|
|
|
91
91
|
// (internally multiplied by n, matching logistic_call).
|
|
92
92
|
VectorXd logistic_ridge_init(const VectorXd& Y_pm1, const MatrixXd& X, double lambda);
|
|
93
93
|
|
|
94
|
+
// Soft-label variant: target `y01` may take any value in [0, 1] (hard {0,1}
|
|
95
|
+
// labels or fractional EM-HAL E-step posteriors). On hard {0,1} inputs the
|
|
96
|
+
// result is identical to logistic_ridge_init. lambda has the same scaling.
|
|
97
|
+
VectorXd logistic_ridge_init_y01(const VectorXd& y01, const MatrixXd& X, double lambda);
|
|
98
|
+
|
|
94
99
|
// Cross-validation output for binomial (logistic) HAPC.
|
|
95
100
|
struct CVClassiOutput {
|
|
96
101
|
std::vector<double> deviances;
|
|
@@ -101,7 +106,9 @@ struct CVClassiOutput {
|
|
|
101
106
|
};
|
|
102
107
|
|
|
103
108
|
// Python-friendly binomial CV (mirrors R `pchal_cv_classi_call`).
|
|
104
|
-
// Y must
|
|
109
|
+
// Y must lie in [0,1]: hard {0,1} labels or soft EM-HAL posteriors. Soft
|
|
110
|
+
// labels are supported only when with_pgd == false (norm="2"); with_pgd ==
|
|
111
|
+
// true (norm="sv") rejects soft labels.
|
|
105
112
|
//
|
|
106
113
|
// When `with_pgd == true` (default): per fold runs logistic-ridge initialiser
|
|
107
114
|
// followed by projected gradient descent on logistic loss (norm="sv").
|
|
@@ -28,10 +28,15 @@
|
|
|
28
28
|
// rule `beta := delta_beta` (i.e. solving the full normal equation each
|
|
29
29
|
// iteration, treating the IRLS working response as the regression target).
|
|
30
30
|
// ---------------------------------------------------------------------------
|
|
31
|
-
|
|
31
|
+
// Soft-label logistic ridge. The target `y01` may take any value in [0, 1]:
|
|
32
|
+
// hard {0,1} labels or fractional EM-HAL E-step posteriors. The IRLS update
|
|
33
|
+
// is unchanged; fractional targets are standard for cross-entropy
|
|
34
|
+
// minimisation, so on hard {0,1} inputs the result is bit-identical to the
|
|
35
|
+
// former {-1,+1} implementation.
|
|
36
|
+
VectorXd logistic_ridge_init_y01(const VectorXd& y01, const MatrixXd& X, double lambda) {
|
|
32
37
|
const int n = X.rows();
|
|
33
38
|
const int p = X.cols();
|
|
34
|
-
if (
|
|
39
|
+
if (y01.size() != n) {
|
|
35
40
|
throw std::runtime_error("logistic_ridge_init: Y length must match nrow(X).");
|
|
36
41
|
}
|
|
37
42
|
// Match logistic_call: lambda is multiplied by n internally.
|
|
@@ -39,12 +44,6 @@ VectorXd logistic_ridge_init(const VectorXd& Y_pm1, const MatrixXd& X, double la
|
|
|
39
44
|
const int max_iter = 100;
|
|
40
45
|
const double tol = 1e-8;
|
|
41
46
|
|
|
42
|
-
// logistic_call expects Y in {-1,+1} but treats it via the GLM update with
|
|
43
|
-
// the {0,1} working response. We replicate that behaviour exactly: convert
|
|
44
|
-
// back to a {0,1} response y01 = (Y_pm1 + 1) / 2 to compute mu/working z.
|
|
45
|
-
VectorXd y01(n);
|
|
46
|
-
for (int i = 0; i < n; ++i) y01[i] = (Y_pm1[i] > 0) ? 1.0 : 0.0;
|
|
47
|
-
|
|
48
47
|
VectorXd beta = VectorXd::Zero(p);
|
|
49
48
|
for (int iter = 0; iter < max_iter; ++iter) {
|
|
50
49
|
VectorXd eta = X * beta;
|
|
@@ -66,6 +65,15 @@ VectorXd logistic_ridge_init(const VectorXd& Y_pm1, const MatrixXd& X, double la
|
|
|
66
65
|
return beta;
|
|
67
66
|
}
|
|
68
67
|
|
|
68
|
+
// Backward-compatible wrapper: accepts Y in {-1,+1} and converts to {0,1}.
|
|
69
|
+
// Used by the PGD (norm="sv") single-fit path, which is hard-label only.
|
|
70
|
+
VectorXd logistic_ridge_init(const VectorXd& Y_pm1, const MatrixXd& X, double lambda) {
|
|
71
|
+
const int n = X.rows();
|
|
72
|
+
VectorXd y01(n);
|
|
73
|
+
for (int i = 0; i < n; ++i) y01[i] = (Y_pm1[i] > 0) ? 1.0 : 0.0;
|
|
74
|
+
return logistic_ridge_init_y01(y01, X, lambda);
|
|
75
|
+
}
|
|
76
|
+
|
|
69
77
|
static double calibrate_logistic_intercept(const VectorXd& Y01,
|
|
70
78
|
const VectorXd& eta) {
|
|
71
79
|
const int n = (int)Y01.size();
|
|
@@ -84,16 +92,20 @@ static double calibrate_logistic_intercept(const VectorXd& Y01,
|
|
|
84
92
|
return b0;
|
|
85
93
|
}
|
|
86
94
|
|
|
87
|
-
|
|
88
|
-
|
|
95
|
+
// Soft cross-entropy risk for fractional targets y01 in [0,1], given a linear
|
|
96
|
+
// predictor `eta` (intercept already folded in). On hard {0,1} labels this
|
|
97
|
+
// equals the former {-1,+1} logistic risk, so behaviour is unchanged on
|
|
98
|
+
// binary inputs.
|
|
99
|
+
static double logistic_risk_y01(const VectorXd& y01, const VectorXd& eta) {
|
|
100
|
+
const int n = (int)y01.size();
|
|
89
101
|
if (eta.size() != n) {
|
|
90
|
-
throw std::runtime_error("
|
|
102
|
+
throw std::runtime_error("logistic_risk_y01: length mismatch");
|
|
91
103
|
}
|
|
92
104
|
double risk = 0.0;
|
|
93
105
|
for (int i = 0; i < n; ++i) {
|
|
94
|
-
const double
|
|
95
|
-
|
|
96
|
-
|
|
106
|
+
const double pi = 1.0 / (1.0 + std::exp(-eta[i]));
|
|
107
|
+
const double p = std::min(1.0 - 1e-15, std::max(1e-15, pi));
|
|
108
|
+
risk += -(y01[i] * std::log(p) + (1.0 - y01[i]) * std::log(1.0 - p));
|
|
97
109
|
}
|
|
98
110
|
return risk / n;
|
|
99
111
|
}
|
|
@@ -136,28 +148,31 @@ static std::vector<int> make_folds(int n, int K) {
|
|
|
136
148
|
// for the post-CV refit). When `with_pgd == false`, returns the logistic-ridge
|
|
137
149
|
// initialiser α directly with its training logistic risk; otherwise runs the
|
|
138
150
|
// PGD step on top of it (norm="sv").
|
|
139
|
-
static OptimizerOutput logistic_full_fit(const VectorXd&
|
|
151
|
+
static OptimizerOutput logistic_full_fit(const VectorXd& Y01,
|
|
140
152
|
const MatrixXd& Xtilde,
|
|
141
153
|
const MatrixXd& E_Nn,
|
|
142
154
|
double lambda,
|
|
143
155
|
int max_iter, double tol,
|
|
144
156
|
double step_factor, bool verbose,
|
|
145
157
|
bool with_pgd) {
|
|
146
|
-
VectorXd alpha0 =
|
|
158
|
+
VectorXd alpha0 = logistic_ridge_init_y01(Y01, Xtilde, lambda);
|
|
147
159
|
const int n = Xtilde.rows();
|
|
148
160
|
VectorXd alpha_fit;
|
|
149
161
|
if (with_pgd) {
|
|
162
|
+
// PGD (norm="sv") uses the {-1,+1} logistic loss and is reached only
|
|
163
|
+
// for hard labels (soft labels are rejected upstream), so thresholding
|
|
164
|
+
// at 0.5 recovers the exact {-1,+1} encoding.
|
|
165
|
+
VectorXd Y_pm1(n);
|
|
166
|
+
for (int i = 0; i < n; ++i) Y_pm1[i] = (Y01[i] > 0.5) ? 1.0 : -1.0;
|
|
150
167
|
OptimizerOutput out = pcghal_classi_call(Y_pm1, Xtilde, E_Nn, alpha0,
|
|
151
168
|
max_iter, tol, step_factor, verbose);
|
|
152
169
|
alpha_fit = out.alpha;
|
|
153
170
|
} else {
|
|
154
171
|
alpha_fit = alpha0; // logistic ridge only (norm="2")
|
|
155
172
|
}
|
|
156
|
-
VectorXd Y01(n);
|
|
157
|
-
for (int i = 0; i < n; ++i) Y01[i] = (Y_pm1[i] > 0.0) ? 1.0 : 0.0;
|
|
158
173
|
VectorXd eta = Xtilde * alpha_fit;
|
|
159
174
|
const double b0 = calibrate_logistic_intercept(Y01, eta);
|
|
160
|
-
const double risk =
|
|
175
|
+
const double risk = logistic_risk_y01(Y01, eta.array() + b0);
|
|
161
176
|
OptimizerOutput out;
|
|
162
177
|
out.alpha = alpha_fit;
|
|
163
178
|
out.alphaiters = MatrixXd::Zero(0, alpha_fit.size());
|
|
@@ -177,10 +192,21 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
|
|
|
177
192
|
const int n = X.rows();
|
|
178
193
|
const int p = X.cols();
|
|
179
194
|
if (Y.size() != n) throw std::runtime_error("pcghal_cv_classi: length(Y) != nrow(X)");
|
|
195
|
+
// Y must lie in [0,1]: hard {0,1} labels or soft EM-HAL posteriors. Soft
|
|
196
|
+
// labels (any value strictly inside (0,1)) are supported only for the
|
|
197
|
+
// logistic-ridge path (norm="2"); the PGD path (norm="sv", with_pgd=true)
|
|
198
|
+
// is not implemented for soft labels.
|
|
199
|
+
bool soft = false;
|
|
180
200
|
for (int i = 0; i < n; ++i) {
|
|
181
|
-
if (Y[i]
|
|
182
|
-
throw std::runtime_error("pcghal_cv_classi: Y must be 0
|
|
201
|
+
if (Y[i] < -1e-12 || Y[i] > 1.0 + 1e-12) {
|
|
202
|
+
throw std::runtime_error("pcghal_cv_classi: Y must be in [0,1]");
|
|
183
203
|
}
|
|
204
|
+
if (Y[i] > 1e-12 && Y[i] < 1.0 - 1e-12) soft = true;
|
|
205
|
+
}
|
|
206
|
+
if (soft && with_pgd) {
|
|
207
|
+
throw std::runtime_error(
|
|
208
|
+
"pcghal_cv_classi: soft labels (Y in (0,1)) are not implemented for "
|
|
209
|
+
"norm='sv'; use norm='1' or norm='2'.");
|
|
184
210
|
}
|
|
185
211
|
const int L = (int)lambdas.size();
|
|
186
212
|
if (L <= 0) throw std::runtime_error("pcghal_cv_classi: lambdas must be non-empty");
|
|
@@ -198,9 +224,9 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
|
|
|
198
224
|
const int final_npc = compute_classi_design(X, maxdeg, npc_eff, center,
|
|
199
225
|
Xtilde, E_Nn, U_top, d_top);
|
|
200
226
|
|
|
201
|
-
//
|
|
202
|
-
|
|
203
|
-
|
|
227
|
+
// Soft target in [0,1] used throughout (the ridge/CE machinery works
|
|
228
|
+
// directly in this space; the PGD branch builds {-1,+1} locally).
|
|
229
|
+
const VectorXd& Y01 = Y;
|
|
204
230
|
|
|
205
231
|
// Degenerate case: R `hapc(family="binomial", …)` passes nfolds=1 with a
|
|
206
232
|
// single λ — there is no proper train/test split. Fit on full data and
|
|
@@ -213,7 +239,7 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
|
|
|
213
239
|
for (int j = 0; j < L; ++j) {
|
|
214
240
|
const double lam = lambdas[j];
|
|
215
241
|
OptimizerOutput full_out = logistic_full_fit(
|
|
216
|
-
|
|
242
|
+
Y01, Xtilde, E_Nn, lam, max_iter, tol, step_factor,
|
|
217
243
|
verbose, with_pgd);
|
|
218
244
|
deviances[j] = full_out.risk;
|
|
219
245
|
if (full_out.risk < best_val) {
|
|
@@ -265,19 +291,22 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
|
|
|
265
291
|
if (ntr == 0 || nte == 0) continue;
|
|
266
292
|
|
|
267
293
|
MatrixXd Xtr(ntr, final_npc), Xte(nte, final_npc);
|
|
268
|
-
VectorXd
|
|
294
|
+
VectorXd Ytr01(ntr), Yte01(nte);
|
|
269
295
|
for (int i = 0; i < ntr; ++i) {
|
|
270
296
|
Xtr.row(i) = Xtilde.row(tr_idx[i]);
|
|
271
|
-
|
|
297
|
+
Ytr01[i] = Y01[tr_idx[i]];
|
|
272
298
|
}
|
|
273
299
|
for (int i = 0; i < nte; ++i) {
|
|
274
300
|
Xte.row(i) = Xtilde.row(te_idx[i]);
|
|
275
|
-
Yte01[i] =
|
|
301
|
+
Yte01[i] = Y01[te_idx[i]];
|
|
276
302
|
}
|
|
277
303
|
|
|
278
|
-
VectorXd alpha0 =
|
|
304
|
+
VectorXd alpha0 = logistic_ridge_init_y01(Ytr01, Xtr, lambda);
|
|
279
305
|
VectorXd alpha_fold;
|
|
280
306
|
if (with_pgd) {
|
|
307
|
+
// Hard-label only path (soft labels rejected upstream).
|
|
308
|
+
VectorXd Ytr_pm1(ntr);
|
|
309
|
+
for (int i = 0; i < ntr; ++i) Ytr_pm1[i] = (Ytr01[i] > 0.5) ? 1.0 : -1.0;
|
|
281
310
|
OptimizerOutput out = pcghal_classi_call(Ytr_pm1, Xtr, E_Nn, alpha0,
|
|
282
311
|
max_iter, tol, step_factor,
|
|
283
312
|
verbose);
|
|
@@ -287,15 +316,13 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
|
|
|
287
316
|
}
|
|
288
317
|
|
|
289
318
|
VectorXd eta_tr = Xtr * alpha_fold;
|
|
290
|
-
VectorXd Ytr01(ntr);
|
|
291
|
-
for (int i = 0; i < ntr; ++i) Ytr01[i] = (Ytr_pm1[i] > 0.0) ? 1.0 : 0.0;
|
|
292
319
|
const double b0_fold = calibrate_logistic_intercept(Ytr01, eta_tr);
|
|
293
320
|
VectorXd eta = (Xte * alpha_fold).array() + b0_fold;
|
|
294
321
|
VectorXd probs = (1.0 + (-eta.array()).exp()).inverse();
|
|
295
322
|
double dev = 0.0;
|
|
296
323
|
for (int i = 0; i < nte; ++i) {
|
|
297
324
|
double pi = std::max(1e-15, std::min(1.0 - 1e-15, probs[i]));
|
|
298
|
-
dev += (Yte01[i]
|
|
325
|
+
dev += -(Yte01[i] * std::log(pi) + (1.0 - Yte01[i]) * std::log(1.0 - pi));
|
|
299
326
|
}
|
|
300
327
|
fold_error(k - 1, j) = dev / nte;
|
|
301
328
|
}
|
|
@@ -325,7 +352,7 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
|
|
|
325
352
|
|
|
326
353
|
// Refit on full data at best_lambda (logistic ridge ± PGD).
|
|
327
354
|
OptimizerOutput full_out = logistic_full_fit(
|
|
328
|
-
|
|
355
|
+
Y01, Xtilde, E_Nn, best_lambda,
|
|
329
356
|
max_iter, tol, step_factor, verbose, with_pgd);
|
|
330
357
|
|
|
331
358
|
// Predict on `predict_data` if supplied (else empty vector).
|
|
@@ -347,8 +347,11 @@ extern "C" SEXP single_pcghal_classi_ridge_call(SEXP X_, SEXP Y_, SEXP maxdeg_,
|
|
|
347
347
|
if (Rf_length(Y_) != n) Rf_error("length(Y) must equal nrow(X).");
|
|
348
348
|
Map<const MatrixXd> X(REAL(X_), n, p);
|
|
349
349
|
Map<const VectorXd> Y01(REAL(Y_), n);
|
|
350
|
+
// Y must lie in [0,1]: hard {0,1} labels or soft EM-HAL posteriors. The
|
|
351
|
+
// logistic-ridge fit (norm="2") supports both.
|
|
350
352
|
for (int i = 0; i < n; ++i) {
|
|
351
|
-
if (Y01[i]
|
|
353
|
+
if (Y01[i] < -1e-12 || Y01[i] > 1.0 + 1e-12)
|
|
354
|
+
Rf_error("Y must be in [0,1]");
|
|
352
355
|
}
|
|
353
356
|
int maxdeg = Rf_isInteger(maxdeg_) ? INTEGER(maxdeg_)[0] : (int)REAL(maxdeg_)[0];
|
|
354
357
|
int npc = Rf_isInteger(npc_) ? INTEGER(npc_)[0] : (int)REAL(npc_)[0];
|
|
@@ -365,9 +368,6 @@ extern "C" SEXP single_pcghal_classi_ridge_call(SEXP X_, SEXP Y_, SEXP maxdeg_,
|
|
|
365
368
|
const int final_npc = (int)des.d.size();
|
|
366
369
|
MatrixXd Xtilde = des.U * des.d.asDiagonal();
|
|
367
370
|
|
|
368
|
-
VectorXd Y_pm1(n);
|
|
369
|
-
for (int i = 0; i < n; ++i) Y_pm1[i] = (Y01[i] == 1.0) ? 1.0 : -1.0;
|
|
370
|
-
|
|
371
371
|
auto calibrate_b0 = [](const VectorXd& y01, const VectorXd& eta) {
|
|
372
372
|
double b0 = 0.0;
|
|
373
373
|
for (int it = 0; it < 50; ++it) {
|
|
@@ -381,16 +381,15 @@ extern "C" SEXP single_pcghal_classi_ridge_call(SEXP X_, SEXP Y_, SEXP maxdeg_,
|
|
|
381
381
|
return b0;
|
|
382
382
|
};
|
|
383
383
|
|
|
384
|
-
VectorXd alpha =
|
|
384
|
+
VectorXd alpha = logistic_ridge_init_y01(Y01, Xtilde, lambda);
|
|
385
385
|
VectorXd eta = Xtilde * alpha;
|
|
386
386
|
const double b0 = calibrate_b0(Y01, eta);
|
|
387
|
+
// Soft cross-entropy risk (equals the {-1,+1} logistic risk on hard labels).
|
|
387
388
|
double risk = 0.0;
|
|
388
389
|
for (int i = 0; i < n; ++i) {
|
|
389
|
-
double
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
else
|
|
393
|
-
risk += -ymu + std::log1p(std::exp(ymu));
|
|
390
|
+
const double pi = 1.0 / (1.0 + std::exp(-(eta[i] + b0)));
|
|
391
|
+
const double pp = std::min(1.0 - 1e-15, std::max(1e-15, pi));
|
|
392
|
+
risk += -(Y01[i] * std::log(pp) + (1.0 - Y01[i]) * std::log(1.0 - pp));
|
|
394
393
|
}
|
|
395
394
|
risk /= n;
|
|
396
395
|
|
hapc-2.1.0/pyproject.toml
DELETED
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
[build-system]
|
|
2
|
-
requires = ["setuptools>=65", "wheel", "cmake>=3.15", "pybind11>=2.6"]
|
|
3
|
-
build-backend = "setuptools.build_meta"
|
|
4
|
-
|
|
5
|
-
[project]
|
|
6
|
-
name = "hapc"
|
|
7
|
-
version = "2.1.0"
|
|
8
|
-
description = "Highly Adaptive Principal Components"
|
|
9
|
-
readme = "README.md"
|
|
10
|
-
requires-python = ">=3.8"
|
|
11
|
-
authors = [
|
|
12
|
-
{name = "Carlos García Meixide", email = "cgmeixide@gmail.com"}
|
|
13
|
-
]
|
|
14
|
-
license = {text = "MIT"}
|
|
15
|
-
classifiers = [
|
|
16
|
-
"Programming Language :: Python :: 3",
|
|
17
|
-
"Programming Language :: Python :: 3.8",
|
|
18
|
-
"Programming Language :: Python :: 3.9",
|
|
19
|
-
"Programming Language :: Python :: 3.10",
|
|
20
|
-
"Programming Language :: Python :: 3.11",
|
|
21
|
-
"Programming Language :: Python :: 3.12",
|
|
22
|
-
"Operating System :: OS Independent",
|
|
23
|
-
]
|
|
24
|
-
dependencies = [
|
|
25
|
-
"numpy>=1.24,<2.3",
|
|
26
|
-
"scikit-learn>=1.0",
|
|
27
|
-
]
|
|
28
|
-
|
|
29
|
-
[project.optional-dependencies]
|
|
30
|
-
dev = ["pytest", "pytest-cov", "black", "flake8"]
|
|
31
|
-
|
|
32
|
-
[project.urls]
|
|
33
|
-
Homepage = "https://github.com/meixide/hapc"
|
|
34
|
-
Documentation = "https://github.com/meixide/hapc#readme"
|
|
35
|
-
Repository = "https://github.com/meixide/hapc.git"
|
|
36
|
-
Issues = "https://github.com/meixide/hapc/issues"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|