hapc 2.1.0__tar.gz → 2.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. {hapc-2.1.0 → hapc-2.3.1}/CMakeLists.txt +8 -2
  2. {hapc-2.1.0/python/hapc.egg-info → hapc-2.3.1}/PKG-INFO +29 -4
  3. {hapc-2.1.0 → hapc-2.3.1}/README.md +28 -3
  4. hapc-2.3.1/pyproject.toml +70 -0
  5. {hapc-2.1.0 → hapc-2.3.1}/python/hapc/__init__.py +1 -1
  6. {hapc-2.1.0 → hapc-2.3.1}/python/hapc/cv.py +12 -5
  7. {hapc-2.1.0 → hapc-2.3.1}/python/hapc/single.py +96 -23
  8. {hapc-2.1.0 → hapc-2.3.1/python/hapc.egg-info}/PKG-INFO +29 -4
  9. {hapc-2.1.0 → hapc-2.3.1}/setup.py +10 -0
  10. {hapc-2.1.0 → hapc-2.3.1}/src/bindings.cpp +5 -0
  11. {hapc-2.1.0 → hapc-2.3.1}/src/hapc_core.hpp +8 -1
  12. {hapc-2.1.0 → hapc-2.3.1}/src/pcghal_cv_classi_cpp.cpp +60 -33
  13. {hapc-2.1.0 → hapc-2.3.1}/src/r_bindings.cpp +9 -10
  14. hapc-2.1.0/pyproject.toml +0 -36
  15. {hapc-2.1.0 → hapc-2.3.1}/LICENSE +0 -0
  16. {hapc-2.1.0 → hapc-2.3.1}/MANIFEST.in +0 -0
  17. {hapc-2.1.0 → hapc-2.3.1}/python/hapc/ate.py +0 -0
  18. {hapc-2.1.0 → hapc-2.3.1}/python/hapc/core.py +0 -0
  19. {hapc-2.1.0 → hapc-2.3.1}/python/hapc.egg-info/SOURCES.txt +0 -0
  20. {hapc-2.1.0 → hapc-2.3.1}/python/hapc.egg-info/dependency_links.txt +0 -0
  21. {hapc-2.1.0 → hapc-2.3.1}/python/hapc.egg-info/not-zip-safe +0 -0
  22. {hapc-2.1.0 → hapc-2.3.1}/python/hapc.egg-info/requires.txt +0 -0
  23. {hapc-2.1.0 → hapc-2.3.1}/python/hapc.egg-info/top_level.txt +0 -0
  24. {hapc-2.1.0 → hapc-2.3.1}/setup.cfg +0 -0
  25. {hapc-2.1.0 → hapc-2.3.1}/src/cross_kernel.cpp +0 -0
  26. {hapc-2.1.0 → hapc-2.3.1}/src/cv_classi.cpp +0 -0
  27. {hapc-2.1.0 → hapc-2.3.1}/src/cv_fast_pchal.cpp +0 -0
  28. {hapc-2.1.0 → hapc-2.3.1}/src/cv_fast_pchal_python.cpp +0 -0
  29. {hapc-2.1.0 → hapc-2.3.1}/src/fast_pchal.cpp +0 -0
  30. {hapc-2.1.0 → hapc-2.3.1}/src/logistic_call.cpp +0 -0
  31. {hapc-2.1.0 → hapc-2.3.1}/src/mkernel.cpp +0 -0
  32. {hapc-2.1.0 → hapc-2.3.1}/src/pcghal_call.cpp +0 -0
  33. {hapc-2.1.0 → hapc-2.3.1}/src/pcghal_classi_call.cpp +0 -0
  34. {hapc-2.1.0 → hapc-2.3.1}/src/pcghal_cv.cpp +0 -0
  35. {hapc-2.1.0 → hapc-2.3.1}/src/pcghal_cv_cpp.cpp +0 -0
  36. {hapc-2.1.0 → hapc-2.3.1}/src/pchal_design.cpp +0 -0
  37. {hapc-2.1.0 → hapc-2.3.1}/src/ridge_wrappers.cpp +0 -0
  38. {hapc-2.1.0 → hapc-2.3.1}/src/single_pcghal_cpp.cpp +0 -0
  39. {hapc-2.1.0 → hapc-2.3.1}/src/single_pchar.cpp +0 -0
  40. {hapc-2.1.0 → hapc-2.3.1}/tests/test_api.py +0 -0
  41. {hapc-2.1.0 → hapc-2.3.1}/tests/test_ate.py +0 -0
  42. {hapc-2.1.0 → hapc-2.3.1}/tests/test_ate_hapc_diagnostics_example.py +0 -0
  43. {hapc-2.1.0 → hapc-2.3.1}/tests/test_core.py +0 -0
  44. {hapc-2.1.0 → hapc-2.3.1}/tests/test_logistic_regression.py +0 -0
  45. {hapc-2.1.0 → hapc-2.3.1}/tests/test_r_vs_python_alpha.py +0 -0
@@ -1,4 +1,4 @@
1
- cmake_minimum_required(VERSION 3.15)
1
+ cmake_minimum_required(VERSION 3.18)
2
2
  project(hapc)
3
3
 
4
4
  set(CMAKE_CXX_STANDARD 17)
@@ -15,7 +15,13 @@ endif()
15
15
  # Python3_EXECUTABLE from setup.py so the build always targets the *same*
16
16
  # interpreter that pip is using. Without this CMake may discover a newer/
17
17
  # older system Python and produce a .so tagged for the wrong ABI.
18
- find_package(Python3 COMPONENTS Interpreter Development REQUIRED)
18
+ #
19
+ # Use Development.Module (headers only), NOT the full Development component:
20
+ # the latter also requires Development.Embed -> libpython, which manylinux
21
+ # images deliberately do not ship (extension modules must not link libpython).
22
+ # Requiring full Development makes the manylinux build fail with
23
+ # "Could NOT find Python3 (missing: Python3_LIBRARIES Development.Embed)".
24
+ find_package(Python3 COMPONENTS Interpreter Development.Module REQUIRED)
19
25
  message(STATUS "Python3_EXECUTABLE: ${Python3_EXECUTABLE}")
20
26
  message(STATUS "Python3_VERSION: ${Python3_VERSION}")
21
27
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hapc
3
- Version: 2.1.0
3
+ Version: 2.3.1
4
4
  Summary: Highly Adaptive Principal Components
5
5
  Home-page: https://github.com/meixide/hapc
6
6
  Author: Carlos García Meixide
@@ -51,16 +51,41 @@ A fast and flexible machine learning library for nonparametric high-dimensional
51
51
  pip install hapc
52
52
  ```
53
53
 
54
+ Prebuilt wheels are published for Linux (manylinux2014, x86_64), macOS
55
+ (Intel + Apple Silicon) and Windows, for CPython 3.8–3.12. No compiler,
56
+ CMake or Eigen is needed when a wheel is available.
57
+
58
+ ### Linux / HPC clusters
59
+
60
+ The Linux wheels use the **manylinux2014** baseline (glibc 2.17), so
61
+ `pip install hapc` works out of the box on HPC login/compute nodes —
62
+ no `conda` toolchain, `devtoolset`, or sysroot setup required:
63
+
64
+ ```bash
65
+ pip install hapc
66
+ ```
67
+
68
+ If you must build from the source distribution (niche architecture, very
69
+ old Python, or an air-gapped node), provide a C++17 compiler and either
70
+ let CMake fetch Eigen automatically (needs network) or install Eigen and
71
+ let `find_package(Eigen3)` find it:
72
+
73
+ ```bash
74
+ # with conda compilers (recommended on HPC)
75
+ conda install -c conda-forge cxx-compiler cmake eigen
76
+ pip install hapc --no-binary hapc
77
+ ```
78
+
54
79
  ### Install from GitHub (latest development version)
55
80
 
56
81
  ```bash
57
- pip install git+https://github.com/yourusername/hapc.git
82
+ pip install git+https://github.com/meixide/hapc.git
58
83
  ```
59
84
 
60
85
  Or with editable install for development:
61
86
 
62
87
  ```bash
63
- git clone https://github.com/yourusername/hapc.git
88
+ git clone https://github.com/meixide/hapc.git
64
89
  cd hapc
65
90
  pip install -e .
66
91
  ```
@@ -201,7 +226,7 @@ Cross-validation to select lambda.
201
226
  Contributions welcome! The C++ core is shared between R and Python packages.
202
227
 
203
228
  ```bash
204
- git clone https://github.com/yourusername/hapc.git
229
+ git clone https://github.com/meixide/hapc.git
205
230
  cd hapc
206
231
  pip install -e .
207
232
  pytest
@@ -17,16 +17,41 @@ A fast and flexible machine learning library for nonparametric high-dimensional
17
17
  pip install hapc
18
18
  ```
19
19
 
20
+ Prebuilt wheels are published for Linux (manylinux2014, x86_64), macOS
21
+ (Intel + Apple Silicon) and Windows, for CPython 3.8–3.12. No compiler,
22
+ CMake or Eigen is needed when a wheel is available.
23
+
24
+ ### Linux / HPC clusters
25
+
26
+ The Linux wheels use the **manylinux2014** baseline (glibc 2.17), so
27
+ `pip install hapc` works out of the box on HPC login/compute nodes —
28
+ no `conda` toolchain, `devtoolset`, or sysroot setup required:
29
+
30
+ ```bash
31
+ pip install hapc
32
+ ```
33
+
34
+ If you must build from the source distribution (niche architecture, very
35
+ old Python, or an air-gapped node), provide a C++17 compiler and either
36
+ let CMake fetch Eigen automatically (needs network) or install Eigen and
37
+ let `find_package(Eigen3)` find it:
38
+
39
+ ```bash
40
+ # with conda compilers (recommended on HPC)
41
+ conda install -c conda-forge cxx-compiler cmake eigen
42
+ pip install hapc --no-binary hapc
43
+ ```
44
+
20
45
  ### Install from GitHub (latest development version)
21
46
 
22
47
  ```bash
23
- pip install git+https://github.com/yourusername/hapc.git
48
+ pip install git+https://github.com/meixide/hapc.git
24
49
  ```
25
50
 
26
51
  Or with editable install for development:
27
52
 
28
53
  ```bash
29
- git clone https://github.com/yourusername/hapc.git
54
+ git clone https://github.com/meixide/hapc.git
30
55
  cd hapc
31
56
  pip install -e .
32
57
  ```
@@ -167,7 +192,7 @@ Cross-validation to select lambda.
167
192
  Contributions welcome! The C++ core is shared between R and Python packages.
168
193
 
169
194
  ```bash
170
- git clone https://github.com/yourusername/hapc.git
195
+ git clone https://github.com/meixide/hapc.git
171
196
  cd hapc
172
197
  pip install -e .
173
198
  pytest
@@ -0,0 +1,70 @@
1
+ [build-system]
2
+ requires = ["setuptools>=65", "wheel", "cmake>=3.18", "pybind11>=2.6"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "hapc"
7
+ version = "2.3.1"
8
+ description = "Highly Adaptive Principal Components"
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ authors = [
12
+ {name = "Carlos García Meixide", email = "cgmeixide@gmail.com"}
13
+ ]
14
+ license = {text = "MIT"}
15
+ classifiers = [
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Python :: 3.8",
18
+ "Programming Language :: Python :: 3.9",
19
+ "Programming Language :: Python :: 3.10",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Operating System :: OS Independent",
23
+ ]
24
+ dependencies = [
25
+ "numpy>=1.24,<2.3",
26
+ "scikit-learn>=1.0",
27
+ ]
28
+
29
+ [project.optional-dependencies]
30
+ dev = ["pytest", "pytest-cov", "black", "flake8"]
31
+
32
+ [project.urls]
33
+ Homepage = "https://github.com/meixide/hapc"
34
+ Documentation = "https://github.com/meixide/hapc#readme"
35
+ Repository = "https://github.com/meixide/hapc.git"
36
+ Issues = "https://github.com/meixide/hapc/issues"
37
+
38
+ [tool.cibuildwheel]
39
+ # Build CPython 3.8–3.12 only; skip PyPy and musl (HPC/desktop targets are glibc).
40
+ build = "cp38-* cp39-* cp310-* cp311-* cp312-*"
41
+ skip = ["pp*", "*-musllinux*"]
42
+ build-verbosity = 1
43
+ # Smoke-test every wheel: install it (pulling numpy/scikit-learn) and import the
44
+ # compiled extension. Catches wrong-ABI / unresolved-symbol wheels before publish.
45
+ test-command = "python -c \"import hapc; print(hapc.__version__)\""
46
+ # NOTE: the latest scipy (transitive via scikit-learn) no longer ships a
47
+ # manylinux2014 wheel, so the in-container smoke test would try to compile it
48
+ # from source and fail. We force scipy to resolve from a binary wheel via
49
+ # PIP_ONLY_BINARY=scipy, set in the workflow and passed into the Linux container
50
+ # (see .github/workflows/build-and-publish.yml). It must apply to the wheel
51
+ # install itself, which is why it lives in the env rather than test-requires.
52
+
53
+ [tool.cibuildwheel.linux]
54
+ archs = ["x86_64"]
55
+ # manylinux2014 -> glibc 2.17 baseline + bundled libstdc++ via auditwheel, so the
56
+ # wheel installs and runs unmodified on any HPC cluster (glibc >= 2.17), no
57
+ # compiler / conda toolchain / sysroot required.
58
+ manylinux-x86_64-image = "manylinux2014"
59
+
60
+ [tool.cibuildwheel.macos]
61
+ # Build fat universal2 wheels (x86_64 + arm64) from a single runner. CMake does
62
+ # not honour the interpreter's arch flags, so the arch is forced explicitly via
63
+ # CMAKE_ARGS below (setup.py appends $CMAKE_ARGS to the cmake invocation).
64
+ # delocate then verifies both slices are present, which is what caught the old
65
+ # single-arch-but-universal2-tagged wheels.
66
+ archs = ["universal2"]
67
+ environment = { CMAKE_ARGS = "-DCMAKE_OSX_ARCHITECTURES=arm64;x86_64" }
68
+
69
+ [tool.cibuildwheel.windows]
70
+ archs = ["AMD64"]
@@ -19,7 +19,7 @@ Lower-level building blocks:
19
19
  - :func:`ate_hapc` — ATE estimate + Wald CI via HAPC + outcome undersmoothing.
20
20
  """
21
21
 
22
- __version__ = "2.1.0"
22
+ __version__ = "2.3.1"
23
23
 
24
24
  from .core import (
25
25
  DesignOutput,
@@ -18,7 +18,11 @@ import numpy as np
18
18
 
19
19
  from . import hapc_core
20
20
  from .core import _C, cross_kernel_hapc, design_hapc
21
- from .single import single_pcghal_classification_lasso
21
+ from .single import (
22
+ _check_binomial_labels,
23
+ _to_soft01,
24
+ single_pcghal_classification_lasso,
25
+ )
22
26
 
23
27
 
24
28
  class CVResult(NamedTuple):
@@ -376,6 +380,9 @@ def pcghal_cv_classi_lasso(X: np.ndarray, Y: np.ndarray,
376
380
  if not np.all(lams > 0):
377
381
  raise ValueError("All lambdas must be > 0 for logistic LASSO.")
378
382
 
383
+ # Soft target in [0,1] used for the held-out cross-entropy deviance
384
+ # (accepts hard {0,1}/{-1,+1} or fractional EM-HAL posteriors).
385
+ q = _to_soft01(Y)
379
386
  folds = _native_folds(n, int(nfolds))
380
387
  L = lams.size
381
388
  fold_dev = np.full((int(nfolds), L), np.nan)
@@ -386,7 +393,7 @@ def pcghal_cv_classi_lasso(X: np.ndarray, Y: np.ndarray,
386
393
  if te.size == 0 or tr.size == 0:
387
394
  continue
388
395
  Xtr, Ytr = X[tr], Y[tr]
389
- Xte, Yte = X[te], Y[te]
396
+ Xte, Yte = X[te], q[te]
390
397
 
391
398
  for j, lam in enumerate(lams):
392
399
  res = single_pcghal_classification_lasso(
@@ -395,9 +402,7 @@ def pcghal_cv_classi_lasso(X: np.ndarray, Y: np.ndarray,
395
402
  verbose=bool(verbose), max_iter=int(max_iter),
396
403
  )
397
404
  probs = np.clip(res.probabilities, 1e-15, 1 - 1e-15)
398
- yte01 = (Yte == 1).astype(np.float64) if set(np.unique(Yte).tolist()).issubset({0.0, 1.0}) \
399
- else (Yte > 0).astype(np.float64)
400
- dev = -(yte01 * np.log(probs) + (1 - yte01) * np.log(1 - probs))
405
+ dev = -(Yte * np.log(probs) + (1 - Yte) * np.log(1 - probs))
401
406
  fold_dev[k - 1, j] = float(dev.mean())
402
407
 
403
408
  deviances = np.nanmean(fold_dev, axis=0)
@@ -500,6 +505,8 @@ def cv_hapc(X: np.ndarray, Y: np.ndarray,
500
505
  lams = _grid(None, log_lambda_min, log_lambda_max, grid_length)
501
506
 
502
507
  if family == "binomial":
508
+ # Validate labels; allow soft labels in [0,1] only for norm in {"1","2"}.
509
+ _check_binomial_labels(Y, norm)
503
510
  if norm in {"sv", "2"}:
504
511
  return pcghal_cv_classi(
505
512
  X, Y, max_degree=max_degree, npcs=npcs,
@@ -95,6 +95,61 @@ def _to_pm1(Y: np.ndarray, *, verbose: bool = False) -> np.ndarray:
95
95
  )
96
96
 
97
97
 
98
+ def _label_kind(Y: np.ndarray) -> str:
99
+ """Classify a binomial response vector.
100
+
101
+ Returns ``"01"`` (hard labels in ``{0,1}``), ``"pm1"`` (hard labels in
102
+ ``{-1,+1}``), or ``"soft"`` (fractional labels in ``[0,1]``, e.g. EM-HAL
103
+ E-step posteriors). Raises ``ValueError`` if any value falls outside
104
+ ``[0,1]`` and the set is not exactly ``{-1,+1}``.
105
+ """
106
+ Y = np.asarray(Y, dtype=np.float64).ravel()
107
+ u = np.unique(Y[~np.isnan(Y)])
108
+ s = set(u.tolist())
109
+ if s.issubset({0.0, 1.0}):
110
+ return "01"
111
+ if s == {-1.0, 1.0}:
112
+ return "pm1"
113
+ if u.size and u.min() >= 0.0 and u.max() <= 1.0:
114
+ return "soft"
115
+ raise ValueError(
116
+ "family='binomial' requires Y in {0,1}, {-1,+1}, or soft labels in "
117
+ "[0,1]; found values outside [0,1]."
118
+ )
119
+
120
+
121
+ def _to_soft01(Y: np.ndarray) -> np.ndarray:
122
+ """Map a binomial response to a soft cross-entropy target in ``[0,1]``."""
123
+ Y = np.asarray(Y, dtype=np.float64).ravel()
124
+ return (Y + 1.0) / 2.0 if _label_kind(Y) == "pm1" else Y
125
+
126
+
127
+ def _check_binomial_labels(Y: np.ndarray, norm: str) -> str:
128
+ """Validate labels and enforce the soft-label norm restriction.
129
+
130
+ Soft labels (any value strictly inside ``(0,1)``) are supported only for
131
+ ``norm`` in ``{"1","2"}``; ``norm="sv"`` raises ``NotImplementedError``.
132
+ A warning is emitted whenever soft labels are detected. Returns the label
133
+ kind from :func:`_label_kind`.
134
+ """
135
+ import warnings
136
+
137
+ kind = _label_kind(Y)
138
+ if kind == "soft":
139
+ if norm == "sv":
140
+ raise NotImplementedError(
141
+ "Soft labels (Y in (0,1)) are not implemented for norm='sv'; "
142
+ "use norm='1' or norm='2'."
143
+ )
144
+ warnings.warn(
145
+ "Non-binary labels detected in Y: treating them as soft labels in "
146
+ "[0,1] (cross-entropy target). Supported only for norm='1' and "
147
+ "norm='2'.",
148
+ stacklevel=2,
149
+ )
150
+ return kind
151
+
152
+
98
153
  def _calibrate_logistic_intercept(y01: np.ndarray, eta: np.ndarray) -> float:
99
154
  """Newton calibration for intercept with fixed linear predictor ``eta``."""
100
155
  y01 = np.asarray(y01, dtype=np.float64).ravel()
@@ -367,24 +422,21 @@ def single_pcghal_classification_ridge_only(
367
422
  SinglePcghalClassificationResult
368
423
  """
369
424
  X, Y, n, p = _check_xy(X, Y)
370
- Y_pm1 = _to_pm1(Y, verbose=verbose)
425
+ # Accept hard {0,1}/{-1,+1} or soft [0,1] labels (cross-entropy target).
426
+ y01 = _to_soft01(Y)
371
427
 
372
428
  des = design_hapc(X, max_degree, npcs, center=center)
373
429
  final_npc = des.d.shape[0]
374
430
  Xtilde = des.U[:, :final_npc] * des.d[:final_npc]
375
431
 
376
432
  alpha = np.asarray(
377
- hapc_core.logistic_ridge_init(_C(Y_pm1), _C(Xtilde), float(lambda_))
433
+ hapc_core.logistic_ridge_init_y01(_C(y01), _C(Xtilde), float(lambda_))
378
434
  ).ravel()
379
435
 
380
436
  eta = Xtilde @ alpha
381
- y01 = (Y_pm1 > 0).astype(np.float64)
382
437
  b0 = _calibrate_logistic_intercept(y01, eta)
383
- ymu = Y_pm1 * (eta + b0)
384
- risk = float(
385
- np.where(ymu > 0, np.log1p(np.exp(-ymu)), -ymu + np.log1p(np.exp(ymu)))
386
- .mean()
387
- )
438
+ phat = np.clip(1.0 / (1.0 + np.exp(-(eta + b0))), 1e-15, 1 - 1e-15)
439
+ risk = float((-(y01 * np.log(phat) + (1 - y01) * np.log(1 - phat))).mean())
388
440
 
389
441
  predictions = probabilities = predicted_classes = None
390
442
  if predict is not None:
@@ -480,13 +532,26 @@ def single_pcghal_classification_lasso(
480
532
  raise ValueError(f"lambda_ must be > 0 for LASSO; got {lambda_}")
481
533
 
482
534
  X, Y, n, p = _check_xy(X, Y)
483
- Y_pm1 = _to_pm1(Y, verbose=verbose)
484
- Y_01 = (Y_pm1 > 0).astype(np.int64)
535
+ # Accept hard {0,1}/{-1,+1} or soft [0,1] labels (cross-entropy target).
536
+ q = _to_soft01(Y)
485
537
 
486
538
  des = design_hapc(X, max_degree, npcs, center=center)
487
539
  final_npc = des.d.shape[0]
488
540
  Xtilde = des.U[:, :final_npc] * des.d[:final_npc]
489
541
 
542
+ # For soft labels, replicate each row as a (label=1, weight=q) and
543
+ # (label=0, weight=1-q) pair so the sample-weighted logistic loss equals
544
+ # the soft cross-entropy. On hard labels this reduces to the plain fit.
545
+ is_soft = bool(np.any((q > 1e-12) & (q < 1.0 - 1e-12)))
546
+ if is_soft:
547
+ Xfit = _C(np.vstack([Xtilde, Xtilde]))
548
+ yfit = np.concatenate([np.ones(n), np.zeros(n)]).astype(np.int64)
549
+ wfit = np.concatenate([q, 1.0 - q]).astype(np.float64)
550
+ else:
551
+ Xfit = _C(Xtilde)
552
+ yfit = (q > 0.5).astype(np.int64)
553
+ wfit = None
554
+
490
555
  C = 1.0 / (n * float(lambda_))
491
556
  # sklearn>=1.8 deprecated penalty="l1" in favour of l1_ratio=1 with the
492
557
  # liblinear solver; older versions still need penalty="l1". Try the new
@@ -495,24 +560,28 @@ def single_pcghal_classification_lasso(
495
560
  sig_params = inspect.signature(LogisticRegression).parameters
496
561
  common_kw = dict(solver="liblinear", C=C, fit_intercept=False,
497
562
  max_iter=int(max_iter))
563
+
564
+ def _fit(**ctor):
565
+ m = LogisticRegression(**ctor, **common_kw)
566
+ if wfit is None:
567
+ m.fit(Xfit, yfit)
568
+ else:
569
+ m.fit(Xfit, yfit, sample_weight=wfit)
570
+ return m
571
+
498
572
  if "l1_ratio" in sig_params and "penalty" in sig_params:
499
573
  try:
500
- model = LogisticRegression(l1_ratio=1.0, **common_kw)
501
- model.fit(_C(Xtilde), Y_01)
574
+ model = _fit(l1_ratio=1.0)
502
575
  except (TypeError, ValueError):
503
- model = LogisticRegression(penalty="l1", **common_kw)
504
- model.fit(_C(Xtilde), Y_01)
576
+ model = _fit(penalty="l1")
505
577
  else: # pragma: no cover (very old sklearn)
506
- model = LogisticRegression(penalty="l1", **common_kw)
507
- model.fit(_C(Xtilde), Y_01)
578
+ model = _fit(penalty="l1")
508
579
  alpha = np.asarray(model.coef_, dtype=np.float64).ravel()
509
- b0 = _calibrate_logistic_intercept(Y_01.astype(np.float64), Xtilde @ alpha)
580
+ b0 = _calibrate_logistic_intercept(q, Xtilde @ alpha)
510
581
 
511
582
  eta = Xtilde @ alpha + b0
512
- ymu = Y_pm1 * eta
513
- risk = float(
514
- np.where(ymu > 0, np.log1p(np.exp(-ymu)), -ymu + np.log1p(np.exp(ymu))).mean()
515
- )
583
+ phat = np.clip(1.0 / (1.0 + np.exp(-eta)), 1e-15, 1 - 1e-15)
584
+ risk = float((-(q * np.log(phat) + (1 - q) * np.log(1 - phat))).mean())
516
585
 
517
586
  predictions = probabilities = predicted_classes = None
518
587
  if predict is not None:
@@ -560,8 +629,10 @@ def hapc(X: np.ndarray, Y: np.ndarray,
560
629
  X : np.ndarray, shape (n, p)
561
630
  Features.
562
631
  Y : np.ndarray, shape (n,)
563
- Response. For ``family="binomial"`` must contain only ``{0,1}`` or
564
- ``{-1,+1}``.
632
+ Response. For ``family="binomial"``: hard labels in ``{0,1}`` or
633
+ ``{-1,+1}``, or soft labels in ``[0,1]`` (e.g. EM-HAL E-step
634
+ posteriors). Soft labels are supported only for ``norm`` in
635
+ ``{"1","2"}``; ``norm="sv"`` requires hard labels.
565
636
  family : {"gaussian", "binomial"}, default "gaussian"
566
637
  Loss family.
567
638
  max_degree : int, default 1
@@ -617,6 +688,8 @@ def hapc(X: np.ndarray, Y: np.ndarray,
617
688
  npcs = int(X.shape[0])
618
689
 
619
690
  if family == "binomial":
691
+ # Validate labels; allow soft labels in [0,1] only for norm in {"1","2"}.
692
+ _check_binomial_labels(Y, norm)
620
693
  if norm == "sv":
621
694
  return single_pcghal_classification(
622
695
  X, Y, max_degree, npcs, lambda_,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hapc
3
- Version: 2.1.0
3
+ Version: 2.3.1
4
4
  Summary: Highly Adaptive Principal Components
5
5
  Home-page: https://github.com/meixide/hapc
6
6
  Author: Carlos García Meixide
@@ -51,16 +51,41 @@ A fast and flexible machine learning library for nonparametric high-dimensional
51
51
  pip install hapc
52
52
  ```
53
53
 
54
+ Prebuilt wheels are published for Linux (manylinux2014, x86_64), macOS
55
+ (Intel + Apple Silicon) and Windows, for CPython 3.8–3.12. No compiler,
56
+ CMake or Eigen is needed when a wheel is available.
57
+
58
+ ### Linux / HPC clusters
59
+
60
+ The Linux wheels use the **manylinux2014** baseline (glibc 2.17), so
61
+ `pip install hapc` works out of the box on HPC login/compute nodes —
62
+ no `conda` toolchain, `devtoolset`, or sysroot setup required:
63
+
64
+ ```bash
65
+ pip install hapc
66
+ ```
67
+
68
+ If you must build from the source distribution (niche architecture, very
69
+ old Python, or an air-gapped node), provide a C++17 compiler and either
70
+ let CMake fetch Eigen automatically (needs network) or install Eigen and
71
+ let `find_package(Eigen3)` find it:
72
+
73
+ ```bash
74
+ # with conda compilers (recommended on HPC)
75
+ conda install -c conda-forge cxx-compiler cmake eigen
76
+ pip install hapc --no-binary hapc
77
+ ```
78
+
54
79
  ### Install from GitHub (latest development version)
55
80
 
56
81
  ```bash
57
- pip install git+https://github.com/yourusername/hapc.git
82
+ pip install git+https://github.com/meixide/hapc.git
58
83
  ```
59
84
 
60
85
  Or with editable install for development:
61
86
 
62
87
  ```bash
63
- git clone https://github.com/yourusername/hapc.git
88
+ git clone https://github.com/meixide/hapc.git
64
89
  cd hapc
65
90
  pip install -e .
66
91
  ```
@@ -201,7 +226,7 @@ Cross-validation to select lambda.
201
226
  Contributions welcome! The C++ core is shared between R and Python packages.
202
227
 
203
228
  ```bash
204
- git clone https://github.com/yourusername/hapc.git
229
+ git clone https://github.com/meixide/hapc.git
205
230
  cd hapc
206
231
  pip install -e .
207
232
  pytest
@@ -3,6 +3,7 @@
3
3
  from setuptools import setup, find_packages, Extension
4
4
  from setuptools.command.build_ext import build_ext
5
5
  import os
6
+ import shlex
6
7
  import subprocess
7
8
  import sys
8
9
  from pathlib import Path
@@ -44,6 +45,15 @@ class CMakeBuild(build_ext):
44
45
  build_args = ['--config', cfg]
45
46
 
46
47
  cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg]
48
+
49
+ # Honour the conventional CMAKE_ARGS env var (set by cibuildwheel/conda).
50
+ # Used to force universal2 macOS builds via
51
+ # CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64;x86_64", which CMake cannot
52
+ # infer from the (single-arch) build interpreter on its own.
53
+ extra_cmake_args = os.environ.get('CMAKE_ARGS')
54
+ if extra_cmake_args:
55
+ cmake_args += shlex.split(extra_cmake_args)
56
+
47
57
  # Add parallel build flag only on non-Windows platforms
48
58
  # On Windows, MSBuild doesn't support -j flag and handles parallelization automatically
49
59
  if sys.platform != 'win32':
@@ -117,4 +117,9 @@ PYBIND11_MODULE(hapc_core, m) {
117
117
 
118
118
  m.def("logistic_ridge_init", &logistic_ridge_init,
119
119
  py::arg("Y"), py::arg("X"), py::arg("lambda"));
120
+
121
+ // Soft-label logistic ridge initialiser: target Y may be any value in
122
+ // [0,1] (hard {0,1} labels or fractional EM-HAL E-step posteriors).
123
+ m.def("logistic_ridge_init_y01", &logistic_ridge_init_y01,
124
+ py::arg("Y"), py::arg("X"), py::arg("lambda"));
120
125
  }
@@ -91,6 +91,11 @@ FastCVOutput fasthal_cv_python(const MatrixXd& X, const VectorXd& Y, int npc,
91
91
  // (internally multiplied by n, matching logistic_call).
92
92
  VectorXd logistic_ridge_init(const VectorXd& Y_pm1, const MatrixXd& X, double lambda);
93
93
 
94
+ // Soft-label variant: target `y01` may take any value in [0, 1] (hard {0,1}
95
+ // labels or fractional EM-HAL E-step posteriors). On hard {0,1} inputs the
96
+ // result is identical to logistic_ridge_init. lambda has the same scaling.
97
+ VectorXd logistic_ridge_init_y01(const VectorXd& y01, const MatrixXd& X, double lambda);
98
+
94
99
  // Cross-validation output for binomial (logistic) HAPC.
95
100
  struct CVClassiOutput {
96
101
  std::vector<double> deviances;
@@ -101,7 +106,9 @@ struct CVClassiOutput {
101
106
  };
102
107
 
103
108
  // Python-friendly binomial CV (mirrors R `pchal_cv_classi_call`).
104
- // Y must contain only 0 or 1 values.
109
+ // Y must lie in [0,1]: hard {0,1} labels or soft EM-HAL posteriors. Soft
110
+ // labels are supported only when with_pgd == false (norm="2"); with_pgd ==
111
+ // true (norm="sv") rejects soft labels.
105
112
  //
106
113
  // When `with_pgd == true` (default): per fold runs logistic-ridge initialiser
107
114
  // followed by projected gradient descent on logistic loss (norm="sv").
@@ -28,10 +28,15 @@
28
28
  // rule `beta := delta_beta` (i.e. solving the full normal equation each
29
29
  // iteration, treating the IRLS working response as the regression target).
30
30
  // ---------------------------------------------------------------------------
31
- VectorXd logistic_ridge_init(const VectorXd& Y_pm1, const MatrixXd& X, double lambda) {
31
+ // Soft-label logistic ridge. The target `y01` may take any value in [0, 1]:
32
+ // hard {0,1} labels or fractional EM-HAL E-step posteriors. The IRLS update
33
+ // is unchanged; fractional targets are standard for cross-entropy
34
+ // minimisation, so on hard {0,1} inputs the result is bit-identical to the
35
+ // former {-1,+1} implementation.
36
+ VectorXd logistic_ridge_init_y01(const VectorXd& y01, const MatrixXd& X, double lambda) {
32
37
  const int n = X.rows();
33
38
  const int p = X.cols();
34
- if (Y_pm1.size() != n) {
39
+ if (y01.size() != n) {
35
40
  throw std::runtime_error("logistic_ridge_init: Y length must match nrow(X).");
36
41
  }
37
42
  // Match logistic_call: lambda is multiplied by n internally.
@@ -39,12 +44,6 @@ VectorXd logistic_ridge_init(const VectorXd& Y_pm1, const MatrixXd& X, double la
39
44
  const int max_iter = 100;
40
45
  const double tol = 1e-8;
41
46
 
42
- // logistic_call expects Y in {-1,+1} but treats it via the GLM update with
43
- // the {0,1} working response. We replicate that behaviour exactly: convert
44
- // back to a {0,1} response y01 = (Y_pm1 + 1) / 2 to compute mu/working z.
45
- VectorXd y01(n);
46
- for (int i = 0; i < n; ++i) y01[i] = (Y_pm1[i] > 0) ? 1.0 : 0.0;
47
-
48
47
  VectorXd beta = VectorXd::Zero(p);
49
48
  for (int iter = 0; iter < max_iter; ++iter) {
50
49
  VectorXd eta = X * beta;
@@ -66,6 +65,15 @@ VectorXd logistic_ridge_init(const VectorXd& Y_pm1, const MatrixXd& X, double la
66
65
  return beta;
67
66
  }
68
67
 
68
+ // Backward-compatible wrapper: accepts Y in {-1,+1} and converts to {0,1}.
69
+ // Used by the PGD (norm="sv") single-fit path, which is hard-label only.
70
+ VectorXd logistic_ridge_init(const VectorXd& Y_pm1, const MatrixXd& X, double lambda) {
71
+ const int n = X.rows();
72
+ VectorXd y01(n);
73
+ for (int i = 0; i < n; ++i) y01[i] = (Y_pm1[i] > 0) ? 1.0 : 0.0;
74
+ return logistic_ridge_init_y01(y01, X, lambda);
75
+ }
76
+
69
77
  static double calibrate_logistic_intercept(const VectorXd& Y01,
70
78
  const VectorXd& eta) {
71
79
  const int n = (int)Y01.size();
@@ -84,16 +92,20 @@ static double calibrate_logistic_intercept(const VectorXd& Y01,
84
92
  return b0;
85
93
  }
86
94
 
87
- static double logistic_risk_pm1(const VectorXd& Y_pm1, const VectorXd& eta) {
88
- const int n = (int)Y_pm1.size();
95
+ // Soft cross-entropy risk for fractional targets y01 in [0,1], given a linear
96
+ // predictor `eta` (intercept already folded in). On hard {0,1} labels this
97
+ // equals the former {-1,+1} logistic risk, so behaviour is unchanged on
98
+ // binary inputs.
99
+ static double logistic_risk_y01(const VectorXd& y01, const VectorXd& eta) {
100
+ const int n = (int)y01.size();
89
101
  if (eta.size() != n) {
90
- throw std::runtime_error("logistic_risk_pm1: length mismatch");
102
+ throw std::runtime_error("logistic_risk_y01: length mismatch");
91
103
  }
92
104
  double risk = 0.0;
93
105
  for (int i = 0; i < n; ++i) {
94
- const double ymu = Y_pm1[i] * eta[i];
95
- risk += (ymu > 0) ? std::log1p(std::exp(-ymu))
96
- : -ymu + std::log1p(std::exp(ymu));
106
+ const double pi = 1.0 / (1.0 + std::exp(-eta[i]));
107
+ const double p = std::min(1.0 - 1e-15, std::max(1e-15, pi));
108
+ risk += -(y01[i] * std::log(p) + (1.0 - y01[i]) * std::log(1.0 - p));
97
109
  }
98
110
  return risk / n;
99
111
  }
@@ -136,28 +148,31 @@ static std::vector<int> make_folds(int n, int K) {
136
148
  // for the post-CV refit). When `with_pgd == false`, returns the logistic-ridge
137
149
  // initialiser α directly with its training logistic risk; otherwise runs the
138
150
  // PGD step on top of it (norm="sv").
139
- static OptimizerOutput logistic_full_fit(const VectorXd& Y_pm1,
151
+ static OptimizerOutput logistic_full_fit(const VectorXd& Y01,
140
152
  const MatrixXd& Xtilde,
141
153
  const MatrixXd& E_Nn,
142
154
  double lambda,
143
155
  int max_iter, double tol,
144
156
  double step_factor, bool verbose,
145
157
  bool with_pgd) {
146
- VectorXd alpha0 = logistic_ridge_init(Y_pm1, Xtilde, lambda);
158
+ VectorXd alpha0 = logistic_ridge_init_y01(Y01, Xtilde, lambda);
147
159
  const int n = Xtilde.rows();
148
160
  VectorXd alpha_fit;
149
161
  if (with_pgd) {
162
+ // PGD (norm="sv") uses the {-1,+1} logistic loss and is reached only
163
+ // for hard labels (soft labels are rejected upstream), so thresholding
164
+ // at 0.5 recovers the exact {-1,+1} encoding.
165
+ VectorXd Y_pm1(n);
166
+ for (int i = 0; i < n; ++i) Y_pm1[i] = (Y01[i] > 0.5) ? 1.0 : -1.0;
150
167
  OptimizerOutput out = pcghal_classi_call(Y_pm1, Xtilde, E_Nn, alpha0,
151
168
  max_iter, tol, step_factor, verbose);
152
169
  alpha_fit = out.alpha;
153
170
  } else {
154
171
  alpha_fit = alpha0; // logistic ridge only (norm="2")
155
172
  }
156
- VectorXd Y01(n);
157
- for (int i = 0; i < n; ++i) Y01[i] = (Y_pm1[i] > 0.0) ? 1.0 : 0.0;
158
173
  VectorXd eta = Xtilde * alpha_fit;
159
174
  const double b0 = calibrate_logistic_intercept(Y01, eta);
160
- const double risk = logistic_risk_pm1(Y_pm1, eta.array() + b0);
175
+ const double risk = logistic_risk_y01(Y01, eta.array() + b0);
161
176
  OptimizerOutput out;
162
177
  out.alpha = alpha_fit;
163
178
  out.alphaiters = MatrixXd::Zero(0, alpha_fit.size());
@@ -177,10 +192,21 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
177
192
  const int n = X.rows();
178
193
  const int p = X.cols();
179
194
  if (Y.size() != n) throw std::runtime_error("pcghal_cv_classi: length(Y) != nrow(X)");
195
+ // Y must lie in [0,1]: hard {0,1} labels or soft EM-HAL posteriors. Soft
196
+ // labels (any value strictly inside (0,1)) are supported only for the
197
+ // logistic-ridge path (norm="2"); the PGD path (norm="sv", with_pgd=true)
198
+ // is not implemented for soft labels.
199
+ bool soft = false;
180
200
  for (int i = 0; i < n; ++i) {
181
- if (Y[i] != 0.0 && Y[i] != 1.0) {
182
- throw std::runtime_error("pcghal_cv_classi: Y must be 0/1");
201
+ if (Y[i] < -1e-12 || Y[i] > 1.0 + 1e-12) {
202
+ throw std::runtime_error("pcghal_cv_classi: Y must be in [0,1]");
183
203
  }
204
+ if (Y[i] > 1e-12 && Y[i] < 1.0 - 1e-12) soft = true;
205
+ }
206
+ if (soft && with_pgd) {
207
+ throw std::runtime_error(
208
+ "pcghal_cv_classi: soft labels (Y in (0,1)) are not implemented for "
209
+ "norm='sv'; use norm='1' or norm='2'.");
184
210
  }
185
211
  const int L = (int)lambdas.size();
186
212
  if (L <= 0) throw std::runtime_error("pcghal_cv_classi: lambdas must be non-empty");
@@ -198,9 +224,9 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
198
224
  const int final_npc = compute_classi_design(X, maxdeg, npc_eff, center,
199
225
  Xtilde, E_Nn, U_top, d_top);
200
226
 
201
- // Y in {-1,+1} for the optimiser
202
- VectorXd Y_pm1(n);
203
- for (int i = 0; i < n; ++i) Y_pm1[i] = (Y[i] == 1.0) ? 1.0 : -1.0;
227
+ // Soft target in [0,1] used throughout (the ridge/CE machinery works
228
+ // directly in this space; the PGD branch builds {-1,+1} locally).
229
+ const VectorXd& Y01 = Y;
204
230
 
205
231
  // Degenerate case: R `hapc(family="binomial", …)` passes nfolds=1 with a
206
232
  // single λ — there is no proper train/test split. Fit on full data and
@@ -213,7 +239,7 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
213
239
  for (int j = 0; j < L; ++j) {
214
240
  const double lam = lambdas[j];
215
241
  OptimizerOutput full_out = logistic_full_fit(
216
- Y_pm1, Xtilde, E_Nn, lam, max_iter, tol, step_factor,
242
+ Y01, Xtilde, E_Nn, lam, max_iter, tol, step_factor,
217
243
  verbose, with_pgd);
218
244
  deviances[j] = full_out.risk;
219
245
  if (full_out.risk < best_val) {
@@ -265,19 +291,22 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
265
291
  if (ntr == 0 || nte == 0) continue;
266
292
 
267
293
  MatrixXd Xtr(ntr, final_npc), Xte(nte, final_npc);
268
- VectorXd Ytr_pm1(ntr), Yte01(nte);
294
+ VectorXd Ytr01(ntr), Yte01(nte);
269
295
  for (int i = 0; i < ntr; ++i) {
270
296
  Xtr.row(i) = Xtilde.row(tr_idx[i]);
271
- Ytr_pm1[i] = Y_pm1[tr_idx[i]];
297
+ Ytr01[i] = Y01[tr_idx[i]];
272
298
  }
273
299
  for (int i = 0; i < nte; ++i) {
274
300
  Xte.row(i) = Xtilde.row(te_idx[i]);
275
- Yte01[i] = Y[te_idx[i]];
301
+ Yte01[i] = Y01[te_idx[i]];
276
302
  }
277
303
 
278
- VectorXd alpha0 = logistic_ridge_init(Ytr_pm1, Xtr, lambda);
304
+ VectorXd alpha0 = logistic_ridge_init_y01(Ytr01, Xtr, lambda);
279
305
  VectorXd alpha_fold;
280
306
  if (with_pgd) {
307
+ // Hard-label only path (soft labels rejected upstream).
308
+ VectorXd Ytr_pm1(ntr);
309
+ for (int i = 0; i < ntr; ++i) Ytr_pm1[i] = (Ytr01[i] > 0.5) ? 1.0 : -1.0;
281
310
  OptimizerOutput out = pcghal_classi_call(Ytr_pm1, Xtr, E_Nn, alpha0,
282
311
  max_iter, tol, step_factor,
283
312
  verbose);
@@ -287,15 +316,13 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
287
316
  }
288
317
 
289
318
  VectorXd eta_tr = Xtr * alpha_fold;
290
- VectorXd Ytr01(ntr);
291
- for (int i = 0; i < ntr; ++i) Ytr01[i] = (Ytr_pm1[i] > 0.0) ? 1.0 : 0.0;
292
319
  const double b0_fold = calibrate_logistic_intercept(Ytr01, eta_tr);
293
320
  VectorXd eta = (Xte * alpha_fold).array() + b0_fold;
294
321
  VectorXd probs = (1.0 + (-eta.array()).exp()).inverse();
295
322
  double dev = 0.0;
296
323
  for (int i = 0; i < nte; ++i) {
297
324
  double pi = std::max(1e-15, std::min(1.0 - 1e-15, probs[i]));
298
- dev += (Yte01[i] == 1.0) ? -std::log(pi) : -std::log(1.0 - pi);
325
+ dev += -(Yte01[i] * std::log(pi) + (1.0 - Yte01[i]) * std::log(1.0 - pi));
299
326
  }
300
327
  fold_error(k - 1, j) = dev / nte;
301
328
  }
@@ -325,7 +352,7 @@ CVClassiOutput pcghal_cv_classi_python(const MatrixXd& X, const VectorXd& Y,
325
352
 
326
353
  // Refit on full data at best_lambda (logistic ridge ± PGD).
327
354
  OptimizerOutput full_out = logistic_full_fit(
328
- Y_pm1, Xtilde, E_Nn, best_lambda,
355
+ Y01, Xtilde, E_Nn, best_lambda,
329
356
  max_iter, tol, step_factor, verbose, with_pgd);
330
357
 
331
358
  // Predict on `predict_data` if supplied (else empty vector).
@@ -347,8 +347,11 @@ extern "C" SEXP single_pcghal_classi_ridge_call(SEXP X_, SEXP Y_, SEXP maxdeg_,
347
347
  if (Rf_length(Y_) != n) Rf_error("length(Y) must equal nrow(X).");
348
348
  Map<const MatrixXd> X(REAL(X_), n, p);
349
349
  Map<const VectorXd> Y01(REAL(Y_), n);
350
+ // Y must lie in [0,1]: hard {0,1} labels or soft EM-HAL posteriors. The
351
+ // logistic-ridge fit (norm="2") supports both.
350
352
  for (int i = 0; i < n; ++i) {
351
- if (Y01[i] != 0.0 && Y01[i] != 1.0) Rf_error("Y must contain only 0 and 1");
353
+ if (Y01[i] < -1e-12 || Y01[i] > 1.0 + 1e-12)
354
+ Rf_error("Y must be in [0,1]");
352
355
  }
353
356
  int maxdeg = Rf_isInteger(maxdeg_) ? INTEGER(maxdeg_)[0] : (int)REAL(maxdeg_)[0];
354
357
  int npc = Rf_isInteger(npc_) ? INTEGER(npc_)[0] : (int)REAL(npc_)[0];
@@ -365,9 +368,6 @@ extern "C" SEXP single_pcghal_classi_ridge_call(SEXP X_, SEXP Y_, SEXP maxdeg_,
365
368
  const int final_npc = (int)des.d.size();
366
369
  MatrixXd Xtilde = des.U * des.d.asDiagonal();
367
370
 
368
- VectorXd Y_pm1(n);
369
- for (int i = 0; i < n; ++i) Y_pm1[i] = (Y01[i] == 1.0) ? 1.0 : -1.0;
370
-
371
371
  auto calibrate_b0 = [](const VectorXd& y01, const VectorXd& eta) {
372
372
  double b0 = 0.0;
373
373
  for (int it = 0; it < 50; ++it) {
@@ -381,16 +381,15 @@ extern "C" SEXP single_pcghal_classi_ridge_call(SEXP X_, SEXP Y_, SEXP maxdeg_,
381
381
  return b0;
382
382
  };
383
383
 
384
- VectorXd alpha = logistic_ridge_init(Y_pm1, Xtilde, lambda);
384
+ VectorXd alpha = logistic_ridge_init_y01(Y01, Xtilde, lambda);
385
385
  VectorXd eta = Xtilde * alpha;
386
386
  const double b0 = calibrate_b0(Y01, eta);
387
+ // Soft cross-entropy risk (equals the {-1,+1} logistic risk on hard labels).
387
388
  double risk = 0.0;
388
389
  for (int i = 0; i < n; ++i) {
389
- double ymu = Y_pm1[i] * (eta[i] + b0);
390
- if (ymu > 0)
391
- risk += std::log1p(std::exp(-ymu));
392
- else
393
- risk += -ymu + std::log1p(std::exp(ymu));
390
+ const double pi = 1.0 / (1.0 + std::exp(-(eta[i] + b0)));
391
+ const double pp = std::min(1.0 - 1e-15, std::max(1e-15, pi));
392
+ risk += -(Y01[i] * std::log(pp) + (1.0 - Y01[i]) * std::log(1.0 - pp));
394
393
  }
395
394
  risk /= n;
396
395
 
hapc-2.1.0/pyproject.toml DELETED
@@ -1,36 +0,0 @@
1
- [build-system]
2
- requires = ["setuptools>=65", "wheel", "cmake>=3.15", "pybind11>=2.6"]
3
- build-backend = "setuptools.build_meta"
4
-
5
- [project]
6
- name = "hapc"
7
- version = "2.1.0"
8
- description = "Highly Adaptive Principal Components"
9
- readme = "README.md"
10
- requires-python = ">=3.8"
11
- authors = [
12
- {name = "Carlos García Meixide", email = "cgmeixide@gmail.com"}
13
- ]
14
- license = {text = "MIT"}
15
- classifiers = [
16
- "Programming Language :: Python :: 3",
17
- "Programming Language :: Python :: 3.8",
18
- "Programming Language :: Python :: 3.9",
19
- "Programming Language :: Python :: 3.10",
20
- "Programming Language :: Python :: 3.11",
21
- "Programming Language :: Python :: 3.12",
22
- "Operating System :: OS Independent",
23
- ]
24
- dependencies = [
25
- "numpy>=1.24,<2.3",
26
- "scikit-learn>=1.0",
27
- ]
28
-
29
- [project.optional-dependencies]
30
- dev = ["pytest", "pytest-cov", "black", "flake8"]
31
-
32
- [project.urls]
33
- Homepage = "https://github.com/meixide/hapc"
34
- Documentation = "https://github.com/meixide/hapc#readme"
35
- Repository = "https://github.com/meixide/hapc.git"
36
- Issues = "https://github.com/meixide/hapc/issues"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes