knn-engine-core 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- knn_engine_core-0.1.0/.github/workflows/publish.yml +59 -0
- knn_engine_core-0.1.0/.gitignore +36 -0
- knn_engine_core-0.1.0/CMakeLists.txt +47 -0
- knn_engine_core-0.1.0/CMakePresets.json +27 -0
- knn_engine_core-0.1.0/PKG-INFO +10 -0
- knn_engine_core-0.1.0/README.md +1 -0
- knn_engine_core-0.1.0/include/KNN.hpp +21 -0
- knn_engine_core-0.1.0/include/KNNEngine.hpp +29 -0
- knn_engine_core-0.1.0/include/PCA.hpp +24 -0
- knn_engine_core-0.1.0/pyproject.toml +17 -0
- knn_engine_core-0.1.0/script.py +34 -0
- knn_engine_core-0.1.0/src/KNN.cpp +40 -0
- knn_engine_core-0.1.0/src/KNNEngine.cpp +34 -0
- knn_engine_core-0.1.0/src/PCA.cpp +88 -0
- knn_engine_core-0.1.0/src/binding.cpp +23 -0
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
name: Build and Publish to PyPI

# Runs only when a GitHub release is published.
on:
  release:
    types: [published]

jobs:
  # Binary wheels, one job per runner OS.
  build_wheels:
    name: Build wheels on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os:
          - ubuntu-latest
          - windows-latest
          - macos-latest

    steps:
      - uses: actions/checkout@v4

      - name: Build wheels
        uses: pypa/cibuildwheel@v2.17.0

      # Artifact names all start with "cibw-" so the publish job can
      # collect them with a single pattern.
      - uses: actions/upload-artifact@v4
        with:
          name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
          path: ./wheelhouse/*.whl

  # Source distribution, built once on Linux.
  build_sdist:
    name: Build source distribution
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Build sdist
        run: pipx run build --sdist

      - uses: actions/upload-artifact@v4
        with:
          name: cibw-sdist
          path: dist/*.tar.gz

  # Upload everything produced above once both build jobs have succeeded.
  publish-to-pypi:
    name: Publish Python distribution to PyPI
    needs:
      - build_wheels
      - build_sdist
    runs-on: ubuntu-latest
    environment:
      name: pypi
      url: https://pypi.org/p/knn-engine-core
    permissions:
      # No API token is configured in this workflow; the OIDC token is
      # presumably used for PyPI trusted publishing.
      id-token: write
    steps:
      - name: Download all the dists
        uses: actions/download-artifact@v4
        with:
          pattern: cibw-*
          path: dist
          merge-multiple: true

      - name: Publish distribution to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# --- Build Artifacts ---
|
|
2
|
+
build/
|
|
3
|
+
out/
|
|
4
|
+
bin/
|
|
5
|
+
obj/
|
|
6
|
+
*.so
|
|
7
|
+
*.a
|
|
8
|
+
*.la
|
|
9
|
+
*.o
|
|
10
|
+
*.obj
|
|
11
|
+
|
|
12
|
+
# --- Python ---
|
|
13
|
+
__pycache__/
|
|
14
|
+
*.py[cod]
|
|
15
|
+
*$py.class
|
|
16
|
+
.venv/
|
|
17
|
+
venv/
|
|
18
|
+
ENV/
|
|
19
|
+
.pytest_cache/
|
|
20
|
+
|
|
21
|
+
# --- IDEs and Editors ---
|
|
22
|
+
.vscode/
|
|
23
|
+
.idea/
|
|
24
|
+
*.swp
|
|
25
|
+
*.swo
|
|
26
|
+
.clangd/
|
|
27
|
+
compile_commands.json
|
|
28
|
+
|
|
29
|
+
# --- Data & Logs ---
|
|
30
|
+
data/*.csv
|
|
31
|
+
!data/README.md
|
|
32
|
+
*.log
|
|
33
|
+
|
|
34
|
+
# --- CMake ---
|
|
35
|
+
CMakeUserPresets.json
|
|
36
|
+
_deps/
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
cmake_minimum_required(VERSION 3.18)

# CMP0135: stamp files extracted from URL downloads with the extraction time.
if(POLICY CMP0135)
  cmake_policy(SET CMP0135 NEW)
endif()

# CMP0169: keep the direct FetchContent_Populate() call below permitted on
# CMake versions that know this policy.
if(POLICY CMP0169)
  cmake_policy(SET CMP0169 OLD)
endif()

project(KNN_ENGINE LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

include(FetchContent)

# Eigen is only downloaded and unpacked (populate); its own CMake project is
# never added. The headers are consumed via ${eigen_SOURCE_DIR} below.
FetchContent_Declare(
  eigen
  URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz
)
FetchContent_GetProperties(eigen)
if(NOT eigen_POPULATED)
  FetchContent_Populate(eigen)
endif()

# pybind11 is added as a sub-project so pybind11_add_module() is available.
FetchContent_Declare(
  pybind11
  URL https://github.com/pybind/pybind11/archive/refs/tags/v2.13.1.tar.gz
)
FetchContent_MakeAvailable(pybind11)

pybind11_add_module(knn_core NO_EXTRAS
  src/binding.cpp
  src/PCA.cpp
  src/KNN.cpp
  src/KNNEngine.cpp
)

# First-party headers: a regular include path, so compiler warnings are still
# reported for the project's own code.
target_include_directories(knn_core PRIVATE
  include
)

# Third-party headers: SYSTEM keeps Eigen's internal warnings out of the build.
target_include_directories(knn_core SYSTEM PRIVATE
  ${eigen_SOURCE_DIR}
)

target_link_libraries(knn_core PRIVATE pybind11::module)

install(TARGETS knn_core DESTINATION .)
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": 8,
|
|
3
|
+
"configurePresets": [
|
|
4
|
+
{
|
|
5
|
+
"name": "Debug",
|
|
6
|
+
"displayName": "Debug",
|
|
7
|
+
"description": "Debug",
|
|
8
|
+
"generator": "Ninja",
|
|
9
|
+
"binaryDir": "${sourceDir}/out/build/${presetName}",
|
|
10
|
+
"cacheVariables": {
|
|
11
|
+
"CMAKE_BUILD_TYPE": "Debug",
|
|
12
|
+
"CMAKE_INSTALL_PREFIX": "${sourceDir}/out/install/${presetName}"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
{
|
|
16
|
+
"name": "Release",
|
|
17
|
+
"displayName": "Release",
|
|
18
|
+
"description": "Release",
|
|
19
|
+
"generator": "Ninja",
|
|
20
|
+
"binaryDir": "${sourceDir}/out/build/${presetName}",
|
|
21
|
+
"cacheVariables": {
|
|
22
|
+
"CMAKE_BUILD_TYPE": "Release",
|
|
23
|
+
"CMAKE_INSTALL_PREFIX": "${sourceDir}/out/install/${presetName}"
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
]
|
|
27
|
+
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: knn-engine-core
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A high-performance C++ KNN Engine with Adaptive PCA
|
|
5
|
+
Requires-Python: >=3.8
|
|
6
|
+
Requires-Dist: numpy
|
|
7
|
+
Requires-Dist: scikit-learn
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
|
|
10
|
+
# KNN Core
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# KNN Core
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
#include <Eigen/Dense>
|
|
3
|
+
#include <map>
|
|
4
|
+
#include <string>
|
|
5
|
+
#include <vector>
|
|
6
|
+
|
|
7
|
+
class KNN {
|
|
8
|
+
public:
|
|
9
|
+
explicit KNN(int k = 3);
|
|
10
|
+
|
|
11
|
+
void train(const Eigen::MatrixXd& training_data,
|
|
12
|
+
const std::vector<std::string>& training_labels);
|
|
13
|
+
|
|
14
|
+
std::string predict(const Eigen::VectorXd& query_point) const;
|
|
15
|
+
|
|
16
|
+
private:
|
|
17
|
+
int k_neighbors;
|
|
18
|
+
Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
|
|
19
|
+
train_X;
|
|
20
|
+
std::vector<std::string> train_y;
|
|
21
|
+
};
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
#include <Eigen/Dense>
|
|
3
|
+
#include <memory>
|
|
4
|
+
#include <string>
|
|
5
|
+
#include <vector>
|
|
6
|
+
|
|
7
|
+
#include "KNN.hpp"
|
|
8
|
+
#include "PCA.hpp"
|
|
9
|
+
|
|
10
|
+
/// Settings consumed by KNNEngine.
struct KNNConfig {
  /// Neighbour count handed to the KNN classifier.
  int k{3};
  /// Value handed to the PCA constructor.
  double variance{0.95};
};
|
|
14
|
+
|
|
15
|
+
class KNNEngine {
|
|
16
|
+
public:
|
|
17
|
+
explicit KNNEngine(KNNConfig config = KNNConfig());
|
|
18
|
+
|
|
19
|
+
void train(const Eigen::MatrixXd& X, const std::vector<std::string>& y,
|
|
20
|
+
bool scale = false);
|
|
21
|
+
|
|
22
|
+
std::string predict(const Eigen::VectorXd& raw_input) const;
|
|
23
|
+
std::vector<std::string> predict_batch(const Eigen::MatrixXd& inputs) const;
|
|
24
|
+
|
|
25
|
+
private:
|
|
26
|
+
KNNConfig cfg;
|
|
27
|
+
std::unique_ptr<PCA> pca;
|
|
28
|
+
std::unique_ptr<KNN> knn;
|
|
29
|
+
};
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
#include <Eigen/Dense>
#include <stdexcept>
|
|
3
|
+
|
|
4
|
+
class PCA {
|
|
5
|
+
public:
|
|
6
|
+
// components > 1: fixed count. components < 1: variance threshold (e.g. 0.95)
|
|
7
|
+
explicit PCA(double components = 0.95);
|
|
8
|
+
|
|
9
|
+
void fit(const Eigen::MatrixXd& X, bool scale = false);
|
|
10
|
+
Eigen::MatrixXd transform(const Eigen::MatrixXd& X) const;
|
|
11
|
+
Eigen::MatrixXd fit_transform(const Eigen::MatrixXd& X);
|
|
12
|
+
|
|
13
|
+
int getComponentCount() const { return num_components; }
|
|
14
|
+
|
|
15
|
+
private:
|
|
16
|
+
double threshold;
|
|
17
|
+
int num_components;
|
|
18
|
+
Eigen::RowVectorXd mean;
|
|
19
|
+
Eigen::RowVectorXd std_dev;
|
|
20
|
+
Eigen::MatrixXd projection_matrix;
|
|
21
|
+
|
|
22
|
+
void apply_threshold(const Eigen::VectorXd& evals,
|
|
23
|
+
const Eigen::MatrixXd& evecs, bool is_thin_mode);
|
|
24
|
+
};
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["scikit-build-core", "pybind11"]
|
|
3
|
+
build-backend = "scikit_build_core.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "knn-engine-core"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "A high-performance C++ KNN Engine with Adaptive PCA"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"numpy",
|
|
13
|
+
"scikit-learn"
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[tool.scikit-build]
|
|
17
|
+
cmake.version = ">=3.18"
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import knn_core
|
|
2
|
+
import numpy as np
|
|
3
|
+
from sklearn.datasets import fetch_olivetti_faces, load_iris, load_digits
|
|
4
|
+
from sklearn.model_selection import train_test_split
|
|
5
|
+
|
|
6
|
+
def test_dataset(name, X, y, k, variance, scale):
    """Train a KNNEngine on a stratified 80/20 split and print its accuracy.

    Args:
        name: Title printed in the section header.
        X: Feature array; converted to float64 before splitting.
        y: Label array; converted to strings before splitting.
        k: Value assigned to ``KNNConfig.k``.
        variance: Value assigned to ``KNNConfig.variance``.
        scale: Passed through to ``KNNEngine.train``.
    """
    print(f"\n--- Testing {name} ---")
    cfg = knn_core.KNNConfig()
    cfg.k = k
    cfg.variance = variance
    engine = knn_core.KNNEngine(cfg)

    X_train, X_test, y_train, y_test = train_test_split(
        X.astype(np.float64), y.astype(str), test_size=0.2, stratify=y, random_state=42
    )

    engine.train(X_train, y_train.tolist(), scale)

    # Classify the whole test set with a single call into the extension
    # instead of crossing the Python/C++ boundary once per sample.
    predictions = engine.predict_batch(X_test)
    correct = sum(
        1 for predicted, expected in zip(predictions, y_test.tolist())
        if predicted == expected
    )

    print(f"Accuracy: {correct/len(X_test)*100:.2f}%")
|
|
25
|
+
|
|
26
|
+
if __name__ == "__main__":
    # (title, loader, k, variance). Loaders are invoked inside the loop so
    # each dataset is loaded immediately before it is evaluated.
    benchmarks = [
        ("Olivetti Faces", fetch_olivetti_faces, 1, 0.90),
        ("Iris Flower", load_iris, 3, 0.99),
        ("Handwritten Digits", load_digits, 3, 0.95),
    ]

    for title, load, neighbours, kept_variance in benchmarks:
        bunch = load()
        test_dataset(
            title,
            bunch.data,
            bunch.target,
            k=neighbours,
            variance=kept_variance,
            scale=False,
        )
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
#include "KNN.hpp"
|
|
2
|
+
|
|
3
|
+
#include <algorithm>
|
|
4
|
+
#include <numeric>
|
|
5
|
+
|
|
6
|
+
KNN::KNN(int k) : k_neighbors(k) {}
|
|
7
|
+
|
|
8
|
+
/// Replaces any previously stored training set with copies of the arguments.
/// The samples are converted into the classifier's row-major storage.
void KNN::train(const Eigen::MatrixXd& training_data,
                const std::vector<std::string>& training_labels) {
  train_X = training_data;
  train_y.assign(training_labels.begin(), training_labels.end());
}
|
|
13
|
+
|
|
14
|
+
/// Classifies @p query_point by majority vote among its nearest training rows.
///
/// Closeness is squared Euclidean distance. At most k_neighbors rows vote
/// (fewer when the training set is smaller). When several labels share the
/// highest vote count, the lexicographically smallest label is returned,
/// because the votes are tallied in a std::map and std::max_element keeps the
/// first maximum.
///
/// @return The winning label, or "Unknown" when no prediction is possible:
///         empty training set, non-positive k, a query whose length differs
///         from the training dimensionality, or fewer labels than rows.
std::string KNN::predict(const Eigen::VectorXd& query_point) const {
  const Eigen::Index sample_count = train_X.rows();
  if (sample_count == 0 || k_neighbors <= 0) return "Unknown";

  // Shape guards: Eigen checks dimensions only through assertions that are
  // compiled out with NDEBUG, and a short label vector would be indexed out
  // of bounds while voting.
  if (query_point.size() != train_X.cols() ||
      static_cast<Eigen::Index>(train_y.size()) < sample_count) {
    return "Unknown";
  }

  const Eigen::VectorXd distances =
      (train_X.rowwise() - query_point.transpose()).rowwise().squaredNorm();

  // Row indices use Eigen::Index so large training sets are not narrowed.
  std::vector<Eigen::Index> order(static_cast<std::size_t>(sample_count));
  std::iota(order.begin(), order.end(), Eigen::Index{0});

  const Eigen::Index k_eff = std::min<Eigen::Index>(k_neighbors, sample_count);

  // Partial selection only: the k_eff closest rows end up in front, in no
  // particular order. Skipped when every row votes anyway.
  if (k_eff < sample_count) {
    std::nth_element(order.begin(), order.begin() + k_eff, order.end(),
                     [&distances](Eigen::Index a, Eigen::Index b) {
                       return distances(a) < distances(b);
                     });
  }

  std::map<std::string, int> votes;
  for (Eigen::Index i = 0; i < k_eff; ++i) {
    const auto row = static_cast<std::size_t>(order[static_cast<std::size_t>(i)]);
    ++votes[train_y[row]];
  }

  return std::max_element(
             votes.begin(), votes.end(),
             [](const auto& a, const auto& b) { return a.second < b.second; })
      ->first;
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#include "KNNEngine.hpp"
|
|
2
|
+
|
|
3
|
+
#include <iostream>
|
|
4
|
+
|
|
5
|
+
/// Builds the PCA and KNN stages from the supplied configuration.
/// Members are initialised in declaration order: cfg, pca, knn.
KNNEngine::KNNEngine(KNNConfig config)
    : cfg(config),
      pca(std::make_unique<PCA>(config.variance)),
      knn(std::make_unique<KNN>(config.k)) {}
|
|
9
|
+
|
|
10
|
+
/// Fits the PCA on @p X, projects @p X and trains the classifier on the result.
///
/// @param X     Training samples, one per row.
/// @param y     One label per row of @p X.
/// @param scale Forwarded to PCA::fit.
/// @throws std::invalid_argument if the number of labels differs from the
///         number of rows. The check runs first, so a rejected call leaves
///         the engine's PCA and KNN state untouched.
void KNNEngine::train(const Eigen::MatrixXd& X,
                      const std::vector<std::string>& y, bool scale) {
  // A label/row mismatch would otherwise surface later as an out-of-range
  // label lookup during prediction.
  if (static_cast<Eigen::Index>(y.size()) != X.rows()) {
    throw std::invalid_argument(
        "KNNEngine::train: X has " + std::to_string(X.rows()) +
        " rows but y has " + std::to_string(y.size()) + " labels");
  }

  pca->fit(X, scale);
  const Eigen::MatrixXd x_reduced = pca->transform(X);

  knn->train(x_reduced, y);

  std::cout << "[Engine] Trained successfully. Reduced to "
            << pca->getComponentCount() << " dimensions." << std::endl;
}
|
|
20
|
+
|
|
21
|
+
/// Classifies one raw sample: the vector is treated as a single-row matrix,
/// projected by the PCA, and the projected row is handed to the classifier.
std::string KNNEngine::predict(const Eigen::VectorXd& raw_input) const {
  const Eigen::MatrixXd as_row = raw_input.transpose();
  const Eigen::MatrixXd reduced = pca->transform(as_row);
  const Eigen::VectorXd query = reduced.transpose();
  return knn->predict(query);
}
|
|
25
|
+
|
|
26
|
+
std::vector<std::string> KNNEngine::predict_batch(
|
|
27
|
+
const Eigen::MatrixXd& inputs) const {
|
|
28
|
+
Eigen::MatrixXd projected = pca->transform(inputs);
|
|
29
|
+
std::vector<std::string> predictions;
|
|
30
|
+
for (int i = 0; i < projected.rows(); ++i) {
|
|
31
|
+
predictions.push_back(knn->predict(projected.row(i).transpose()));
|
|
32
|
+
}
|
|
33
|
+
return predictions;
|
|
34
|
+
}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
#include "PCA.hpp"
|
|
2
|
+
|
|
3
|
+
#include <Eigen/Dense>
|
|
4
|
+
#include <algorithm>
|
|
5
|
+
#include <iostream>
|
|
6
|
+
|
|
7
|
+
PCA::PCA(double components) : threshold(components), num_components(0) {}
|
|
8
|
+
|
|
9
|
+
/// Learns the centring, optional scaling and projection from @p X.
///
/// @param X     Samples, one per row; must have at least one row and column.
/// @param scale When true, each centred column is divided by its sample
///              standard deviation (columns with deviation below 1e-9 are
///              left unscaled).
/// @throws std::invalid_argument if @p X has no rows or no columns.
void PCA::fit(const Eigen::MatrixXd& X, bool scale) {
  const Eigen::Index N = X.rows();
  const Eigen::Index D = X.cols();

  // Means and eigen-decompositions of an empty matrix are undefined.
  if (N == 0 || D == 0) {
    throw std::invalid_argument(
        "PCA::fit: input must have at least one row and one column");
  }

  // Sample (N - 1) normalisation. Clamped to 1 so a single sample yields
  // zero variance rather than the NaN produced by dividing by zero.
  const double dof = static_cast<double>(std::max<Eigen::Index>(N - 1, 1));

  mean = X.colwise().mean();
  Eigen::MatrixXd centered = X.rowwise() - mean;

  if (scale) {
    std_dev = (centered.array().square().colwise().sum() / dof).sqrt();
    // Near-constant columns: divide by 1 instead of a vanishing deviation.
    std_dev = std_dev.unaryExpr([](double v) { return v < 1e-9 ? 1.0 : v; });
    centered = centered.array().rowwise() / std_dev.array();
  } else {
    std_dev = Eigen::RowVectorXd::Ones(D);
  }

  if (D > N) {
    // "Thin" path: with more features than samples, decompose the N x N
    // matrix centered * centered^T instead of the D x D covariance.
    // Its eigenvalues are not divided by (N - 1); apply_threshold only uses
    // their ratios.
    const Eigen::MatrixXd gram = centered * centered.transpose();
    Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> solver(gram);

    // Reverse the solver's ordering so the largest eigenvalue comes first.
    const Eigen::VectorXd evals = solver.eigenvalues().reverse();
    const Eigen::MatrixXd evecs_gram =
        solver.eigenvectors().rowwise().reverse();

    // Map the eigenvectors back into feature space (D rows).
    Eigen::MatrixXd evecs = centered.transpose() * evecs_gram;

    // Normalise each direction; directions that collapsed to (numerically)
    // zero length are cleared rather than blown up by the division.
    for (Eigen::Index i = 0; i < evecs.cols(); ++i) {
      const double norm = evecs.col(i).norm();
      if (norm > 1e-9) {
        evecs.col(i) /= norm;
      } else {
        evecs.col(i).setZero();
      }
    }
    apply_threshold(evals, evecs, true);
  } else {
    // Standard path: eigen-decomposition of the D x D sample covariance.
    const Eigen::MatrixXd cov = (centered.transpose() * centered) / dof;
    Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> solver(cov);

    const Eigen::VectorXd evals = solver.eigenvalues().reverse();
    const Eigen::MatrixXd evecs = solver.eigenvectors().rowwise().reverse();

    apply_threshold(evals, evecs, false);
  }
}
|
|
53
|
+
|
|
54
|
+
/// Selects how many components to keep and stores the projection matrix.
///
/// @param evals        Eigenvalues, largest first.
/// @param evecs        Matching eigenvectors as columns.
/// @param is_thin_mode Only affects the log line ("Thin" vs "Standard").
///
/// threshold >= 1 is a component count (truncated, capped at evals.size()).
/// Otherwise it is a cumulative variance ratio: components are added until
/// the ratio reaches it. If the total variance is not positive the ratio is
/// undefined and every component is kept.
void PCA::apply_threshold(const Eigen::VectorXd& evals,
                          const Eigen::MatrixXd& evecs, bool is_thin_mode) {
  const int available = static_cast<int>(evals.size());
  num_components = 0;

  if (threshold >= 1.0) {
    // Compare as double before converting: casting a double that does not
    // fit into int (e.g. 1e12 or infinity) is undefined behaviour.
    num_components = threshold >= static_cast<double>(available)
                         ? available
                         : static_cast<int>(threshold);
  } else {
    const double total_variance = evals.sum();
    if (total_variance > 0.0) {
      double cumulative_variance = 0.0;
      for (int i = 0; i < available; ++i) {
        ++num_components;
        cumulative_variance += evals[i];
        if ((cumulative_variance / total_variance) >= threshold) break;
      }
    } else {
      // No variance to apportion (constant data): 0/0 can never reach the
      // threshold, so keep all components explicitly.
      num_components = available;
    }
  }

  projection_matrix = evecs.leftCols(num_components);
  std::cout << "[PCA] Fit complete. Mode: "
            << (is_thin_mode ? "Thin" : "Standard")
            << " | Components: " << num_components << std::endl;
}
|
|
76
|
+
|
|
77
|
+
/// Projects @p X onto the fitted components.
///
/// Each row is centred with the fitted mean, divided by the fitted per-column
/// divisor, and multiplied by the projection matrix.
///
/// @param X Samples, one per row, with as many columns as the fitted data.
/// @return Matrix with X.rows() rows and one column per kept component.
/// @throws std::logic_error      if called before fit().
/// @throws std::invalid_argument if X.cols() differs from the fitted data.
Eigen::MatrixXd PCA::transform(const Eigen::MatrixXd& X) const {
  // Eigen verifies shapes only through assertions that are disabled with
  // NDEBUG, so mismatches must be rejected here to avoid out-of-bounds reads.
  if (mean.size() == 0) {
    throw std::logic_error("PCA::transform: called before fit()");
  }
  if (X.cols() != mean.size()) {
    throw std::invalid_argument(
        "PCA::transform: input column count does not match the fitted data");
  }

  const Eigen::MatrixXd centered = X.rowwise() - mean;

  const Eigen::MatrixXd standardized =
      centered.array().rowwise() / std_dev.array();

  return standardized * projection_matrix;
}
|
|
84
|
+
|
|
85
|
+
/// Fits on @p X without column scaling and returns the projection of @p X.
Eigen::MatrixXd PCA::fit_transform(const Eigen::MatrixXd& X) {
  fit(X, /*scale=*/false);
  Eigen::MatrixXd projected = transform(X);
  return projected;
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
#include <pybind11/eigen.h>
|
|
2
|
+
#include <pybind11/pybind11.h>
|
|
3
|
+
#include <pybind11/stl.h>
|
|
4
|
+
|
|
5
|
+
#include "KNNEngine.hpp"
|
|
6
|
+
|
|
7
|
+
namespace py = pybind11;
|
|
8
|
+
|
|
9
|
+
// Python module `knn_core`: exposes KNNConfig and KNNEngine.
PYBIND11_MODULE(knn_core, m) {
  m.doc() = "C++ KNN Engine with Adaptive PCA support";

  // KNNConfig() still works; k and variance may additionally be passed as
  // keyword arguments. Defaults are read from a default-constructed
  // KNNConfig so they cannot drift from the C++ struct.
  py::class_<KNNConfig>(m, "KNNConfig")
      .def(py::init([](int k, double variance) {
             KNNConfig config;
             config.k = k;
             config.variance = variance;
             return config;
           }),
           py::arg("k") = KNNConfig().k,
           py::arg("variance") = KNNConfig().variance)
      .def_readwrite("k", &KNNConfig::k)
      .def_readwrite("variance", &KNNConfig::variance);

  // The config argument is optional, mirroring the C++ constructor default.
  // KNNConfig is registered above, which is required for the default value
  // to be convertible when this constructor is defined.
  py::class_<KNNEngine>(m, "KNNEngine")
      .def(py::init<KNNConfig>(), py::arg("config") = KNNConfig())
      .def("train", &KNNEngine::train, py::arg("X"), py::arg("y"),
           py::arg("scale") = false)
      .def("predict", &KNNEngine::predict, py::arg("raw_input"))
      .def("predict_batch", &KNNEngine::predict_batch, py::arg("inputs"));
}
|