knn-engine-core 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,59 @@
1
# Release pipeline: when a GitHub release is published, build wheels on each
# OS in the matrix plus a source distribution, then upload them all to PyPI.
name: Build and Publish to PyPI

on:
  release:
    types: [published]

jobs:
  # One wheel-building job per operating system in the matrix.
  build_wheels:
    name: Build wheels on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os:
          - ubuntu-latest
          - windows-latest
          - macos-latest

    steps:
      - uses: actions/checkout@v4

      - name: Build wheels
        uses: pypa/cibuildwheel@v2.17.0

      # Artifact names share the "cibw-" prefix so the publish job can
      # collect them with a single pattern.
      - uses: actions/upload-artifact@v4
        with:
          name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
          path: ./wheelhouse/*.whl

  # Source distribution, built once on Linux.
  build_sdist:
    name: Build source distribution
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Build sdist
        run: pipx run build --sdist

      - uses: actions/upload-artifact@v4
        with:
          name: cibw-sdist
          path: dist/*.tar.gz

  # Runs only after both build jobs have finished.
  publish-to-pypi:
    name: Publish Python distribution to PyPI
    needs:
      - build_wheels
      - build_sdist
    runs-on: ubuntu-latest
    environment:
      name: pypi
      url: https://pypi.org/p/knn-engine-core
    permissions:
      # NOTE(review): no API token is passed to the publish step, so this is
      # presumably for PyPI trusted publishing (OIDC) - confirm.
      id-token: write
    steps:
      - name: Download all the dists
        uses: actions/download-artifact@v4
        with:
          pattern: cibw-*
          path: dist
          merge-multiple: true

      - name: Publish distribution to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,36 @@
1
+ # --- Build Artifacts ---
2
+ build/
3
+ out/
4
+ bin/
5
+ obj/
6
+ *.so
7
+ *.a
8
+ *.la
9
+ *.o
10
+ *.obj
11
+
12
+ # --- Python ---
13
+ __pycache__/
14
+ *.py[cod]
15
+ *$py.class
16
+ .venv/
17
+ venv/
18
+ ENV/
19
+ .pytest_cache/
20
+
21
+ # --- IDEs and Editors ---
22
+ .vscode/
23
+ .idea/
24
+ *.swp
25
+ *.swo
26
+ .clangd/
27
+ compile_commands.json
28
+
29
+ # --- Data & Logs ---
30
+ data/*.csv
31
+ !data/README.md
32
+ *.log
33
+
34
+ # --- CMake ---
35
+ CMakeUserPresets.json
36
+ _deps/
@@ -0,0 +1,47 @@
1
cmake_minimum_required(VERSION 3.18)

# CMP0135 NEW: files extracted from a downloaded archive are stamped with the
# extraction time, so a changed URL reliably triggers a rebuild.
if(POLICY CMP0135)
  cmake_policy(SET CMP0135 NEW)
endif()

# CMP0169 OLD: keep the single-argument FetchContent_Populate() call used for
# Eigen below working on CMake versions that deprecate it.
if(POLICY CMP0169)
  cmake_policy(SET CMP0169 OLD)
endif()

project(KNN_ENGINE LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

include(FetchContent)

# Eigen is only downloaded and unpacked (never add_subdirectory'd); its source
# directory is added to the include path further down.
FetchContent_Declare(
  eigen
  URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz
)
FetchContent_GetProperties(eigen)
if(NOT eigen_POPULATED)
  FetchContent_Populate(eigen)
endif()

# pybind11 is added as a subproject; it provides pybind11_add_module().
FetchContent_Declare(
  pybind11
  URL https://github.com/pybind/pybind11/archive/refs/tags/v2.13.1.tar.gz
)
FetchContent_MakeAvailable(pybind11)

pybind11_add_module(knn_core NO_EXTRAS
  src/binding.cpp
  src/PCA.cpp
  src/KNN.cpp
  src/KNNEngine.cpp
)

# Project headers are a normal include directory so the compiler still reports
# warnings in first-party code.
target_include_directories(knn_core PRIVATE
  include
)

# Only the third-party Eigen tree is treated as SYSTEM, which silences
# warnings that originate inside Eigen's headers.
target_include_directories(knn_core SYSTEM PRIVATE
  ${eigen_SOURCE_DIR}
)

target_link_libraries(knn_core PRIVATE pybind11::module)

install(TARGETS knn_core DESTINATION .)
@@ -0,0 +1,27 @@
1
+ {
2
+ "version": 8,
3
+ "configurePresets": [
4
+ {
5
+ "name": "Debug",
6
+ "displayName": "Debug",
7
+ "description": "Debug",
8
+ "generator": "Ninja",
9
+ "binaryDir": "${sourceDir}/out/build/${presetName}",
10
+ "cacheVariables": {
11
+ "CMAKE_BUILD_TYPE": "Debug",
12
+ "CMAKE_INSTALL_PREFIX": "${sourceDir}/out/install/${presetName}"
13
+ }
14
+ },
15
+ {
16
+ "name": "Release",
17
+ "displayName": "Release",
18
+ "description": "Release",
19
+ "generator": "Ninja",
20
+ "binaryDir": "${sourceDir}/out/build/${presetName}",
21
+ "cacheVariables": {
22
+ "CMAKE_BUILD_TYPE": "Release",
23
+ "CMAKE_INSTALL_PREFIX": "${sourceDir}/out/install/${presetName}"
24
+ }
25
+ }
26
+ ]
27
+ }
@@ -0,0 +1,10 @@
1
+ Metadata-Version: 2.2
2
+ Name: knn-engine-core
3
+ Version: 0.1.0
4
+ Summary: A high-performance C++ KNN Engine with Adaptive PCA
5
+ Requires-Python: >=3.8
6
+ Requires-Dist: numpy
7
+ Requires-Dist: scikit-learn
8
+ Description-Content-Type: text/markdown
9
+
10
+ # KNN Core
@@ -0,0 +1 @@
1
+ # KNN Core
@@ -0,0 +1,21 @@
1
+ #pragma once
2
#include <Eigen/Dense>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>
6
+
7
+ class KNN {
8
+ public:
9
+ explicit KNN(int k = 3);
10
+
11
+ void train(const Eigen::MatrixXd& training_data,
12
+ const std::vector<std::string>& training_labels);
13
+
14
+ std::string predict(const Eigen::VectorXd& query_point) const;
15
+
16
+ private:
17
+ int k_neighbors;
18
+ Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>
19
+ train_X;
20
+ std::vector<std::string> train_y;
21
+ };
@@ -0,0 +1,29 @@
1
+ #pragma once
2
+ #include <Eigen/Dense>
3
+ #include <memory>
4
+ #include <string>
5
+ #include <vector>
6
+
7
+ #include "KNN.hpp"
8
+ #include "PCA.hpp"
9
+
10
/// Tunable settings consumed by KNNEngine.
struct KNNConfig {
  /// Neighbour count handed to the KNN classifier.
  int k{3};
  /// Value handed to the PCA constructor (0.95 by default).
  double variance{0.95};
};
14
+
15
+ class KNNEngine {
16
+ public:
17
+ explicit KNNEngine(KNNConfig config = KNNConfig());
18
+
19
+ void train(const Eigen::MatrixXd& X, const std::vector<std::string>& y,
20
+ bool scale = false);
21
+
22
+ std::string predict(const Eigen::VectorXd& raw_input) const;
23
+ std::vector<std::string> predict_batch(const Eigen::MatrixXd& inputs) const;
24
+
25
+ private:
26
+ KNNConfig cfg;
27
+ std::unique_ptr<PCA> pca;
28
+ std::unique_ptr<KNN> knn;
29
+ };
@@ -0,0 +1,24 @@
1
+ #pragma once
2
+ #include <Eigen/Dense>
3
+
4
+ class PCA {
5
+ public:
6
+ // components > 1: fixed count. components < 1: variance threshold (e.g. 0.95)
7
+ explicit PCA(double components = 0.95);
8
+
9
+ void fit(const Eigen::MatrixXd& X, bool scale = false);
10
+ Eigen::MatrixXd transform(const Eigen::MatrixXd& X) const;
11
+ Eigen::MatrixXd fit_transform(const Eigen::MatrixXd& X);
12
+
13
+ int getComponentCount() const { return num_components; }
14
+
15
+ private:
16
+ double threshold;
17
+ int num_components;
18
+ Eigen::RowVectorXd mean;
19
+ Eigen::RowVectorXd std_dev;
20
+ Eigen::MatrixXd projection_matrix;
21
+
22
+ void apply_threshold(const Eigen::VectorXd& evals,
23
+ const Eigen::MatrixXd& evecs, bool is_thin_mode);
24
+ };
@@ -0,0 +1,17 @@
1
+ [build-system]
2
+ requires = ["scikit-build-core", "pybind11"]
3
+ build-backend = "scikit_build_core.build"
4
+
5
+ [project]
6
+ name = "knn-engine-core"
7
+ version = "0.1.0"
8
+ description = "A high-performance C++ KNN Engine with Adaptive PCA"
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ dependencies = [
12
+ "numpy",
13
+ "scikit-learn"
14
+ ]
15
+
16
+ [tool.scikit-build]
17
+ cmake.version = ">=3.18"
@@ -0,0 +1,34 @@
1
+ import knn_core
2
+ import numpy as np
3
+ from sklearn.datasets import fetch_olivetti_faces, load_iris, load_digits
4
+ from sklearn.model_selection import train_test_split
5
+
6
def test_dataset(name, X, y, k, variance, scale):
    """Train a KNNEngine on an 80/20 stratified split and print its accuracy.

    Args:
        name: Title printed in the section banner.
        X: Feature array; cast to float64 before splitting.
        y: Label array; cast to str before splitting, and also used as-is
            for stratification.
        k: Neighbour count written to the engine config.
        variance: PCA setting written to the engine config.
        scale: Forwarded to ``engine.train``.
    """
    print(f"\n--- Testing {name} ---")

    cfg = knn_core.KNNConfig()
    cfg.k = k
    cfg.variance = variance
    engine = knn_core.KNNEngine(cfg)

    features = X.astype(np.float64)
    labels = y.astype(str)
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, stratify=y, random_state=42
    )

    # The binding takes labels as a plain list of strings.
    engine.train(X_train, y_train.tolist(), scale)

    # Count test samples whose single-point prediction matches the label.
    hits = sum(
        1
        for sample, expected in zip(X_test, y_test)
        if engine.predict(sample) == expected
    )

    print(f"Accuracy: {hits/len(X_test)*100:.2f}%")
25
+
26
if __name__ == "__main__":
    # (banner title, dataset loader, k, PCA variance); run in this order.
    benchmarks = (
        ("Olivetti Faces", fetch_olivetti_faces, 1, 0.90),
        ("Iris Flower", load_iris, 3, 0.99),
        ("Handwritten Digits", load_digits, 3, 0.95),
    )

    for title, loader, neighbours, kept_variance in benchmarks:
        # Each dataset is loaded right before its own run.
        bunch = loader()
        test_dataset(
            title,
            bunch.data,
            bunch.target,
            k=neighbours,
            variance=kept_variance,
            scale=False,
        )
@@ -0,0 +1,40 @@
1
+ #include "KNN.hpp"
2
+
3
+ #include <algorithm>
4
+ #include <numeric>
5
+
6
+ KNN::KNN(int k) : k_neighbors(k) {}
7
+
8
+ void KNN::train(const Eigen::MatrixXd& training_data,
9
+ const std::vector<std::string>& training_labels) {
10
+ train_X = training_data;
11
+ train_y = training_labels;
12
+ }
13
+
14
+ std::string KNN::predict(const Eigen::VectorXd& query_point) const {
15
+ if (train_X.rows() == 0 || k_neighbors <= 0) return "Unknown";
16
+
17
+ Eigen::VectorXd distances =
18
+ (train_X.rowwise() - query_point.transpose()).rowwise().squaredNorm();
19
+
20
+ std::vector<int> indices(train_X.rows());
21
+ std::iota(indices.begin(), indices.end(), 0);
22
+
23
+ int k_eff = std::min(k_neighbors, static_cast<int>(train_X.rows()));
24
+
25
+ if (k_eff < train_X.rows()) {
26
+ std::nth_element(
27
+ indices.begin(), indices.begin() + k_eff, indices.end(),
28
+ [&distances](int i, int j) { return distances(i) < distances(j); });
29
+ }
30
+
31
+ std::map<std::string, int> votes;
32
+ for (int i = 0; i < k_eff; ++i) {
33
+ votes[train_y[indices[i]]]++;
34
+ }
35
+
36
+ return std::max_element(
37
+ votes.begin(), votes.end(),
38
+ [](const auto& a, const auto& b) { return a.second < b.second; })
39
+ ->first;
40
+ }
@@ -0,0 +1,34 @@
1
+ #include "KNNEngine.hpp"
2
+
3
+ #include <iostream>
4
+
5
+ KNNEngine::KNNEngine(KNNConfig config) : cfg(config) {
6
+ pca = std::make_unique<PCA>(cfg.variance);
7
+ knn = std::make_unique<KNN>(cfg.k);
8
+ }
9
+
10
+ void KNNEngine::train(const Eigen::MatrixXd& X,
11
+ const std::vector<std::string>& y, bool scale) {
12
+ pca->fit(X, scale);
13
+ Eigen::MatrixXd x_reduced = pca->transform(X);
14
+
15
+ knn->train(x_reduced, y);
16
+
17
+ std::cout << "[Engine] Trained successfully. Reduced to "
18
+ << pca->getComponentCount() << " dimensions." << std::endl;
19
+ }
20
+
21
+ std::string KNNEngine::predict(const Eigen::VectorXd& raw_input) const {
22
+ Eigen::MatrixXd projected = pca->transform(raw_input.transpose());
23
+ return knn->predict(projected.transpose());
24
+ }
25
+
26
+ std::vector<std::string> KNNEngine::predict_batch(
27
+ const Eigen::MatrixXd& inputs) const {
28
+ Eigen::MatrixXd projected = pca->transform(inputs);
29
+ std::vector<std::string> predictions;
30
+ for (int i = 0; i < projected.rows(); ++i) {
31
+ predictions.push_back(knn->predict(projected.row(i).transpose()));
32
+ }
33
+ return predictions;
34
+ }
@@ -0,0 +1,88 @@
1
+ #include "PCA.hpp"
2
+
3
+ #include <Eigen/Dense>
4
+ #include <algorithm>
5
+ #include <iostream>
6
+
7
+ PCA::PCA(double components) : threshold(components), num_components(0) {}
8
+
9
+ void PCA::fit(const Eigen::MatrixXd& X, bool scale) {
10
+ mean = X.colwise().mean();
11
+ Eigen::MatrixXd centered = X.rowwise() - mean;
12
+
13
+ if (scale) {
14
+ std_dev =
15
+ (centered.array().square().colwise().sum() / (X.rows() - 1)).sqrt();
16
+ std_dev = std_dev.unaryExpr([](double v) { return v < 1e-9 ? 1.0 : v; });
17
+ centered = centered.array().rowwise() / std_dev.array();
18
+ } else {
19
+ std_dev = Eigen::RowVectorXd::Ones(X.cols());
20
+ }
21
+
22
+ int N = X.rows();
23
+ int D = X.cols();
24
+
25
+ if (D > N) {
26
+ Eigen::MatrixXd L = (centered * centered.transpose());
27
+ Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> solver(L);
28
+
29
+ Eigen::VectorXd evals = solver.eigenvalues().reverse();
30
+ Eigen::MatrixXd evecs_L = solver.eigenvectors().rowwise().reverse();
31
+
32
+ Eigen::MatrixXd evecs = centered.transpose() * evecs_L;
33
+
34
+ for (int i = 0; i < evecs.cols(); ++i) {
35
+ double norm = evecs.col(i).norm();
36
+ if (norm > 1e-9) {
37
+ evecs.col(i) /= norm;
38
+ } else {
39
+ evecs.col(i).setZero();
40
+ }
41
+ }
42
+ apply_threshold(evals, evecs, true);
43
+ } else {
44
+ Eigen::MatrixXd cov = (centered.transpose() * centered) / double(N - 1);
45
+ Eigen::SelfAdjointEigenSolver<Eigen::MatrixXd> solver(cov);
46
+
47
+ Eigen::VectorXd evals = solver.eigenvalues().reverse();
48
+ Eigen::MatrixXd evecs = solver.eigenvectors().rowwise().reverse();
49
+
50
+ apply_threshold(evals, evecs, false);
51
+ }
52
+ }
53
+
54
+ void PCA::apply_threshold(const Eigen::VectorXd& evals,
55
+ const Eigen::MatrixXd& evecs, bool is_thin_mode) {
56
+ num_components = 0;
57
+
58
+ if (threshold >= 1.0) {
59
+ num_components =
60
+ std::min(static_cast<int>(threshold), static_cast<int>(evals.size()));
61
+ } else {
62
+ double total_variance = evals.sum();
63
+ double cumulative_variance = 0.0;
64
+ for (int i = 0; i < evals.size(); ++i) {
65
+ num_components++;
66
+ cumulative_variance += evals[i];
67
+ if ((cumulative_variance / total_variance) >= threshold) break;
68
+ }
69
+ }
70
+
71
+ projection_matrix = evecs.leftCols(num_components);
72
+ std::cout << "[PCA] Fit complete. Mode: "
73
+ << (is_thin_mode ? "Thin" : "Standard")
74
+ << " | Components: " << num_components << std::endl;
75
+ }
76
+
77
+ Eigen::MatrixXd PCA::transform(const Eigen::MatrixXd& X) const {
78
+ Eigen::MatrixXd centered = X.rowwise() - mean;
79
+
80
+ Eigen::MatrixXd standardized = centered.array().rowwise() / std_dev.array();
81
+
82
+ return standardized * projection_matrix;
83
+ }
84
+
85
+ Eigen::MatrixXd PCA::fit_transform(const Eigen::MatrixXd& X) {
86
+ fit(X);
87
+ return transform(X);
88
+ }
@@ -0,0 +1,23 @@
1
+ #include <pybind11/eigen.h>
2
+ #include <pybind11/pybind11.h>
3
+ #include <pybind11/stl.h>
4
+
5
+ #include "KNNEngine.hpp"
6
+
7
+ namespace py = pybind11;
8
+
9
+ PYBIND11_MODULE(knn_core, m) {
10
+ m.doc() = "C++ KNN Engine with Adaptive PCA support";
11
+
12
+ py::class_<KNNConfig>(m, "KNNConfig")
13
+ .def(py::init<>())
14
+ .def_readwrite("k", &KNNConfig::k)
15
+ .def_readwrite("variance", &KNNConfig::variance);
16
+
17
+ py::class_<KNNEngine>(m, "KNNEngine")
18
+ .def(py::init<KNNConfig>())
19
+ .def("train", &KNNEngine::train, py::arg("X"), py::arg("y"),
20
+ py::arg("scale") = false)
21
+ .def("predict", &KNNEngine::predict)
22
+ .def("predict_batch", &KNNEngine::predict_batch);
23
+ }