nn-engine-core 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,62 @@
1
# Builds binary wheels on Linux, Windows and macOS plus a source distribution,
# then uploads every artifact to PyPI.
name: Build and Publish to PyPI

on:
  release:
    types: [published]
  # NOTE(review): a manual dispatch runs the publish job as well, because that
  # job has no `if:` condition. Confirm that this is intended.
  workflow_dispatch:

jobs:
  build_wheels:
    name: Build wheels on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os:
          - ubuntu-latest
          - windows-latest
          - macos-latest

    steps:
      - uses: actions/checkout@v4

      - name: Build wheels
        uses: pypa/cibuildwheel@v2.17.0
        env:
          # Installs the OpenMP runtime on macOS runners before building;
          # CMakeLists.txt links OpenMP only when find_package(OpenMP) succeeds.
          CIBW_BEFORE_ALL_MACOS: brew install libomp

      - uses: actions/upload-artifact@v4
        with:
          # The "cibw-" prefix is what the publish job's download pattern matches.
          name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
          path: ./wheelhouse/*.whl

  build_sdist:
    name: Build source distribution
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Build sdist
        run: pipx run build --sdist

      - uses: actions/upload-artifact@v4
        with:
          name: cibw-sdist
          path: dist/*.tar.gz

  publish-to-pypi:
    name: Publish Python distribution to PyPI
    needs:
      - build_wheels
      - build_sdist
    runs-on: ubuntu-latest
    environment:
      name: pypi
      url: https://pypi.org/p/nn-engine-core
    permissions:
      # No API token is passed to the publish step, so this presumably relies
      # on PyPI trusted publishing (OIDC) - TODO confirm.
      id-token: write
    steps:
      - name: Download all the dists
        uses: actions/download-artifact@v4
        with:
          pattern: cibw-*
          path: dist
          # Flatten all matching artifacts into the single dist/ directory.
          merge-multiple: true

      - name: Publish distribution to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,36 @@
1
+ # --- Build Artifacts ---
2
+ build/
3
+ out/
4
+ bin/
5
+ obj/
6
+ *.so
7
+ *.a
8
+ *.la
9
+ *.o
10
+ *.obj
11
+
12
+ # --- Python ---
13
+ __pycache__/
14
+ *.py[cod]
15
+ *$py.class
16
+ .venv/
17
+ venv/
18
+ ENV/
19
+ .pytest_cache/
20
+
21
+ # --- IDEs and Editors ---
22
+ .vscode/
23
+ .idea/
24
+ *.swp
25
+ *.swo
26
+ .clangd/
27
+ compile_commands.json
28
+
29
+ # --- Data & Logs ---
30
+ data/*.csv
31
+ !data/README.md
32
+ *.log
33
+
34
+ # --- CMake ---
35
+ CMakeUserPresets.json
36
+ _deps/
@@ -0,0 +1,63 @@
1
cmake_minimum_required(VERSION 3.18)

# CMP0135 (CMake >= 3.24): files extracted from a URL download get the
# extraction time as their timestamp, so changing the URL triggers a rebuild.
if(POLICY CMP0135)
  cmake_policy(SET CMP0135 NEW)
endif()

project(NN_ENGINE LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# -march=native emits instructions for the CPU of the *build* machine. That is
# fine for a local build, but a wheel compiled on a CI runner and installed
# elsewhere can then crash with an illegal-instruction fault. It is therefore
# opt-in:  cmake -DNN_ENGINE_NATIVE_ARCH=ON ...
option(NN_ENGINE_NATIVE_ARCH
  "Tune for the build machine's CPU (-march=native); the binary is not portable"
  OFF)

if(NOT MSVC)
  add_compile_options(-O3)
  # As before, -march=native is never passed on Apple platforms.
  if(NN_ENGINE_NATIVE_ARCH AND NOT APPLE)
    add_compile_options(-march=native)
  endif()
endif()

include(FetchContent)

# Eigen is used header-only: only its source tree is downloaded (no
# add_subdirectory), and the directory is added to the include path below.
FetchContent_Declare(
  eigen
  URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz
)
FetchContent_GetProperties(eigen)
if(NOT eigen_POPULATED)
  FetchContent_Populate(eigen)
endif()

FetchContent_Declare(
  pybind11
  URL https://github.com/pybind/pybind11/archive/refs/tags/v2.13.1.tar.gz
)
FetchContent_MakeAvailable(pybind11)

# Optional: the module also builds without OpenMP (see the branch below).
find_package(OpenMP)

pybind11_add_module(nn_core NO_EXTRAS
  src/binding.cpp
  src/core/Model.cpp
  src/parametric/LogisticNeuron.cpp
  src/parametric/DenseLayer.cpp
  src/parametric/ReLULayer.cpp
)

# The project's own headers are a regular include directory so that compiler
# warnings in them stay visible; only the third-party Eigen tree is SYSTEM.
target_include_directories(nn_core PRIVATE include)
target_include_directories(nn_core SYSTEM PRIVATE ${eigen_SOURCE_DIR})

target_link_libraries(nn_core PRIVATE pybind11::module)

if(OpenMP_CXX_FOUND)
  target_link_libraries(nn_core PRIVATE OpenMP::OpenMP_CXX)
  message(STATUS "OpenMP found: Multi-threading enabled in NNEngine.")
else()
  message(WARNING "OpenMP NOT found: Falling back to single-threaded NNEngine.")
endif()

# NOTE(review): EIGEN_USE_THREADS appears to be consulted only by Eigen's
# unsupported Tensor module, which these sources do not include - confirm
# whether this definition is still needed.
target_compile_definitions(nn_core PRIVATE EIGEN_USE_THREADS)

# Install the extension module at the wheel root so `import nn_core` works.
install(TARGETS nn_core DESTINATION .)
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 MLEngineProject
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,158 @@
1
+ Metadata-Version: 2.2
2
+ Name: nn-engine-core
3
+ Version: 0.1.0
4
+ Summary: A high-performance C++ parametric optimization backend for NNEngine
5
+ License: MIT
6
+ Requires-Python: >=3.8
7
+ Requires-Dist: numpy
8
+ Description-Content-Type: text/markdown
9
+
10
+ # NN Engine Core
11
+
12
+ [![PyPI version](https://img.shields.io/pypi/v/nn-engine-core?logo=pypi&logoColor=white)](https://pypi.org/project/nn-engine-core/)
13
+ [![Python](https://img.shields.io/pypi/pyversions/nn-engine-core?logo=python&logoColor=white)](https://pypi.org/project/nn-engine-core/)
14
+ [![Build system](https://img.shields.io/badge/build-scikit--build--core-blue?logo=cmake&logoColor=white)](https://scikit-build-core.readthedocs.io/)
15
+ [![Bindings](https://img.shields.io/badge/bindings-pybind11-4C72B0?logo=python&logoColor=white)](https://pybind11.readthedocs.io/)
16
+
17
+ A high-performance, fully native C++ Neural Network engine exposed to Python via pybind11.
18
+
19
+ Designed for rapid experimentation without the Python Global Interpreter Lock (GIL) overhead, `nn-engine-core` executes the entire deep learning training loop (forward pass, loss calculation, backpropagation, and weight updates) strictly in native C++ using Eigen, achieving significant speedups over standard Python-loop ML libraries.
20
+
21
+ ## Highlights
22
+
23
+ - **Native Loop Hoisting**: The `Model::fit` loop executes entirely in C++, eliminating per-epoch Python interpreter overhead.
24
+ - **Memory-Optimized Backpropagation**: Utilizes Eigen's `.noalias()` to perform in-place matrix calculus without allocating temporary memory buffers.
25
+ - **Mathematically Stable**: Built-in Xavier (Glorot) initialization and loss gradients averaged over the batch size, which helps prevent exploding gradients.
26
+ - **Cross-Platform Threading**: Optional OpenMP support. Multi-threading is used where available, with a graceful fallback to single-threaded standard C++ in restricted environments (e.g., Apple Clang without `libomp`).
27
+ - **Clean Python API**: A familiar Keras/scikit-learn style interface.
28
+
29
+ ## Repository Structure
30
+
31
+ ```text
32
+ .
33
+ ├── CMakeLists.txt
34
+ ├── pyproject.toml
35
+ ├── include/
36
+ │ ├── core/
37
+ │ │ ├── Layer.hpp
38
+ │ │ ├── Loss.hpp
39
+ │ │ └── Model.hpp
40
+ │ └── parametric/
41
+ │ ├── DenseLayer.hpp
42
+ │ ├── LogisticNeuron.hpp
43
+ │ ├── ReLULayer.hpp
44
+ │ └── Sequential.hpp
45
+ ├── src/
46
+ │ ├── binding.cpp
47
+ │ ├── core/
48
+ │ │ └── Model.cpp
49
+ │ └── parametric/
50
+ │ ├── DenseLayer.cpp
51
+ │ ├── LogisticNeuron.cpp
52
+ │ └── ReLULayer.cpp
53
+ └── examples/
54
+ └── script.py
55
+ ```
56
+
57
+ ## Requirements
58
+
59
+ - Python 3.8+
60
+ - CMake 3.18+
61
+ - C++17 compiler
62
+ - Ninja (recommended generator)
63
+
64
+ Python dependencies are declared in `pyproject.toml`:
65
+ - `numpy`
66
+
67
+ ## Installation
68
+
69
+ ### Option 1: Install from PyPI (recommended)
70
+
71
+ ```bash
72
+ pip install nn-engine-core
73
+ ```
74
+
75
+ ### Option 2: Install from source (editable)
76
+
77
+ From the project root:
78
+
79
+ ```bash
80
+ python -m pip install -U pip
81
+ python -m pip install -e .
82
+ ```
83
+
84
+ ## Quick Start
85
+
86
+ ```python
87
+ import numpy as np
88
+ import nn_core
89
+
90
+ # Example Data (Scaled)
91
+ X_train = np.random.rand(100, 20).astype(np.float64)
92
+ y_train = np.random.rand(100, 1).astype(np.float64)
93
+
94
+ # 1. Initialize the Model
95
+ model = nn_core.Model()
96
+
97
+ # 2. Build Architecture
98
+ model.add(nn_core.DenseLayer(20, 64))
99
+ model.add(nn_core.ReLULayer())
100
+ model.add(nn_core.DenseLayer(64, 1))
101
+
102
+ # 3. Compile with Loss Function
103
+ model.compile(nn_core.MSELoss())
104
+
105
+ # 4. Train (Executes entirely in C++)
106
+ model.fit(X_train, y_train, epochs=200, learning_rate=0.01, verbose=True)
107
+
108
+ # 5. Predict
109
+ sample = np.random.rand(1, 20).astype(np.float64)
110
+ predictions = model.predict(sample)
111
+ print("Prediction:", predictions)
112
+ ```
113
+
114
+ ## Python API
115
+
116
+ ### `Model()`
117
+ The orchestrator for the neural network.
118
+ - `add(layer)`: Appends a layer to the network sequence.
119
+ - `compile(loss_fn)`: Attaches a loss function to the model.
120
+ - `fit(X, y, epochs=100, learning_rate=0.01, verbose=True)`: Executes full-batch gradient descent. *Note: `X` and `y` must be 2D `float64` NumPy arrays.*
121
+ - `predict(X)`: Runs a forward pass on new data.
122
+
123
+ ### Layers (`nn_core.*`)
124
+ - `DenseLayer(input_dim: int, output_dim: int)`: A fully connected parametric layer using Xavier initialization.
125
+ - `ReLULayer()`: A non-parametric Rectified Linear Unit activation layer.
126
+ - `Sequential()`: The underlying layer container (automatically managed by `Model`).
127
+
128
+ ### Loss Functions (`nn_core.*`)
129
+ - `MSELoss()`: Mean Squared Error loss. Automatically normalizes gradients by batch size.
130
+
131
+ ## Benchmark Results
132
+
133
+ The following results were produced using a non-linear regression dataset (`sklearn.datasets.make_regression`, 5000 samples, 20 features, noise=0.1) trained over 200 epochs via Full-Batch Gradient Descent.
134
+
135
+ A direct comparison of `NNEngine` against `sklearn.neural_network.MLPRegressor` highlights the performance advantage of C++ loop hoisting and `noalias()` matrix optimization.
136
+
137
+ | Engine | MSE | Time | Speedup |
138
+ |---|---:|---:|---:|
139
+ | **NNEngine (C++)** | **0.067268** | **0.5593s** | **2.59×** |
140
+ | Scikit-Learn | 0.145411 | 1.4470s | 1.00× |
141
+
142
+ Console output:
143
+ ```text
144
+ --- Testing NNEngine: Non-Linear Regression ---
145
+ Training NNEngine...
146
+ NNEngine | MSE: 0.067268 | Time: 0.5593s
147
+
148
+ Training Scikit-Learn...
149
+ Scikit-Learn | MSE: 0.145411 | Time: 1.4470s
150
+
151
+ Speedup: 2.59x faster than Sklearn!
152
+ ```
153
+
154
+ ## Notes and Limitations
155
+
156
+ - Targets (`y`) passed to `model.fit()` must be strictly 2D arrays (e.g., shape `(N, 1)` for regression), unlike scikit-learn which often accepts 1D arrays.
157
+ - The optimizer is currently integrated as Vanilla Full-Batch Gradient Descent.
158
+ - Input and Target data should be standardized (e.g., mean `0`, variance `1`) before passing to `fit()` to maintain gradient stability.
@@ -0,0 +1,149 @@
1
+ # NN Engine Core
2
+
3
+ [![PyPI version](https://img.shields.io/pypi/v/nn-engine-core?logo=pypi&logoColor=white)](https://pypi.org/project/nn-engine-core/)
4
+ [![Python](https://img.shields.io/pypi/pyversions/nn-engine-core?logo=python&logoColor=white)](https://pypi.org/project/nn-engine-core/)
5
+ [![Build system](https://img.shields.io/badge/build-scikit--build--core-blue?logo=cmake&logoColor=white)](https://scikit-build-core.readthedocs.io/)
6
+ [![Bindings](https://img.shields.io/badge/bindings-pybind11-4C72B0?logo=python&logoColor=white)](https://pybind11.readthedocs.io/)
7
+
8
+ A high-performance, fully native C++ Neural Network engine exposed to Python via pybind11.
9
+
10
+ Designed for rapid experimentation without the Python Global Interpreter Lock (GIL) overhead, `nn-engine-core` executes the entire deep learning training loop (forward pass, loss calculation, backpropagation, and weight updates) strictly in native C++ using Eigen, achieving significant speedups over standard Python-loop ML libraries.
11
+
12
+ ## Highlights
13
+
14
+ - **Native Loop Hoisting**: The `Model::fit` loop executes entirely in C++, eliminating per-epoch Python interpreter overhead.
15
+ - **Memory-Optimized Backpropagation**: Utilizes Eigen's `.noalias()` to perform in-place matrix calculus without allocating temporary memory buffers.
16
+ - **Mathematically Stable**: Built-in Xavier (Glorot) initialization and loss gradients averaged over the batch size, which helps prevent exploding gradients.
17
+ - **Cross-Platform Threading**: Optional OpenMP support. Multi-threading is used where available, with a graceful fallback to single-threaded standard C++ in restricted environments (e.g., Apple Clang without `libomp`).
18
+ - **Clean Python API**: A familiar Keras/scikit-learn style interface.
19
+
20
+ ## Repository Structure
21
+
22
+ ```text
23
+ .
24
+ ├── CMakeLists.txt
25
+ ├── pyproject.toml
26
+ ├── include/
27
+ │ ├── core/
28
+ │ │ ├── Layer.hpp
29
+ │ │ ├── Loss.hpp
30
+ │ │ └── Model.hpp
31
+ │ └── parametric/
32
+ │ ├── DenseLayer.hpp
33
+ │ ├── LogisticNeuron.hpp
34
+ │ ├── ReLULayer.hpp
35
+ │ └── Sequential.hpp
36
+ ├── src/
37
+ │ ├── binding.cpp
38
+ │ ├── core/
39
+ │ │ └── Model.cpp
40
+ │ └── parametric/
41
+ │ ├── DenseLayer.cpp
42
+ │ ├── LogisticNeuron.cpp
43
+ │ └── ReLULayer.cpp
44
+ └── examples/
45
+ └── script.py
46
+ ```
47
+
48
+ ## Requirements
49
+
50
+ - Python 3.8+
51
+ - CMake 3.18+
52
+ - C++17 compiler
53
+ - Ninja (recommended generator)
54
+
55
+ Python dependencies are declared in `pyproject.toml`:
56
+ - `numpy`
57
+
58
+ ## Installation
59
+
60
+ ### Option 1: Install from PyPI (recommended)
61
+
62
+ ```bash
63
+ pip install nn-engine-core
64
+ ```
65
+
66
+ ### Option 2: Install from source (editable)
67
+
68
+ From the project root:
69
+
70
+ ```bash
71
+ python -m pip install -U pip
72
+ python -m pip install -e .
73
+ ```
74
+
75
+ ## Quick Start
76
+
77
+ ```python
78
+ import numpy as np
79
+ import nn_core
80
+
81
+ # Example Data (Scaled)
82
+ X_train = np.random.rand(100, 20).astype(np.float64)
83
+ y_train = np.random.rand(100, 1).astype(np.float64)
84
+
85
+ # 1. Initialize the Model
86
+ model = nn_core.Model()
87
+
88
+ # 2. Build Architecture
89
+ model.add(nn_core.DenseLayer(20, 64))
90
+ model.add(nn_core.ReLULayer())
91
+ model.add(nn_core.DenseLayer(64, 1))
92
+
93
+ # 3. Compile with Loss Function
94
+ model.compile(nn_core.MSELoss())
95
+
96
+ # 4. Train (Executes entirely in C++)
97
+ model.fit(X_train, y_train, epochs=200, learning_rate=0.01, verbose=True)
98
+
99
+ # 5. Predict
100
+ sample = np.random.rand(1, 20).astype(np.float64)
101
+ predictions = model.predict(sample)
102
+ print("Prediction:", predictions)
103
+ ```
104
+
105
+ ## Python API
106
+
107
+ ### `Model()`
108
+ The orchestrator for the neural network.
109
+ - `add(layer)`: Appends a layer to the network sequence.
110
+ - `compile(loss_fn)`: Attaches a loss function to the model.
111
+ - `fit(X, y, epochs=100, learning_rate=0.01, verbose=True)`: Executes full-batch gradient descent. *Note: `X` and `y` must be 2D `float64` NumPy arrays.*
112
+ - `predict(X)`: Runs a forward pass on new data.
113
+
114
+ ### Layers (`nn_core.*`)
115
+ - `DenseLayer(input_dim: int, output_dim: int)`: A fully connected parametric layer using Xavier initialization.
116
+ - `ReLULayer()`: A non-parametric Rectified Linear Unit activation layer.
117
+ - `Sequential()`: The underlying layer container (automatically managed by `Model`).
118
+
119
+ ### Loss Functions (`nn_core.*`)
120
+ - `MSELoss()`: Mean Squared Error loss. Automatically normalizes gradients by batch size.
121
+
122
+ ## Benchmark Results
123
+
124
+ The following results were produced using a non-linear regression dataset (`sklearn.datasets.make_regression`, 5000 samples, 20 features, noise=0.1) trained over 200 epochs via Full-Batch Gradient Descent.
125
+
126
+ A direct comparison of `NNEngine` against `sklearn.neural_network.MLPRegressor` highlights the performance advantage of C++ loop hoisting and `noalias()` matrix optimization.
127
+
128
+ | Engine | MSE | Time | Speedup |
129
+ |---|---:|---:|---:|
130
+ | **NNEngine (C++)** | **0.067268** | **0.5593s** | **2.59×** |
131
+ | Scikit-Learn | 0.145411 | 1.4470s | 1.00× |
132
+
133
+ Console output:
134
+ ```text
135
+ --- Testing NNEngine: Non-Linear Regression ---
136
+ Training NNEngine...
137
+ NNEngine | MSE: 0.067268 | Time: 0.5593s
138
+
139
+ Training Scikit-Learn...
140
+ Scikit-Learn | MSE: 0.145411 | Time: 1.4470s
141
+
142
+ Speedup: 2.59x faster than Sklearn!
143
+ ```
144
+
145
+ ## Notes and Limitations
146
+
147
+ - Targets (`y`) passed to `model.fit()` must be strictly 2D arrays (e.g., shape `(N, 1)` for regression), unlike scikit-learn which often accepts 1D arrays.
148
+ - The optimizer is currently integrated as Vanilla Full-Batch Gradient Descent.
149
+ - Input and Target data should be standardized (e.g., mean `0`, variance `1`) before passing to `fit()` to maintain gradient stability.
@@ -0,0 +1,94 @@
1
+ import nn_core
2
+ import numpy as np
3
+ import time
4
+ from sklearn.datasets import make_regression
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.preprocessing import StandardScaler
7
+ from sklearn.neural_network import MLPRegressor
8
+ from sklearn.metrics import mean_squared_error
9
+
10
def test_regression_performance():
    """Benchmark nn_core against scikit-learn's MLPRegressor on synthetic data.

    Both models use one hidden layer of 64 ReLU units and are trained with
    full-batch gradient descent for the same number of epochs and the same
    learning rate. The test-set MSE (on standardized targets) and the wall-clock
    training time of each are printed, followed by the relative speedup.
    """
    print("\n--- Testing NNEngine: Non-Linear Regression ---")

    # 1. Generate data: 5000 samples, 20 features.
    # make_regression draws the targets from a random linear model plus
    # Gaussian noise, so the underlying relationship itself is linear.
    X, y = make_regression(n_samples=5000, n_features=20, noise=0.1, random_state=42)

    # NNEngine strictly expects 2D matrices (MatrixRM) for both X and y.
    # Scikit-learn generates y as (N,), so we must reshape it to (N, 1).
    y = y.reshape(-1, 1)

    X_train, X_test, y_train, y_test = train_test_split(
        X.astype(np.float64), y.astype(np.float64), test_size=0.2, random_state=42
    )

    # Neural networks require scaled data to prevent exploding gradients.
    # The scalers are fitted on the training split only and reused for the
    # test split.
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()

    X_train_scaled = scaler_X.fit_transform(X_train)
    X_test_scaled = scaler_X.transform(X_test)
    y_train_scaled = scaler_y.fit_transform(y_train)
    y_test_scaled = scaler_y.transform(y_test)

    # Training configuration shared by both engines.
    epochs = 200
    lr = 0.01

    # ==========================================
    # Custom NNEngine (C++ Native)
    # ==========================================
    model = nn_core.Model()
    model.add(nn_core.DenseLayer(20, 64))
    model.add(nn_core.ReLULayer())
    model.add(nn_core.DenseLayer(64, 1))
    model.compile(nn_core.MSELoss())

    print("Training NNEngine...")
    t0 = time.perf_counter()
    # verbose=False to avoid cluttering the benchmark output.
    model.fit(X_train_scaled, y_train_scaled, epochs=epochs, learning_rate=lr, verbose=False)
    t1 = time.perf_counter()

    nn_preds = model.predict(X_test_scaled)
    nn_mse = mean_squared_error(y_test_scaled, nn_preds)
    nn_time = t1 - t0

    print(f"NNEngine | MSE: {nn_mse:.6f} | Time: {nn_time:.4f}s")

    # ==========================================
    # Scikit-Learn (MLPRegressor)
    # ==========================================
    print("Training Scikit-Learn...")
    # Configure sklearn to mirror the C++ engine:
    # - solver='sgd' with momentum=0 is plain gradient descent.
    # - batch_size=X_train.shape[0] forces full-batch updates.
    # - alpha=0.0 disables the default L2 penalty, which the engine lacks.
    # - n_iter_no_change=epochs keeps the tolerance-based stopping rule from
    #   ending training early, so both models run exactly `epochs` epochs.
    sk_model = MLPRegressor(
        hidden_layer_sizes=(64,),
        activation='relu',
        solver='sgd',
        alpha=0.0,
        batch_size=X_train.shape[0],
        learning_rate_init=lr,
        max_iter=epochs,
        momentum=0.0,
        n_iter_no_change=epochs,
        random_state=42
    )

    t0 = time.perf_counter()
    # sklearn expects y as (N,)
    sk_model.fit(X_train_scaled, y_train_scaled.ravel())
    t1 = time.perf_counter()

    sk_preds = sk_model.predict(X_test_scaled)
    sk_mse = mean_squared_error(y_test_scaled.ravel(), sk_preds)
    sk_time = t1 - t0

    print(f"Scikit-Learn | MSE: {sk_mse:.6f} | Time: {sk_time:.4f}s")

    if nn_time < sk_time:
        print(f"Speedup: {sk_time / nn_time:.2f}x faster than Sklearn!")
    else:
        print(f"Speedup: Sklearn is {nn_time / sk_time:.2f}x faster.")


if __name__ == "__main__":
    test_regression_performance()
@@ -0,0 +1,21 @@
1
+ #pragma once
2
+ #include <Eigen/Core>
3
+ #include <iostream>
4
+
5
+ namespace mlengine::core {
6
+
7
+ using MatrixRM =
8
+ Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
9
+
10
+ class Layer {
11
+ public:
12
+ virtual ~Layer() = default;
13
+
14
+ virtual void forward(const MatrixRM& input, MatrixRM& output) = 0;
15
+
16
+ virtual MatrixRM backward(const MatrixRM& output_gradient) = 0;
17
+
18
+ virtual void update_weights(double learning_rate) {}
19
+ };
20
+
21
+ }
@@ -0,0 +1,28 @@
1
+ #pragma once
2
+ #include <Eigen/Core>
3
+
4
+ namespace mlengine::core {
5
+
6
+ class Loss {
7
+ public:
8
+ virtual ~Loss() = default;
9
+ virtual double calculate(const MatrixRM& predictions,
10
+ const MatrixRM& targets) = 0;
11
+ virtual MatrixRM backward(const MatrixRM& predictions,
12
+ const MatrixRM& targets) = 0;
13
+ };
14
+
15
+ class MSELoss : public Loss {
16
+ public:
17
+ double calculate(const MatrixRM& predictions,
18
+ const MatrixRM& targets) override {
19
+ return (predictions - targets).squaredNorm() / predictions.rows();
20
+ }
21
+
22
+ MatrixRM backward(const MatrixRM& predictions,
23
+ const MatrixRM& targets) override {
24
+ return 2.0 * (predictions - targets) / predictions.rows();
25
+ }
26
+ };
27
+
28
+ }
@@ -0,0 +1,28 @@
1
+ #pragma once
2
+ #include <Eigen/Core>
3
+ #include <memory>
4
+
5
+ #include "core/Layer.hpp"
6
+ #include "core/Loss.hpp"
7
+ #include "parametric/Sequential.hpp"
8
+
9
+ namespace mlengine::core {
10
+
11
+ class Model {
12
+ public:
13
+ Model();
14
+
15
+ void add(std::shared_ptr<Layer> layer);
16
+ void compile(std::shared_ptr<Loss> loss_fn);
17
+
18
+ void fit(const MatrixRM& X, const MatrixRM& y, int epochs,
19
+ double learning_rate, bool verbose = true);
20
+
21
+ MatrixRM predict(const MatrixRM& X);
22
+
23
+ private:
24
+ std::shared_ptr<parametric::Sequential> network_;
25
+ std::shared_ptr<Loss> loss_fn_;
26
+ };
27
+
28
+ }
@@ -0,0 +1,25 @@
1
+ #pragma once
2
+ #include "core/Layer.hpp"
3
+
4
+ namespace mlengine::parametric {
5
+
6
+ class DenseLayer : public core::Layer {
7
+ public:
8
+ DenseLayer(int input_dim, int output_dim);
9
+
10
+ void forward(const core::MatrixRM& input, core::MatrixRM& output) override;
11
+ core::MatrixRM backward(const core::MatrixRM& output_gradient) override;
12
+ void update_weights(double learning_rate) override;
13
+
14
+ core::MatrixRM get_weights() const { return weights_; }
15
+ core::MatrixRM get_bias() const { return bias_; }
16
+
17
+ private:
18
+ core::MatrixRM weights_;
19
+ core::MatrixRM bias_;
20
+ core::MatrixRM last_input_;
21
+ core::MatrixRM dW_;
22
+ core::MatrixRM db_;
23
+ };
24
+
25
+ }
@@ -0,0 +1,29 @@
1
+ #pragma once
2
+ #include <Eigen/Core>
3
+
4
+ namespace mlengine::parametric {
5
+
6
+ using MatrixRM =
7
+ Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
8
+ using VectorRM = Eigen::Matrix<double, Eigen::Dynamic, 1>;
9
+
10
+ class LogisticNeuron {
11
+ public:
12
+ LogisticNeuron() = default;
13
+
14
+ void fit(const MatrixRM& X, const VectorRM& y, int epochs,
15
+ double learning_rate);
16
+
17
+ VectorRM predict_proba(const MatrixRM& X) const;
18
+
19
+ VectorRM predict(const MatrixRM& X) const;
20
+
21
+ VectorRM get_weights() const { return weights_; }
22
+ double get_bias() const { return bias_; }
23
+
24
+ private:
25
+ VectorRM weights_;
26
+ double bias_ = 0.0;
27
+ };
28
+
29
+ }
@@ -0,0 +1,15 @@
1
+ #pragma once
2
+ #include "core/Layer.hpp"
3
+
4
+ namespace mlengine::parametric {
5
+
6
+ class ReLULayer : public core::Layer {
7
+ public:
8
+ void forward(const core::MatrixRM& input, core::MatrixRM& output) override;
9
+ core::MatrixRM backward(const core::MatrixRM& output_gradient) override;
10
+
11
+ private:
12
+ core::MatrixRM last_input_;
13
+ };
14
+
15
+ }
@@ -0,0 +1,43 @@
1
+ #pragma once
2
+ #include <iostream>
3
+ #include <memory>
4
+ #include <vector>
5
+
6
+ #include "core/Layer.hpp"
7
+
8
+ namespace mlengine::parametric {
9
+
10
+ class Sequential : public core::Layer {
11
+ public:
12
+ void add(std::shared_ptr<core::Layer> layer) { layers_.push_back(layer); }
13
+
14
+ void forward(const core::MatrixRM& input, core::MatrixRM& output) override {
15
+ core::MatrixRM current_input = input;
16
+ core::MatrixRM current_output;
17
+
18
+ for (auto& layer : layers_) {
19
+ layer->forward(current_input, current_output);
20
+ current_input = current_output;
21
+ }
22
+ output = current_input;
23
+ }
24
+
25
+ core::MatrixRM backward(const core::MatrixRM& output_gradient) override {
26
+ core::MatrixRM gradient = output_gradient;
27
+ for (auto it = layers_.rbegin(); it != layers_.rend(); ++it) {
28
+ gradient = (*it)->backward(gradient);
29
+ }
30
+ return gradient;
31
+ }
32
+
33
+ void update_weights(double learning_rate) override {
34
+ for (auto& layer : layers_) {
35
+ layer->update_weights(learning_rate);
36
+ }
37
+ }
38
+
39
+ private:
40
+ std::vector<std::shared_ptr<core::Layer>> layers_;
41
+ };
42
+
43
+ } // namespace mlengine::parametric
@@ -0,0 +1,21 @@
1
# Build configuration: scikit-build-core drives the CMake build of the
# nn_core extension module.
[build-system]
requires = [
    "scikit-build-core",
    "pybind11",
]
build-backend = "scikit_build_core.build"

[project]
name = "nn-engine-core"
version = "0.1.0"
description = "A high-performance C++ parametric optimization backend for NNEngine"
readme = "README.md"
license = { text = "MIT" }
requires-python = ">=3.8"
dependencies = ["numpy"]

[tool.scikit-build]
# Matches cmake_minimum_required in CMakeLists.txt.
cmake.version = ">=3.18"
wheel.install-dir = "."

[tool.cibuildwheel]
# musllinux wheels are not built.
skip = "*-musllinux_*"
@@ -0,0 +1,85 @@
1
+ #include <pybind11/eigen.h>
2
+ #include <pybind11/pybind11.h>
3
+ #include <pybind11/stl.h>
4
+
5
+ #include <memory>
6
+
7
+ #include "core/Layer.hpp"
8
+ #include "core/Loss.hpp"
9
+ #include "core/Model.hpp"
10
+ #include "parametric/DenseLayer.hpp"
11
+ #include "parametric/LogisticNeuron.hpp"
12
+ #include "parametric/ReLULayer.hpp"
13
+ #include "parametric/Sequential.hpp"
14
+
15
+ namespace py = pybind11;
16
+ using namespace mlengine::core;
17
+ using namespace mlengine::parametric;
18
+
19
+ PYBIND11_MODULE(nn_core, m) {
20
+ m.doc() = "C++ Parametric Optimization Layer Engine for NNEngine";
21
+
22
+ py::class_<Layer, std::shared_ptr<Layer>>(m, "Layer")
23
+ .def("update_weights", &Layer::update_weights);
24
+
25
+ py::class_<Loss, std::shared_ptr<Loss>>(m, "Loss");
26
+
27
+ py::class_<MSELoss, Loss, std::shared_ptr<MSELoss>>(m, "MSELoss")
28
+ .def(py::init<>())
29
+ .def("calculate", &MSELoss::calculate)
30
+ .def("backward", &MSELoss::backward);
31
+
32
+ py::class_<LogisticNeuron>(m, "LogisticNeuron")
33
+ .def(py::init<>())
34
+ .def("fit", &LogisticNeuron::fit, py::arg("X"), py::arg("y"),
35
+ py::arg("epochs"), py::arg("learning_rate"))
36
+ .def("predict_proba", &LogisticNeuron::predict_proba)
37
+ .def("predict", &LogisticNeuron::predict)
38
+ .def("get_weights", &LogisticNeuron::get_weights)
39
+ .def("get_bias", &LogisticNeuron::get_bias);
40
+
41
+ py::class_<DenseLayer, Layer, std::shared_ptr<DenseLayer>>(m, "DenseLayer")
42
+ .def(py::init<int, int>())
43
+ .def("forward",
44
+ [](DenseLayer& self, const MatrixRM& input) {
45
+ MatrixRM output;
46
+ self.forward(input, output);
47
+ return output;
48
+ })
49
+ .def("backward", &DenseLayer::backward)
50
+ .def("update_weights", &DenseLayer::update_weights)
51
+ .def("get_weights", &DenseLayer::get_weights)
52
+ .def("get_bias", &DenseLayer::get_bias);
53
+
54
+ py::class_<ReLULayer, Layer, std::shared_ptr<ReLULayer>>(m, "ReLULayer")
55
+ .def(py::init<>())
56
+ .def("forward",
57
+ [](ReLULayer& self, const MatrixRM& input) {
58
+ MatrixRM output;
59
+ self.forward(input, output);
60
+ return output;
61
+ })
62
+ .def("backward", &ReLULayer::backward)
63
+ .def("update_weights", &ReLULayer::update_weights);
64
+
65
+ py::class_<Sequential, Layer, std::shared_ptr<Sequential>>(m, "Sequential")
66
+ .def(py::init<>())
67
+ .def("add", &Sequential::add, py::arg("layer"))
68
+ .def("forward",
69
+ [](Sequential& self, const MatrixRM& input) {
70
+ MatrixRM output;
71
+ self.forward(input, output);
72
+ return output;
73
+ })
74
+ .def("backward", &Sequential::backward)
75
+ .def("update_weights", &Sequential::update_weights);
76
+
77
+ py::class_<Model, std::shared_ptr<Model>>(m, "Model")
78
+ .def(py::init<>())
79
+ .def("add", &Model::add, py::arg("layer"))
80
+ .def("compile", &Model::compile, py::arg("loss_fn"))
81
+ .def("fit", &Model::fit, py::arg("X"), py::arg("y"),
82
+ py::arg("epochs") = 100, py::arg("learning_rate") = 0.01,
83
+ py::arg("verbose") = true)
84
+ .def("predict", &Model::predict, py::arg("X"));
85
+ }
@@ -0,0 +1,46 @@
1
+ #include "core/Model.hpp"
2
+
3
+ #include <algorithm>
4
+ #include <iostream>
5
+ #include <stdexcept>
6
+
7
+ namespace mlengine::core {
8
+
9
+ Model::Model() { network_ = std::make_shared<parametric::Sequential>(); }
10
+
11
+ void Model::add(std::shared_ptr<Layer> layer) { network_->add(layer); }
12
+
13
+ void Model::compile(std::shared_ptr<Loss> loss_fn) { loss_fn_ = loss_fn; }
14
+
15
+ void Model::fit(const MatrixRM& X, const MatrixRM& y, int epochs,
16
+ double learning_rate, bool verbose) {
17
+ if (!loss_fn_) {
18
+ throw std::runtime_error(
19
+ "Model must be compiled with a loss function before fitting.");
20
+ }
21
+
22
+ for (int epoch = 0; epoch < epochs; ++epoch) {
23
+ MatrixRM predictions;
24
+ network_->forward(X, predictions);
25
+
26
+ double loss_val = loss_fn_->calculate(predictions, y);
27
+
28
+ MatrixRM loss_gradient = loss_fn_->backward(predictions, y);
29
+ network_->backward(loss_gradient);
30
+
31
+ network_->update_weights(learning_rate);
32
+
33
+ if (verbose &&
34
+ (epoch % std::max(1, epochs / 10) == 0 || epoch == epochs - 1)) {
35
+ std::cout << "Epoch " << epoch << " | Loss: " << loss_val << std::endl;
36
+ }
37
+ }
38
+ }
39
+
40
+ MatrixRM Model::predict(const MatrixRM& X) {
41
+ MatrixRM predictions;
42
+ network_->forward(X, predictions);
43
+ return predictions;
44
+ }
45
+
46
+ }
@@ -0,0 +1,30 @@
1
#include "parametric/DenseLayer.hpp"

#include <cmath>
#include <stdexcept>
#include <string>
4
+
5
+ namespace mlengine::parametric {
6
+
7
+ DenseLayer::DenseLayer(int input_dim, int output_dim) {
8
+ double limit = std::sqrt(6.0 / (input_dim + output_dim));
9
+ weights_ = core::MatrixRM::Random(input_dim, output_dim) * limit;
10
+ bias_ = core::MatrixRM::Zero(1, output_dim);
11
+ }
12
+
13
+ void DenseLayer::forward(const core::MatrixRM& input, core::MatrixRM& output) {
14
+ last_input_ = input;
15
+ output.noalias() = (input * weights_).rowwise() + bias_.row(0);
16
+ }
17
+
18
+ core::MatrixRM DenseLayer::backward(const core::MatrixRM& output_gradient) {
19
+ dW_.noalias() = last_input_.transpose() * output_gradient;
20
+ db_ = output_gradient.colwise().sum();
21
+
22
+ return output_gradient * weights_.transpose();
23
+ }
24
+
25
+ void DenseLayer::update_weights(double learning_rate) {
26
+ weights_ -= learning_rate * dW_;
27
+ bias_ -= learning_rate * db_;
28
+ }
29
+
30
+ } // namespace mlengine::parametric
@@ -0,0 +1,41 @@
1
+ #include "parametric/LogisticNeuron.hpp"
2
+
3
+ #include <cmath>
4
+
5
+ namespace mlengine::parametric {
6
+
7
+ void LogisticNeuron::fit(const MatrixRM& X, const VectorRM& y, int epochs,
8
+ double learning_rate) {
9
+ int m = X.rows();
10
+ int n = X.cols();
11
+
12
+ weights_ = VectorRM::Zero(n);
13
+ bias_ = 0.0;
14
+
15
+ for (int epoch = 0; epoch < epochs; ++epoch) {
16
+ VectorRM Z = (X * weights_).array() + bias_;
17
+
18
+ VectorRM A =
19
+ Z.unaryExpr([](double z) { return 1.0 / (1.0 + std::exp(-z)); });
20
+
21
+ VectorRM error = A - y;
22
+
23
+ // dW = (X^T * error) / m
24
+ VectorRM dW = (X.transpose() * error) / m;
25
+ double db = error.sum() / m;
26
+ weights_ -= learning_rate * dW;
27
+ bias_ -= learning_rate * db;
28
+ }
29
+ }
30
+
31
+ VectorRM LogisticNeuron::predict_proba(const MatrixRM& X) const {
32
+ VectorRM Z = (X * weights_).array() + bias_;
33
+ return Z.unaryExpr([](double z) { return 1.0 / (1.0 + std::exp(-z)); });
34
+ }
35
+
36
+ VectorRM LogisticNeuron::predict(const MatrixRM& X) const {
37
+ VectorRM proba = predict_proba(X);
38
+ return proba.unaryExpr([](double p) { return p >= 0.5 ? 1.0 : 0.0; });
39
+ }
40
+
41
+ } // namespace mlengine::parametric
@@ -0,0 +1,16 @@
1
+ #include "parametric/ReLULayer.hpp"
2
+
3
+ namespace mlengine::parametric {
4
+
5
+ void ReLULayer::forward(const core::MatrixRM& input, core::MatrixRM& output) {
6
+ last_input_ = input;
7
+ output = input.cwiseMax(0.0);
8
+ }
9
+
10
+ core::MatrixRM ReLULayer::backward(const core::MatrixRM& output_gradient) {
11
+ core::MatrixRM dX = output_gradient;
12
+ dX = (last_input_.array() > 0.0).select(dX, 0.0);
13
+ return dX;
14
+ }
15
+
16
+ }