nn-engine-core 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nn_engine_core-0.1.0/.github/workflows/publish.yml +62 -0
- nn_engine_core-0.1.0/.gitignore +36 -0
- nn_engine_core-0.1.0/CMakeLists.txt +63 -0
- nn_engine_core-0.1.0/LICENSE +21 -0
- nn_engine_core-0.1.0/PKG-INFO +158 -0
- nn_engine_core-0.1.0/README.md +149 -0
- nn_engine_core-0.1.0/examples/script.py +94 -0
- nn_engine_core-0.1.0/include/core/Layer.hpp +21 -0
- nn_engine_core-0.1.0/include/core/Loss.hpp +28 -0
- nn_engine_core-0.1.0/include/core/Model.hpp +28 -0
- nn_engine_core-0.1.0/include/parametric/DenseLayer.hpp +25 -0
- nn_engine_core-0.1.0/include/parametric/LogisticNeuron.hpp +29 -0
- nn_engine_core-0.1.0/include/parametric/ReLULayer.hpp +15 -0
- nn_engine_core-0.1.0/include/parametric/Sequential.hpp +43 -0
- nn_engine_core-0.1.0/pyproject.toml +21 -0
- nn_engine_core-0.1.0/src/binding.cpp +85 -0
- nn_engine_core-0.1.0/src/core/Model.cpp +46 -0
- nn_engine_core-0.1.0/src/parametric/DenseLayer.cpp +30 -0
- nn_engine_core-0.1.0/src/parametric/LogisticNeuron.cpp +41 -0
- nn_engine_core-0.1.0/src/parametric/ReLULayer.cpp +16 -0
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
name: Build and Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types:
|
|
6
|
+
- published
|
|
7
|
+
workflow_dispatch:
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
build_wheels:
|
|
11
|
+
name: Build wheels on ${{ matrix.os }}
|
|
12
|
+
runs-on: ${{ matrix.os }}
|
|
13
|
+
strategy:
|
|
14
|
+
matrix:
|
|
15
|
+
os: [ubuntu-latest, windows-latest, macos-latest]
|
|
16
|
+
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
|
|
20
|
+
- name: Build wheels
|
|
21
|
+
uses: pypa/cibuildwheel@v2.17.0
|
|
22
|
+
env:
|
|
23
|
+
CIBW_BEFORE_ALL_MACOS: brew install libomp
|
|
24
|
+
|
|
25
|
+
- uses: actions/upload-artifact@v4
|
|
26
|
+
with:
|
|
27
|
+
name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
|
|
28
|
+
path: ./wheelhouse/*.whl
|
|
29
|
+
|
|
30
|
+
build_sdist:
|
|
31
|
+
name: Build source distribution
|
|
32
|
+
runs-on: ubuntu-latest
|
|
33
|
+
steps:
|
|
34
|
+
- uses: actions/checkout@v4
|
|
35
|
+
|
|
36
|
+
- name: Build sdist
|
|
37
|
+
run: pipx run build --sdist
|
|
38
|
+
|
|
39
|
+
- uses: actions/upload-artifact@v4
|
|
40
|
+
with:
|
|
41
|
+
name: cibw-sdist
|
|
42
|
+
path: dist/*.tar.gz
|
|
43
|
+
|
|
44
|
+
publish-to-pypi:
|
|
45
|
+
name: Publish Python distribution to PyPI
|
|
46
|
+
needs: [build_wheels, build_sdist]
|
|
47
|
+
runs-on: ubuntu-latest
|
|
48
|
+
environment:
|
|
49
|
+
name: pypi
|
|
50
|
+
url: https://pypi.org/p/nn-engine-core
|
|
51
|
+
permissions:
|
|
52
|
+
id-token: write
|
|
53
|
+
steps:
|
|
54
|
+
- name: Download all the dists
|
|
55
|
+
uses: actions/download-artifact@v4
|
|
56
|
+
with:
|
|
57
|
+
pattern: cibw-*
|
|
58
|
+
path: dist
|
|
59
|
+
merge-multiple: true
|
|
60
|
+
|
|
61
|
+
- name: Publish distribution to PyPI
|
|
62
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# --- Build Artifacts ---
|
|
2
|
+
build/
|
|
3
|
+
out/
|
|
4
|
+
bin/
|
|
5
|
+
obj/
|
|
6
|
+
*.so
|
|
7
|
+
*.a
|
|
8
|
+
*.la
|
|
9
|
+
*.o
|
|
10
|
+
*.obj
|
|
11
|
+
|
|
12
|
+
# --- Python ---
|
|
13
|
+
__pycache__/
|
|
14
|
+
*.py[cod]
|
|
15
|
+
*$py.class
|
|
16
|
+
.venv/
|
|
17
|
+
venv/
|
|
18
|
+
ENV/
|
|
19
|
+
.pytest_cache/
|
|
20
|
+
|
|
21
|
+
# --- IDEs and Editors ---
|
|
22
|
+
.vscode/
|
|
23
|
+
.idea/
|
|
24
|
+
*.swp
|
|
25
|
+
*.swo
|
|
26
|
+
.clangd/
|
|
27
|
+
compile_commands.json
|
|
28
|
+
|
|
29
|
+
# --- Data & Logs ---
|
|
30
|
+
data/*.csv
|
|
31
|
+
!data/README.md
|
|
32
|
+
*.log
|
|
33
|
+
|
|
34
|
+
# --- CMake ---
|
|
35
|
+
CMakeUserPresets.json
|
|
36
|
+
_deps/
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
cmake_minimum_required(VERSION 3.18)
|
|
2
|
+
|
|
3
|
+
if(POLICY CMP0135)
|
|
4
|
+
cmake_policy(SET CMP0135 NEW)
|
|
5
|
+
endif()
|
|
6
|
+
|
|
7
|
+
project(NN_ENGINE LANGUAGES CXX)
|
|
8
|
+
|
|
9
|
+
set(CMAKE_CXX_STANDARD 17)
|
|
10
|
+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
|
11
|
+
|
|
12
|
+
# Optimisation flags for GCC/Clang style compilers (MSVC keeps its defaults).
#
# -march=native tunes the binary for the CPU of the *build* machine. That is
# unsafe for redistributed artefacts such as the wheels produced in CI: the
# resulting extension can fault with "illegal instruction" on any CPU lacking
# the build host's instruction-set extensions. It is therefore opt-in, e.g.
#   cmake -DNN_ENGINE_NATIVE_ARCH=ON ...
# and is still never applied on Apple platforms, matching the previous logic.
option(NN_ENGINE_NATIVE_ARCH
       "Tune the build for the host CPU (-march=native); not for redistributable wheels"
       OFF)

if(NOT MSVC)
  add_compile_options(-O3)
  if(NN_ENGINE_NATIVE_ARCH AND NOT APPLE)
    add_compile_options(-march=native)
  endif()
endif()
|
|
19
|
+
|
|
20
|
+
include(FetchContent)
|
|
21
|
+
|
|
22
|
+
FetchContent_Declare(
|
|
23
|
+
eigen
|
|
24
|
+
URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz
|
|
25
|
+
)
|
|
26
|
+
FetchContent_GetProperties(eigen)
|
|
27
|
+
if(NOT eigen_POPULATED)
|
|
28
|
+
FetchContent_Populate(eigen)
|
|
29
|
+
endif()
|
|
30
|
+
|
|
31
|
+
FetchContent_Declare(
|
|
32
|
+
pybind11
|
|
33
|
+
URL https://github.com/pybind/pybind11/archive/refs/tags/v2.13.1.tar.gz
|
|
34
|
+
)
|
|
35
|
+
FetchContent_MakeAvailable(pybind11)
|
|
36
|
+
|
|
37
|
+
find_package(OpenMP)
|
|
38
|
+
|
|
39
|
+
pybind11_add_module(nn_core NO_EXTRAS
|
|
40
|
+
src/binding.cpp
|
|
41
|
+
src/core/Model.cpp
|
|
42
|
+
src/parametric/LogisticNeuron.cpp
|
|
43
|
+
src/parametric/DenseLayer.cpp
|
|
44
|
+
src/parametric/ReLULayer.cpp
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
# First-party headers: a regular include path so that compiler warnings raised
# inside the project's own headers are still reported.
target_include_directories(nn_core PRIVATE
  include
)

# Third-party headers (Eigen): SYSTEM so their warnings stay suppressed.
target_include_directories(nn_core SYSTEM PRIVATE
  ${eigen_SOURCE_DIR}
)
|
|
51
|
+
|
|
52
|
+
target_link_libraries(nn_core PRIVATE pybind11::module)
|
|
53
|
+
|
|
54
|
+
if(OpenMP_CXX_FOUND)
|
|
55
|
+
target_link_libraries(nn_core PRIVATE OpenMP::OpenMP_CXX)
|
|
56
|
+
message(STATUS "OpenMP found: Multi-threading enabled in NNEngine.")
|
|
57
|
+
else()
|
|
58
|
+
message(WARNING "OpenMP NOT found: Falling back to single-threaded NNEngine.")
|
|
59
|
+
endif()
|
|
60
|
+
|
|
61
|
+
target_compile_definitions(nn_core PRIVATE EIGEN_USE_THREADS)
|
|
62
|
+
|
|
63
|
+
install(TARGETS nn_core DESTINATION .)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 MLEngineProject
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: nn-engine-core
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A high-performance C++ parametric optimization backend for NNEngine
|
|
5
|
+
License: MIT
|
|
6
|
+
Requires-Python: >=3.8
|
|
7
|
+
Requires-Dist: numpy
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
|
|
10
|
+
# NN Engine Core
|
|
11
|
+
|
|
12
|
+
[](https://pypi.org/project/nn-engine-core/)
|
|
13
|
+
[](https://pypi.org/project/nn-engine-core/)
|
|
14
|
+
[](https://scikit-build-core.readthedocs.io/)
|
|
15
|
+
[](https://pybind11.readthedocs.io/)
|
|
16
|
+
|
|
17
|
+
A high-performance, fully native C++ Neural Network engine exposed to Python via pybind11.
|
|
18
|
+
|
|
19
|
+
Designed for rapid experimentation without the Python Global Interpreter Lock (GIL) overhead, `nn-engine-core` executes the entire deep learning training loop (forward pass, loss calculation, backpropagation, and weight updates) strictly in native C++ using Eigen, achieving significant speedups over standard Python-loop ML libraries.
|
|
20
|
+
|
|
21
|
+
## Highlights
|
|
22
|
+
|
|
23
|
+
- **Native Loop Hoisting**: The `Model::fit` loop executes entirely in C++, eliminating the Python GIL overhead across epochs.
|
|
24
|
+
- **Memory-Optimized Backpropagation**: Utilizes Eigen's `.noalias()` to perform in-place matrix calculus without allocating temporary memory buffers.
|
|
25
|
+
- **Mathematically Stable**: Built-in Xavier (Glorot) initialization and batch-normalized gradients to prevent exploding gradients.
|
|
26
|
+
- **Cross-Platform Threading**: Graceful degradation OpenMP support. Uses multi-threading where available, gracefully falling back to single-threaded standard C++ on restricted environments (e.g., Apple Clang without `libomp`).
|
|
27
|
+
- **Clean Python API**: A familiar Keras/scikit-learn style interface.
|
|
28
|
+
|
|
29
|
+
## Repository Structure
|
|
30
|
+
|
|
31
|
+
```text
|
|
32
|
+
.
|
|
33
|
+
├── CMakeLists.txt
|
|
34
|
+
├── pyproject.toml
|
|
35
|
+
├── include/
|
|
36
|
+
│ ├── core/
|
|
37
|
+
│ │ ├── Layer.hpp
|
|
38
|
+
│ │ ├── Loss.hpp
|
|
39
|
+
│ │ └── Model.hpp
|
|
40
|
+
│ └── parametric/
|
|
41
|
+
│ ├── DenseLayer.hpp
|
|
42
|
+
│ ├── LogisticNeuron.hpp
|
|
43
|
+
│ ├── ReLULayer.hpp
|
|
44
|
+
│ └── Sequential.hpp
|
|
45
|
+
├── src/
|
|
46
|
+
│ ├── binding.cpp
|
|
47
|
+
│ ├── core/
|
|
48
|
+
│ │ └── Model.cpp
|
|
49
|
+
│ └── parametric/
|
|
50
|
+
│ ├── DenseLayer.cpp
|
|
51
|
+
│ ├── LogisticNeuron.cpp
|
|
52
|
+
│ └── ReLULayer.cpp
|
|
53
|
+
└── examples/
|
|
54
|
+
└── script.py
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Requirements
|
|
58
|
+
|
|
59
|
+
- Python 3.8+
|
|
60
|
+
- CMake 3.18+
|
|
61
|
+
- C++17 compiler
|
|
62
|
+
- Ninja (recommended generator)
|
|
63
|
+
|
|
64
|
+
Python dependencies are declared in `pyproject.toml`:
|
|
65
|
+
- `numpy`
|
|
66
|
+
|
|
67
|
+
## Installation
|
|
68
|
+
|
|
69
|
+
### Option 1: Install from PyPI (recommended)
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
pip install nn-engine-core
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### Option 2: Install from source (editable)
|
|
76
|
+
|
|
77
|
+
From the project root:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
python -m pip install -U pip
|
|
81
|
+
python -m pip install -e .
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Quick Start
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
import numpy as np
|
|
88
|
+
import nn_core
|
|
89
|
+
|
|
90
|
+
# Example Data (Scaled)
|
|
91
|
+
X_train = np.random.rand(100, 20).astype(np.float64)
|
|
92
|
+
y_train = np.random.rand(100, 1).astype(np.float64)
|
|
93
|
+
|
|
94
|
+
# 1. Initialize the Model
|
|
95
|
+
model = nn_core.Model()
|
|
96
|
+
|
|
97
|
+
# 2. Build Architecture
|
|
98
|
+
model.add(nn_core.DenseLayer(20, 64))
|
|
99
|
+
model.add(nn_core.ReLULayer())
|
|
100
|
+
model.add(nn_core.DenseLayer(64, 1))
|
|
101
|
+
|
|
102
|
+
# 3. Compile with Loss Function
|
|
103
|
+
model.compile(nn_core.MSELoss())
|
|
104
|
+
|
|
105
|
+
# 4. Train (Executes entirely in C++)
|
|
106
|
+
model.fit(X_train, y_train, epochs=200, learning_rate=0.01, verbose=True)
|
|
107
|
+
|
|
108
|
+
# 5. Predict
|
|
109
|
+
sample = np.random.rand(1, 20).astype(np.float64)
|
|
110
|
+
predictions = model.predict(sample)
|
|
111
|
+
print("Prediction:", predictions)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## Python API
|
|
115
|
+
|
|
116
|
+
### `Model()`
|
|
117
|
+
The orchestrator for the neural network.
|
|
118
|
+
- `add(layer)`: Appends a layer to the network sequence.
|
|
119
|
+
- `compile(loss_fn)`: Attaches a loss function to the model.
|
|
120
|
+
- `fit(X, y, epochs=100, learning_rate=0.01, verbose=True)`: Executes full-batch gradient descent. *Note: `X` and `y` must be 2D `float64` NumPy arrays.*
|
|
121
|
+
- `predict(X)`: Runs a forward pass on new data.
|
|
122
|
+
|
|
123
|
+
### Layers (`nn_core.*`)
|
|
124
|
+
- `DenseLayer(input_dim: int, output_dim: int)`: A fully connected parametric layer using Xavier initialization.
|
|
125
|
+
- `ReLULayer()`: A non-parametric Rectified Linear Unit activation layer.
|
|
126
|
+
- `Sequential()`: The underlying layer container (automatically managed by `Model`).
|
|
127
|
+
|
|
128
|
+
### Loss Functions (`nn_core.*`)
|
|
129
|
+
- `MSELoss()`: Mean Squared Error loss. Automatically normalizes gradients by batch size.
|
|
130
|
+
|
|
131
|
+
## Benchmark Results
|
|
132
|
+
|
|
133
|
+
The following results were produced using a non-linear regression dataset (`sklearn.datasets.make_regression`, 5000 samples, 20 features, noise=0.1) trained over 200 epochs via Full-Batch Gradient Descent.
|
|
134
|
+
|
|
135
|
+
Comparing `NNEngine` against `sklearn.neural_network.MLPRegressor` natively highlights the extreme performance advantage of C++ loop hoisting and `noalias()` matrix optimization.
|
|
136
|
+
|
|
137
|
+
| Engine | MSE | Time | Speedup |
|
|
138
|
+
|---|---:|---:|---:|
|
|
139
|
+
| **NNEngine (C++)** | **0.067268** | **0.5593s** | **2.59×** |
|
|
140
|
+
| Scikit-Learn | 0.145411 | 1.4470s | 1.00× |
|
|
141
|
+
|
|
142
|
+
Console output:
|
|
143
|
+
```text
|
|
144
|
+
--- Testing NNEngine: Non-Linear Regression ---
|
|
145
|
+
Training NNEngine...
|
|
146
|
+
NNEngine | MSE: 0.067268 | Time: 0.5593s
|
|
147
|
+
|
|
148
|
+
Training Scikit-Learn...
|
|
149
|
+
Scikit-Learn | MSE: 0.145411 | Time: 1.4470s
|
|
150
|
+
|
|
151
|
+
Speedup: 2.59x faster than Sklearn!
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
## Notes and Limitations
|
|
155
|
+
|
|
156
|
+
- Targets (`y`) passed to `model.fit()` must be strictly 2D arrays (e.g., shape `(N, 1)` for regression), unlike scikit-learn which often accepts 1D arrays.
|
|
157
|
+
- The optimizer is currently integrated as Vanilla Full-Batch Gradient Descent.
|
|
158
|
+
- Input and Target data should be standardized (e.g., mean `0`, variance `1`) before passing to `fit()` to maintain gradient stability.
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# NN Engine Core
|
|
2
|
+
|
|
3
|
+
[PyPI version](https://pypi.org/project/nn-engine-core/)
|
|
4
|
+
[Python versions](https://pypi.org/project/nn-engine-core/)
|
|
5
|
+
[scikit-build-core](https://scikit-build-core.readthedocs.io/)
|
|
6
|
+
[pybind11](https://pybind11.readthedocs.io/)
|
|
7
|
+
|
|
8
|
+
A high-performance, fully native C++ Neural Network engine exposed to Python via pybind11.
|
|
9
|
+
|
|
10
|
+
Designed for rapid experimentation without the Python Global Interpreter Lock (GIL) overhead, `nn-engine-core` executes the entire deep learning training loop (forward pass, loss calculation, backpropagation, and weight updates) strictly in native C++ using Eigen, achieving significant speedups over standard Python-loop ML libraries.
|
|
11
|
+
|
|
12
|
+
## Highlights
|
|
13
|
+
|
|
14
|
+
- **Native Loop Hoisting**: The `Model::fit` loop executes entirely in C++, eliminating the Python GIL overhead across epochs.
|
|
15
|
+
- **Memory-Optimized Backpropagation**: Utilizes Eigen's `.noalias()` to perform in-place matrix calculus without allocating temporary memory buffers.
|
|
16
|
+
- **Mathematically Stable**: Built-in Xavier (Glorot) initialization and gradients averaged over the batch size (this is not batch normalization) to help prevent exploding gradients.
|
|
17
|
+
- **Cross-Platform Threading**: Optional OpenMP support. Uses multi-threading where available and falls back gracefully to single-threaded standard C++ in restricted environments (e.g., Apple Clang without `libomp`).
|
|
18
|
+
- **Clean Python API**: A familiar Keras/scikit-learn style interface.
|
|
19
|
+
|
|
20
|
+
## Repository Structure
|
|
21
|
+
|
|
22
|
+
```text
|
|
23
|
+
.
|
|
24
|
+
├── CMakeLists.txt
|
|
25
|
+
├── pyproject.toml
|
|
26
|
+
├── include/
|
|
27
|
+
│ ├── core/
|
|
28
|
+
│ │ ├── Layer.hpp
|
|
29
|
+
│ │ ├── Loss.hpp
|
|
30
|
+
│ │ └── Model.hpp
|
|
31
|
+
│ └── parametric/
|
|
32
|
+
│ ├── DenseLayer.hpp
|
|
33
|
+
│ ├── LogisticNeuron.hpp
|
|
34
|
+
│ ├── ReLULayer.hpp
|
|
35
|
+
│ └── Sequential.hpp
|
|
36
|
+
├── src/
|
|
37
|
+
│ ├── binding.cpp
|
|
38
|
+
│ ├── core/
|
|
39
|
+
│ │ └── Model.cpp
|
|
40
|
+
│ └── parametric/
|
|
41
|
+
│ ├── DenseLayer.cpp
|
|
42
|
+
│ ├── LogisticNeuron.cpp
|
|
43
|
+
│ └── ReLULayer.cpp
|
|
44
|
+
└── examples/
|
|
45
|
+
└── script.py
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Requirements
|
|
49
|
+
|
|
50
|
+
- Python 3.8+
|
|
51
|
+
- CMake 3.18+
|
|
52
|
+
- C++17 compiler
|
|
53
|
+
- Ninja (recommended generator)
|
|
54
|
+
|
|
55
|
+
Python dependencies are declared in `pyproject.toml`:
|
|
56
|
+
- `numpy`
|
|
57
|
+
|
|
58
|
+
## Installation
|
|
59
|
+
|
|
60
|
+
### Option 1: Install from PyPI (recommended)
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
pip install nn-engine-core
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Option 2: Install from source (editable)
|
|
67
|
+
|
|
68
|
+
From the project root:
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
python -m pip install -U pip
|
|
72
|
+
python -m pip install -e .
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Quick Start
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
import numpy as np
|
|
79
|
+
import nn_core
|
|
80
|
+
|
|
81
|
+
# Example Data (Scaled)
|
|
82
|
+
X_train = np.random.rand(100, 20).astype(np.float64)
|
|
83
|
+
y_train = np.random.rand(100, 1).astype(np.float64)
|
|
84
|
+
|
|
85
|
+
# 1. Initialize the Model
|
|
86
|
+
model = nn_core.Model()
|
|
87
|
+
|
|
88
|
+
# 2. Build Architecture
|
|
89
|
+
model.add(nn_core.DenseLayer(20, 64))
|
|
90
|
+
model.add(nn_core.ReLULayer())
|
|
91
|
+
model.add(nn_core.DenseLayer(64, 1))
|
|
92
|
+
|
|
93
|
+
# 3. Compile with Loss Function
|
|
94
|
+
model.compile(nn_core.MSELoss())
|
|
95
|
+
|
|
96
|
+
# 4. Train (Executes entirely in C++)
|
|
97
|
+
model.fit(X_train, y_train, epochs=200, learning_rate=0.01, verbose=True)
|
|
98
|
+
|
|
99
|
+
# 5. Predict
|
|
100
|
+
sample = np.random.rand(1, 20).astype(np.float64)
|
|
101
|
+
predictions = model.predict(sample)
|
|
102
|
+
print("Prediction:", predictions)
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Python API
|
|
106
|
+
|
|
107
|
+
### `Model()`
|
|
108
|
+
The orchestrator for the neural network.
|
|
109
|
+
- `add(layer)`: Appends a layer to the network sequence.
|
|
110
|
+
- `compile(loss_fn)`: Attaches a loss function to the model.
|
|
111
|
+
- `fit(X, y, epochs=100, learning_rate=0.01, verbose=True)`: Executes full-batch gradient descent. *Note: `X` and `y` must be 2D `float64` NumPy arrays.*
|
|
112
|
+
- `predict(X)`: Runs a forward pass on new data.
|
|
113
|
+
|
|
114
|
+
### Layers (`nn_core.*`)
|
|
115
|
+
- `DenseLayer(input_dim: int, output_dim: int)`: A fully connected parametric layer using Xavier initialization.
|
|
116
|
+
- `ReLULayer()`: A non-parametric Rectified Linear Unit activation layer.
|
|
117
|
+
- `Sequential()`: The underlying layer container (automatically managed by `Model`).
|
|
118
|
+
|
|
119
|
+
### Loss Functions (`nn_core.*`)
|
|
120
|
+
- `MSELoss()`: Mean Squared Error loss. Automatically normalizes gradients by batch size.
|
|
121
|
+
|
|
122
|
+
## Benchmark Results
|
|
123
|
+
|
|
124
|
+
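The following results were produced using a synthetic regression dataset (`sklearn.datasets.make_regression`, 5000 samples, 20 features, noise=0.1; note that `make_regression` derives its targets from a linear model plus Gaussian noise), with both models trained for 200 epochs via Full-Batch Gradient Descent.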
|
|
125
|
+
|
|
126
|
+
Comparing `NNEngine` directly against `sklearn.neural_network.MLPRegressor` highlights the performance advantage of C++ loop hoisting and `noalias()` matrix optimization.
|
|
127
|
+
|
|
128
|
+
| Engine | MSE | Time | Speedup |
|
|
129
|
+
|---|---:|---:|---:|
|
|
130
|
+
| **NNEngine (C++)** | **0.067268** | **0.5593s** | **2.59×** |
|
|
131
|
+
| Scikit-Learn | 0.145411 | 1.4470s | 1.00× |
|
|
132
|
+
|
|
133
|
+
Console output:
|
|
134
|
+
```text
|
|
135
|
+
--- Testing NNEngine: Non-Linear Regression ---
|
|
136
|
+
Training NNEngine...
|
|
137
|
+
NNEngine | MSE: 0.067268 | Time: 0.5593s
|
|
138
|
+
|
|
139
|
+
Training Scikit-Learn...
|
|
140
|
+
Scikit-Learn | MSE: 0.145411 | Time: 1.4470s
|
|
141
|
+
|
|
142
|
+
Speedup: 2.59x faster than Sklearn!
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
## Notes and Limitations
|
|
146
|
+
|
|
147
|
+
- Targets (`y`) passed to `model.fit()` must be strictly 2D arrays (e.g., shape `(N, 1)` for regression), unlike scikit-learn which often accepts 1D arrays.
|
|
148
|
+
- The optimizer is currently integrated as Vanilla Full-Batch Gradient Descent.
|
|
149
|
+
- Input and Target data should be standardized (e.g., mean `0`, variance `1`) before passing to `fit()` to maintain gradient stability.
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import nn_core
|
|
2
|
+
import numpy as np
|
|
3
|
+
import time
|
|
4
|
+
from sklearn.datasets import make_regression
|
|
5
|
+
from sklearn.model_selection import train_test_split
|
|
6
|
+
from sklearn.preprocessing import StandardScaler
|
|
7
|
+
from sklearn.neural_network import MLPRegressor
|
|
8
|
+
from sklearn.metrics import mean_squared_error
|
|
9
|
+
|
|
10
|
+
def test_regression_performance():
    """Time nn_core against scikit-learn's MLPRegressor on one synthetic task.

    Both models are trained on the same standardized train split and scored
    with mean squared error on the same standardized test split. Results are
    printed; nothing is returned.
    """
    print("\n--- Testing NNEngine: Non-Linear Regression ---")

    # --- Data -------------------------------------------------------------
    # 5000 samples with 20 features and Gaussian noise of 0.1.
    features, targets = make_regression(
        n_samples=5000, n_features=20, noise=0.1, random_state=42
    )

    # nn_core takes 2D matrices for inputs and targets alike, whereas
    # make_regression returns the targets with shape (N,).
    targets = targets.reshape(-1, 1)

    X_train, X_test, y_train, y_test = train_test_split(
        features.astype(np.float64),
        targets.astype(np.float64),
        test_size=0.2,
        random_state=42,
    )

    # Standardize inputs and targets; the scalers are fitted on the training
    # split only and then reused for the test split.
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()

    X_train_scaled = scaler_X.fit_transform(X_train)
    X_test_scaled = scaler_X.transform(X_test)
    y_train_scaled = scaler_y.fit_transform(y_train)
    y_test_scaled = scaler_y.transform(y_test)

    # --- Shared hyper-parameters -------------------------------------------
    epochs = 200
    lr = 0.01

    # --- nn_core (C++ extension) -------------------------------------------
    model = nn_core.Model()
    for layer in (
        nn_core.DenseLayer(20, 64),
        nn_core.ReLULayer(),
        nn_core.DenseLayer(64, 1),
    ):
        model.add(layer)
    model.compile(nn_core.MSELoss())

    print("Training NNEngine...")
    fit_start = time.perf_counter()
    # verbose=False keeps per-epoch logging out of the benchmark output.
    model.fit(
        X_train_scaled,
        y_train_scaled,
        epochs=epochs,
        learning_rate=lr,
        verbose=False,
    )
    fit_end = time.perf_counter()

    nn_preds = model.predict(X_test_scaled)
    nn_mse = mean_squared_error(y_test_scaled, nn_preds)
    nn_time = fit_end - fit_start

    print(f"NNEngine | MSE: {nn_mse:.6f} | Time: {nn_time:.4f}s")

    # --- scikit-learn MLPRegressor -----------------------------------------
    print("Training Scikit-Learn...")
    # Configured to resemble the C++ engine: plain SGD without momentum, and a
    # batch size equal to the whole training set (full-batch gradient descent).
    # NOTE(review): MLPRegressor's remaining defaults (for example its L2
    # penalty and tolerance-based stopping) are left untouched, so the two
    # training runs may not be strictly equivalent - confirm before quoting
    # the numbers.
    sk_model = MLPRegressor(
        hidden_layer_sizes=(64,),
        activation='relu',
        solver='sgd',
        batch_size=X_train.shape[0],
        learning_rate_init=lr,
        max_iter=epochs,
        momentum=0.0,
        random_state=42,
    )

    fit_start = time.perf_counter()
    # scikit-learn wants the targets flattened to shape (N,).
    sk_model.fit(X_train_scaled, y_train_scaled.ravel())
    fit_end = time.perf_counter()

    sk_preds = sk_model.predict(X_test_scaled)
    sk_mse = mean_squared_error(y_test_scaled.ravel(), sk_preds)
    sk_time = fit_end - fit_start

    print(f"Scikit-Learn | MSE: {sk_mse:.6f} | Time: {sk_time:.4f}s")

    # --- Verdict -------------------------------------------------------------
    if nn_time >= sk_time:
        print(f"Speedup: Sklearn is {nn_time / sk_time:.2f}x faster.")
    else:
        print(f"Speedup: {sk_time / nn_time:.2f}x faster than Sklearn!")


if __name__ == "__main__":
    test_regression_performance()
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
#include <Eigen/Core>
|
|
3
|
+
#include <iostream>
|
|
4
|
+
|
|
5
|
+
namespace mlengine::core {
|
|
6
|
+
|
|
7
|
+
using MatrixRM =
|
|
8
|
+
Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
|
|
9
|
+
|
|
10
|
+
class Layer {
|
|
11
|
+
public:
|
|
12
|
+
virtual ~Layer() = default;
|
|
13
|
+
|
|
14
|
+
virtual void forward(const MatrixRM& input, MatrixRM& output) = 0;
|
|
15
|
+
|
|
16
|
+
virtual MatrixRM backward(const MatrixRM& output_gradient) = 0;
|
|
17
|
+
|
|
18
|
+
virtual void update_weights(double learning_rate) {}
|
|
19
|
+
};
|
|
20
|
+
|
|
21
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
#pragma once
#include <Eigen/Core>
#include <stdexcept>

#include "core/Layer.hpp"
|
|
3
|
+
|
|
4
|
+
namespace mlengine::core {
|
|
5
|
+
|
|
6
|
+
class Loss {
|
|
7
|
+
public:
|
|
8
|
+
virtual ~Loss() = default;
|
|
9
|
+
virtual double calculate(const MatrixRM& predictions,
|
|
10
|
+
const MatrixRM& targets) = 0;
|
|
11
|
+
virtual MatrixRM backward(const MatrixRM& predictions,
|
|
12
|
+
const MatrixRM& targets) = 0;
|
|
13
|
+
};
|
|
14
|
+
|
|
15
|
+
class MSELoss : public Loss {
|
|
16
|
+
public:
|
|
17
|
+
double calculate(const MatrixRM& predictions,
|
|
18
|
+
const MatrixRM& targets) override {
|
|
19
|
+
return (predictions - targets).squaredNorm() / predictions.rows();
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
MatrixRM backward(const MatrixRM& predictions,
|
|
23
|
+
const MatrixRM& targets) override {
|
|
24
|
+
return 2.0 * (predictions - targets) / predictions.rows();
|
|
25
|
+
}
|
|
26
|
+
};
|
|
27
|
+
|
|
28
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
#include <Eigen/Core>
|
|
3
|
+
#include <memory>
|
|
4
|
+
|
|
5
|
+
#include "core/Layer.hpp"
|
|
6
|
+
#include "core/Loss.hpp"
|
|
7
|
+
#include "parametric/Sequential.hpp"
|
|
8
|
+
|
|
9
|
+
namespace mlengine::core {
|
|
10
|
+
|
|
11
|
+
class Model {
|
|
12
|
+
public:
|
|
13
|
+
Model();
|
|
14
|
+
|
|
15
|
+
void add(std::shared_ptr<Layer> layer);
|
|
16
|
+
void compile(std::shared_ptr<Loss> loss_fn);
|
|
17
|
+
|
|
18
|
+
void fit(const MatrixRM& X, const MatrixRM& y, int epochs,
|
|
19
|
+
double learning_rate, bool verbose = true);
|
|
20
|
+
|
|
21
|
+
MatrixRM predict(const MatrixRM& X);
|
|
22
|
+
|
|
23
|
+
private:
|
|
24
|
+
std::shared_ptr<parametric::Sequential> network_;
|
|
25
|
+
std::shared_ptr<Loss> loss_fn_;
|
|
26
|
+
};
|
|
27
|
+
|
|
28
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
#include "core/Layer.hpp"
|
|
3
|
+
|
|
4
|
+
namespace mlengine::parametric {
|
|
5
|
+
|
|
6
|
+
class DenseLayer : public core::Layer {
|
|
7
|
+
public:
|
|
8
|
+
DenseLayer(int input_dim, int output_dim);
|
|
9
|
+
|
|
10
|
+
void forward(const core::MatrixRM& input, core::MatrixRM& output) override;
|
|
11
|
+
core::MatrixRM backward(const core::MatrixRM& output_gradient) override;
|
|
12
|
+
void update_weights(double learning_rate) override;
|
|
13
|
+
|
|
14
|
+
core::MatrixRM get_weights() const { return weights_; }
|
|
15
|
+
core::MatrixRM get_bias() const { return bias_; }
|
|
16
|
+
|
|
17
|
+
private:
|
|
18
|
+
core::MatrixRM weights_;
|
|
19
|
+
core::MatrixRM bias_;
|
|
20
|
+
core::MatrixRM last_input_;
|
|
21
|
+
core::MatrixRM dW_;
|
|
22
|
+
core::MatrixRM db_;
|
|
23
|
+
};
|
|
24
|
+
|
|
25
|
+
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
#include <Eigen/Core>
|
|
3
|
+
|
|
4
|
+
namespace mlengine::parametric {
|
|
5
|
+
|
|
6
|
+
using MatrixRM =
|
|
7
|
+
Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
|
|
8
|
+
using VectorRM = Eigen::Matrix<double, Eigen::Dynamic, 1>;
|
|
9
|
+
|
|
10
|
+
class LogisticNeuron {
|
|
11
|
+
public:
|
|
12
|
+
LogisticNeuron() = default;
|
|
13
|
+
|
|
14
|
+
void fit(const MatrixRM& X, const VectorRM& y, int epochs,
|
|
15
|
+
double learning_rate);
|
|
16
|
+
|
|
17
|
+
VectorRM predict_proba(const MatrixRM& X) const;
|
|
18
|
+
|
|
19
|
+
VectorRM predict(const MatrixRM& X) const;
|
|
20
|
+
|
|
21
|
+
VectorRM get_weights() const { return weights_; }
|
|
22
|
+
double get_bias() const { return bias_; }
|
|
23
|
+
|
|
24
|
+
private:
|
|
25
|
+
VectorRM weights_;
|
|
26
|
+
double bias_ = 0.0;
|
|
27
|
+
};
|
|
28
|
+
|
|
29
|
+
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
#include "core/Layer.hpp"
|
|
3
|
+
|
|
4
|
+
namespace mlengine::parametric {
|
|
5
|
+
|
|
6
|
+
class ReLULayer : public core::Layer {
|
|
7
|
+
public:
|
|
8
|
+
void forward(const core::MatrixRM& input, core::MatrixRM& output) override;
|
|
9
|
+
core::MatrixRM backward(const core::MatrixRM& output_gradient) override;
|
|
10
|
+
|
|
11
|
+
private:
|
|
12
|
+
core::MatrixRM last_input_;
|
|
13
|
+
};
|
|
14
|
+
|
|
15
|
+
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
#include <iostream>
|
|
3
|
+
#include <memory>
|
|
4
|
+
#include <vector>
|
|
5
|
+
|
|
6
|
+
#include "core/Layer.hpp"
|
|
7
|
+
|
|
8
|
+
namespace mlengine::parametric {
|
|
9
|
+
|
|
10
|
+
class Sequential : public core::Layer {
|
|
11
|
+
public:
|
|
12
|
+
void add(std::shared_ptr<core::Layer> layer) { layers_.push_back(layer); }
|
|
13
|
+
|
|
14
|
+
void forward(const core::MatrixRM& input, core::MatrixRM& output) override {
|
|
15
|
+
core::MatrixRM current_input = input;
|
|
16
|
+
core::MatrixRM current_output;
|
|
17
|
+
|
|
18
|
+
for (auto& layer : layers_) {
|
|
19
|
+
layer->forward(current_input, current_output);
|
|
20
|
+
current_input = current_output;
|
|
21
|
+
}
|
|
22
|
+
output = current_input;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
core::MatrixRM backward(const core::MatrixRM& output_gradient) override {
|
|
26
|
+
core::MatrixRM gradient = output_gradient;
|
|
27
|
+
for (auto it = layers_.rbegin(); it != layers_.rend(); ++it) {
|
|
28
|
+
gradient = (*it)->backward(gradient);
|
|
29
|
+
}
|
|
30
|
+
return gradient;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
void update_weights(double learning_rate) override {
|
|
34
|
+
for (auto& layer : layers_) {
|
|
35
|
+
layer->update_weights(learning_rate);
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
private:
|
|
40
|
+
std::vector<std::shared_ptr<core::Layer>> layers_;
|
|
41
|
+
};
|
|
42
|
+
|
|
43
|
+
} // namespace mlengine::parametric
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["scikit-build-core", "pybind11"]
|
|
3
|
+
build-backend = "scikit_build_core.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "nn-engine-core"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "A high-performance C++ parametric optimization backend for NNEngine"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = {text = "MIT"}
|
|
11
|
+
requires-python = ">=3.8"
|
|
12
|
+
dependencies = [
|
|
13
|
+
"numpy"
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[tool.scikit-build]
|
|
17
|
+
cmake.version = ">=3.18"
|
|
18
|
+
wheel.install-dir = "."
|
|
19
|
+
|
|
20
|
+
[tool.cibuildwheel]
|
|
21
|
+
skip = "*-musllinux_*"
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
#include <pybind11/eigen.h>
|
|
2
|
+
#include <pybind11/pybind11.h>
|
|
3
|
+
#include <pybind11/stl.h>
|
|
4
|
+
|
|
5
|
+
#include <memory>
|
|
6
|
+
|
|
7
|
+
#include "core/Layer.hpp"
|
|
8
|
+
#include "core/Loss.hpp"
|
|
9
|
+
#include "core/Model.hpp"
|
|
10
|
+
#include "parametric/DenseLayer.hpp"
|
|
11
|
+
#include "parametric/LogisticNeuron.hpp"
|
|
12
|
+
#include "parametric/ReLULayer.hpp"
|
|
13
|
+
#include "parametric/Sequential.hpp"
|
|
14
|
+
|
|
15
|
+
namespace py = pybind11;
|
|
16
|
+
using namespace mlengine::core;
|
|
17
|
+
using namespace mlengine::parametric;
|
|
18
|
+
|
|
19
|
+
PYBIND11_MODULE(nn_core, m) {
|
|
20
|
+
m.doc() = "C++ Parametric Optimization Layer Engine for NNEngine";
|
|
21
|
+
|
|
22
|
+
py::class_<Layer, std::shared_ptr<Layer>>(m, "Layer")
|
|
23
|
+
.def("update_weights", &Layer::update_weights);
|
|
24
|
+
|
|
25
|
+
py::class_<Loss, std::shared_ptr<Loss>>(m, "Loss");
|
|
26
|
+
|
|
27
|
+
py::class_<MSELoss, Loss, std::shared_ptr<MSELoss>>(m, "MSELoss")
|
|
28
|
+
.def(py::init<>())
|
|
29
|
+
.def("calculate", &MSELoss::calculate)
|
|
30
|
+
.def("backward", &MSELoss::backward);
|
|
31
|
+
|
|
32
|
+
py::class_<LogisticNeuron>(m, "LogisticNeuron")
|
|
33
|
+
.def(py::init<>())
|
|
34
|
+
.def("fit", &LogisticNeuron::fit, py::arg("X"), py::arg("y"),
|
|
35
|
+
py::arg("epochs"), py::arg("learning_rate"))
|
|
36
|
+
.def("predict_proba", &LogisticNeuron::predict_proba)
|
|
37
|
+
.def("predict", &LogisticNeuron::predict)
|
|
38
|
+
.def("get_weights", &LogisticNeuron::get_weights)
|
|
39
|
+
.def("get_bias", &LogisticNeuron::get_bias);
|
|
40
|
+
|
|
41
|
+
py::class_<DenseLayer, Layer, std::shared_ptr<DenseLayer>>(m, "DenseLayer")
|
|
42
|
+
.def(py::init<int, int>())
|
|
43
|
+
.def("forward",
|
|
44
|
+
[](DenseLayer& self, const MatrixRM& input) {
|
|
45
|
+
MatrixRM output;
|
|
46
|
+
self.forward(input, output);
|
|
47
|
+
return output;
|
|
48
|
+
})
|
|
49
|
+
.def("backward", &DenseLayer::backward)
|
|
50
|
+
.def("update_weights", &DenseLayer::update_weights)
|
|
51
|
+
.def("get_weights", &DenseLayer::get_weights)
|
|
52
|
+
.def("get_bias", &DenseLayer::get_bias);
|
|
53
|
+
|
|
54
|
+
py::class_<ReLULayer, Layer, std::shared_ptr<ReLULayer>>(m, "ReLULayer")
|
|
55
|
+
.def(py::init<>())
|
|
56
|
+
.def("forward",
|
|
57
|
+
[](ReLULayer& self, const MatrixRM& input) {
|
|
58
|
+
MatrixRM output;
|
|
59
|
+
self.forward(input, output);
|
|
60
|
+
return output;
|
|
61
|
+
})
|
|
62
|
+
.def("backward", &ReLULayer::backward)
|
|
63
|
+
.def("update_weights", &ReLULayer::update_weights);
|
|
64
|
+
|
|
65
|
+
py::class_<Sequential, Layer, std::shared_ptr<Sequential>>(m, "Sequential")
|
|
66
|
+
.def(py::init<>())
|
|
67
|
+
.def("add", &Sequential::add, py::arg("layer"))
|
|
68
|
+
.def("forward",
|
|
69
|
+
[](Sequential& self, const MatrixRM& input) {
|
|
70
|
+
MatrixRM output;
|
|
71
|
+
self.forward(input, output);
|
|
72
|
+
return output;
|
|
73
|
+
})
|
|
74
|
+
.def("backward", &Sequential::backward)
|
|
75
|
+
.def("update_weights", &Sequential::update_weights);
|
|
76
|
+
|
|
77
|
+
py::class_<Model, std::shared_ptr<Model>>(m, "Model")
|
|
78
|
+
.def(py::init<>())
|
|
79
|
+
.def("add", &Model::add, py::arg("layer"))
|
|
80
|
+
.def("compile", &Model::compile, py::arg("loss_fn"))
|
|
81
|
+
.def("fit", &Model::fit, py::arg("X"), py::arg("y"),
|
|
82
|
+
py::arg("epochs") = 100, py::arg("learning_rate") = 0.01,
|
|
83
|
+
py::arg("verbose") = true)
|
|
84
|
+
.def("predict", &Model::predict, py::arg("X"));
|
|
85
|
+
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
#include "core/Model.hpp"
|
|
2
|
+
|
|
3
|
+
#include <algorithm>
|
|
4
|
+
#include <iostream>
|
|
5
|
+
#include <stdexcept>
|
|
6
|
+
|
|
7
|
+
namespace mlengine::core {
|
|
8
|
+
|
|
9
|
+
/// Creates a model whose network is a default-constructed Sequential.
Model::Model() : network_(std::make_shared<parametric::Sequential>()) {}
|
|
10
|
+
|
|
11
|
+
/// Appends `layer` to the underlying network.
///
/// @param layer  Layer to append; must not be null.
/// @throws std::invalid_argument if `layer` is null. A null pointer would
///         otherwise be stored and only dereferenced later, when the network
///         runs.
void Model::add(std::shared_ptr<Layer> layer) {
  if (!layer) {
    throw std::invalid_argument("Cannot add a null layer to the model.");
  }
  network_->add(layer);
}
|
|
12
|
+
|
|
13
|
+
void Model::compile(std::shared_ptr<Loss> loss_fn) { loss_fn_ = loss_fn; }
|
|
14
|
+
|
|
15
|
+
void Model::fit(const MatrixRM& X, const MatrixRM& y, int epochs,
|
|
16
|
+
double learning_rate, bool verbose) {
|
|
17
|
+
if (!loss_fn_) {
|
|
18
|
+
throw std::runtime_error(
|
|
19
|
+
"Model must be compiled with a loss function before fitting.");
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
for (int epoch = 0; epoch < epochs; ++epoch) {
|
|
23
|
+
MatrixRM predictions;
|
|
24
|
+
network_->forward(X, predictions);
|
|
25
|
+
|
|
26
|
+
double loss_val = loss_fn_->calculate(predictions, y);
|
|
27
|
+
|
|
28
|
+
MatrixRM loss_gradient = loss_fn_->backward(predictions, y);
|
|
29
|
+
network_->backward(loss_gradient);
|
|
30
|
+
|
|
31
|
+
network_->update_weights(learning_rate);
|
|
32
|
+
|
|
33
|
+
if (verbose &&
|
|
34
|
+
(epoch % std::max(1, epochs / 10) == 0 || epoch == epochs - 1)) {
|
|
35
|
+
std::cout << "Epoch " << epoch << " | Loss: " << loss_val << std::endl;
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
/// Runs a forward pass of the network over `X`.
///
/// @param X  Input matrix handed to the network.
/// @return The matrix the network writes as its output.
MatrixRM Model::predict(const MatrixRM& X) {
  MatrixRM result;
  network_->forward(X, result);
  return result;
}
|
|
45
|
+
|
|
46
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
#include "parametric/DenseLayer.hpp"
|
|
2
|
+
|
|
3
|
+
#include <cmath>
|
|
4
|
+
|
|
5
|
+
namespace mlengine::parametric {
|
|
6
|
+
|
|
7
|
+
/// Builds a fully connected layer with randomly initialised weights and a
/// zero bias.
///
/// Weights are `MatrixRM::Random` values scaled by sqrt(6 / (input_dim +
/// output_dim)), i.e. the Glorot/Xavier uniform bound.
///
/// @param input_dim   Number of rows of the weight matrix.
/// @param output_dim  Number of columns of the weight matrix and of the bias.
DenseLayer::DenseLayer(int input_dim, int output_dim) {
  const int fan_sum = input_dim + output_dim;
  const double scale = std::sqrt(6.0 / fan_sum);

  weights_ = core::MatrixRM::Random(input_dim, output_dim) * scale;
  bias_ = core::MatrixRM::Zero(1, output_dim);
}
|
|
12
|
+
|
|
13
|
+
/// Computes `output = input * weights_ + bias_`, adding the bias row to every
/// row of the product.
///
/// @param input   Matrix to transform; a copy is kept for backward().
/// @param output  Overwritten with the affine result.
void DenseLayer::forward(const core::MatrixRM& input, core::MatrixRM& output) {
  // backward() multiplies by this cached input to form the weight gradient.
  last_input_ = input;

  const auto bias_row = bias_.row(0);
  output.noalias() = (input * weights_).rowwise() + bias_row;
}
|
|
17
|
+
|
|
18
|
+
/// Stores the parameter gradients for the next update_weights() call and
/// returns the gradient with respect to the layer input.
///
/// @param output_gradient  Gradient with respect to this layer's output.
/// @return `output_gradient * weights_^T`.
core::MatrixRM DenseLayer::backward(const core::MatrixRM& output_gradient) {
  // dW = X^T * dY, using the input cached by the last forward() call.
  dW_.noalias() = last_input_.transpose() * output_gradient;
  // db = column sums of dY (the bias is shared by every row).
  db_ = output_gradient.colwise().sum();

  core::MatrixRM input_gradient = output_gradient * weights_.transpose();
  return input_gradient;
}
|
|
24
|
+
|
|
25
|
+
/// Applies one gradient-descent step: `weights_ -= lr * dW_` and
/// `bias_ -= lr * db_`.
///
/// The call is a no-op while the stored gradients do not match the parameter
/// shapes. That is presumably the state before the first backward() call
/// (verify against the header's member initialisation).
///
/// @param learning_rate  Step size multiplying both gradients.
void DenseLayer::update_weights(double learning_rate) {
  // Eigen checks operand shapes only through assertions, which optimized
  // builds compile out; subtracting a mismatched buffer would read out of
  // bounds, so skip the update instead.
  const bool weight_grad_ready =
      dW_.rows() == weights_.rows() && dW_.cols() == weights_.cols();
  const bool bias_grad_ready = db_.size() == bias_.size();
  if (!weight_grad_ready || !bias_grad_ready) {
    return;
  }

  weights_ -= learning_rate * dW_;
  bias_ -= learning_rate * db_;
}
|
|
29
|
+
|
|
30
|
+
} // namespace mlengine::parametric
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
#include "parametric/LogisticNeuron.hpp"
|
|
2
|
+
|
|
3
|
+
#include <cmath>
|
|
4
|
+
|
|
5
|
+
namespace mlengine::parametric {
|
|
6
|
+
|
|
7
|
+
void LogisticNeuron::fit(const MatrixRM& X, const VectorRM& y, int epochs,
|
|
8
|
+
double learning_rate) {
|
|
9
|
+
int m = X.rows();
|
|
10
|
+
int n = X.cols();
|
|
11
|
+
|
|
12
|
+
weights_ = VectorRM::Zero(n);
|
|
13
|
+
bias_ = 0.0;
|
|
14
|
+
|
|
15
|
+
for (int epoch = 0; epoch < epochs; ++epoch) {
|
|
16
|
+
VectorRM Z = (X * weights_).array() + bias_;
|
|
17
|
+
|
|
18
|
+
VectorRM A =
|
|
19
|
+
Z.unaryExpr([](double z) { return 1.0 / (1.0 + std::exp(-z)); });
|
|
20
|
+
|
|
21
|
+
VectorRM error = A - y;
|
|
22
|
+
|
|
23
|
+
// dW = (X^T * error) / m
|
|
24
|
+
VectorRM dW = (X.transpose() * error) / m;
|
|
25
|
+
double db = error.sum() / m;
|
|
26
|
+
weights_ -= learning_rate * dW;
|
|
27
|
+
bias_ -= learning_rate * db;
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
/// Evaluates the logistic function of `X * weights_ + bias_` for every row.
///
/// @param X  Feature matrix, one sample per row.
/// @return Vector holding one sigmoid output per row of `X`.
VectorRM LogisticNeuron::predict_proba(const MatrixRM& X) const {
  const auto sigmoid = [](double z) { return 1.0 / (1.0 + std::exp(-z)); };

  VectorRM logits = (X * weights_).array() + bias_;
  return logits.unaryExpr(sigmoid);
}
|
|
35
|
+
|
|
36
|
+
/// Thresholds predict_proba() at 0.5.
///
/// @param X  Feature matrix, one sample per row.
/// @return Vector of 1.0 where the probability is >= 0.5 and 0.0 elsewhere.
VectorRM LogisticNeuron::predict(const MatrixRM& X) const {
  const auto to_label = [](double p) -> double {
    if (p >= 0.5) {
      return 1.0;
    }
    return 0.0;
  };

  VectorRM probabilities = predict_proba(X);
  return probabilities.unaryExpr(to_label);
}
|
|
40
|
+
|
|
41
|
+
} // namespace mlengine::parametric
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
#include "parametric/ReLULayer.hpp"
|
|
2
|
+
|
|
3
|
+
namespace mlengine::parametric {
|
|
4
|
+
|
|
5
|
+
/// Applies max(x, 0) to every coefficient of `input`.
///
/// @param input   Matrix to rectify; a copy is kept for backward().
/// @param output  Overwritten with the rectified values.
void ReLULayer::forward(const core::MatrixRM& input, core::MatrixRM& output) {
  // backward() needs the pre-activation values to know which entries passed.
  last_input_ = input;

  const double floor_value = 0.0;
  output = input.cwiseMax(floor_value);
}
|
|
9
|
+
|
|
10
|
+
/// Masks the incoming gradient with the sign of the cached forward input.
///
/// @param output_gradient  Gradient with respect to this layer's output.
/// @return `output_gradient` where the cached input was > 0, and 0.0
///         everywhere else (including where the input was exactly 0).
core::MatrixRM ReLULayer::backward(const core::MatrixRM& output_gradient) {
  return (last_input_.array() > 0.0).select(output_gradient, 0.0);
}
|
|
15
|
+
|
|
16
|
+
}
|