ironforest 0.2__cp313-cp313-macosx_10_12_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ironforest/__init__.py +37 -0
- ironforest/_core.cpython-313-darwin.so +0 -0
- ironforest/linear_regression.py +108 -0
- ironforest/models.py +5 -0
- ironforest-0.2.dist-info/METADATA +111 -0
- ironforest-0.2.dist-info/RECORD +7 -0
- ironforest-0.2.dist-info/WHEEL +4 -0
ironforest/__init__.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""IronForest: N-dimensional array library for numerical computation and spatial analysis."""

# Re-export the public API implemented in the compiled extension module.
from ironforest._core import (
    Array,
    zeros,
    ones,
    full,
    asarray,
    eye,
    diag,
    column_stack,
    linalg,
    stats,
    random,
    spatial,
)

# Import pure Python modules
from . import models

# Explicit public API for `from ironforest import *` and documentation tools.
__all__ = [
    "Array",
    "zeros",
    "ones",
    "full",
    "asarray",
    "eye",
    "diag",
    "column_stack",
    "linalg",
    "stats",
    "random",
    "spatial",
    "models",
]

# NOTE(review): keep in sync with the version declared in the package metadata.
__version__ = "0.2"
|
|
Binary file
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
from ironforest import Array, linalg, column_stack
|
|
2
|
+
|
|
3
|
+
class LinearRegression:
    """Ordinary least squares linear regression.

    Solves for the coefficient vector (and, optionally, an intercept) via
    ``linalg.lstsq`` on the design matrix built from the training data.
    """

    def __init__(self, fit_intercept: bool = True):
        """
        Initialize linear regression model.

        Args:
            fit_intercept: Whether to calculate the intercept for this model.
        """
        self.fit_intercept = fit_intercept
        # Learned parameters; populated by fit().
        self.coef_ = None
        self.intercept_ = None
        self._is_fitted = False

    def fit(self, X: Array, y: Array) -> 'LinearRegression':
        """
        Fit linear model.

        Args:
            X: Training data of shape (n_samples, n_features)
            y: Target values of shape (n_samples,) or (n_samples, n_targets)

        Returns:
            self: Fitted estimator
        """
        # When fitting an intercept, prepend a column of ones so the first
        # solved parameter is the bias term.
        if self.fit_intercept:
            bias_column = Array.ones((X.shape[0], 1))
            design = column_stack([bias_column, X])
        else:
            design = X

        solution, _ = linalg.lstsq(design, y)

        if self.fit_intercept:
            # First entry is the bias; the rest are feature coefficients.
            self.intercept_ = solution[0]
            self.coef_ = solution[1:]
        else:
            self.intercept_ = 0.0
            self.coef_ = solution

        self._is_fitted = True
        return self

    def predict(self, X: Array) -> Array:
        """
        Predict using the linear model.

        Args:
            X: Samples of shape (n_samples, n_features)

        Returns:
            Predicted values of shape (n_samples,) or (n_samples, n_targets)

        Raises:
            RuntimeError: If model hasn't been fitted yet
        """
        if not self._is_fitted:
            raise RuntimeError("Model must be fitted before calling predict")

        prediction = X @ self.coef_  # type: ignore
        if self.fit_intercept:
            prediction = prediction + self.intercept_  # type: ignore
        return prediction

    def score(self, X: Array, y: Array) -> float:
        """
        Return the coefficient of determination (R²) of the prediction.

        Args:
            X: Test samples of shape (n_samples, n_features)
            y: True values of shape (n_samples,) or (n_samples, n_targets)

        Returns:
            R² score
        """
        if not self._is_fitted:
            raise RuntimeError("Model must be fitted before calling score")

        predicted = self.predict(X)

        # R² = 1 - SS_res / SS_tot.
        residual = y - predicted
        centered = y - y.mean()

        ss_res = (residual * residual).sum()
        ss_tot = (centered * centered).sum()

        return 1.0 - (ss_res / ss_tot)

    def residuals(self, X: Array, y: Array) -> Array:
        """
        Calculate residuals (y - y_pred).

        Args:
            X: Samples of shape (n_samples, n_features)
            y: True values of shape (n_samples,) or (n_samples, n_targets)

        Returns:
            Residuals array
        """
        if not self._is_fitted:
            raise RuntimeError("Model must be fitted before calculating residuals")

        return y - self.predict(X)
|
ironforest/models.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ironforest
|
|
3
|
+
Version: 0.2
|
|
4
|
+
Summary: A library supporting numerical computation and spatial analysis.
|
|
5
|
+
Requires-Python: >=3.8
|
|
6
|
+
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
7
|
+
|
|
8
|
+
# IronForest
|
|
9
|
+
IronForest is a rust-powered python library supporting spatial queries, array-based computation and tree-based machine learning.
|
|
10
|
+
|
|
11
|
+
I started this project to support my previous Python library dubious, a project which had the personal constraint of no external dependencies other than numpy. IronForest started as a way for me to eliminate numpy as dubious's lone dependency but has since grown into a standalone project as I have pivoted towards spatial indexing trees and tree-based models. The core rationale behind this project was to get a better grasp of how libraries I use on a regular basis work, and learn how to write Python bindings to offload computationally expensive tasks to tools better suited. Like dubious, I focused on building things from the ground up. I didn't want to glue dependencies together to get something functional; I wanted to understand from input to output how these algorithms worked under the hood. I chose Rust because I had read the book around a year prior to starting this project, and pyo3 bindings are relatively easy to get working. This library is only exposed through Python as that's where I've actually needed its features, and I don't intend to package this as a Rust crate at this stage.
|
|
12
|
+
|
|
13
|
+
## Status
|
|
14
|
+
This is largely a learning project and the API is subject to change. We achieve performance similar to numpy's (beating it in a rare few cases) across most operations, but basic broadcasting arithmetic is around 4x slower in most cases. I intend to add a few unsafe methods to speed things up where applicable, but I don't intend to optimize much further at this stage.
|
|
15
|
+
|
|
16
|
+
## Installation
|
|
17
|
+
`pip install ironforest`
|
|
18
|
+
|
|
19
|
+
You can also build with `maturin build --release` assuming maturin is installed.
|
|
20
|
+
|
|
21
|
+
## Quickstart
|
|
22
|
+
|
|
23
|
+
```python
|
|
24
|
+
import ironforest as irn
|
|
25
|
+
|
|
26
|
+
a = irn.Array([2, 2], [1.0, 2.0, 3.0, 4.0])
|
|
27
|
+
b = irn.Array([2, 2], [5.0, 6.0, 7.0, 8.0])
|
|
28
|
+
|
|
29
|
+
print(f"a @ b = {(a @ b).tolist()}")
|
|
30
|
+
```
|
|
31
|
+
Output:
|
|
32
|
+
`a @ b = [[19.0, 22.0], [43.0, 50.0]]`
|
|
33
|
+
|
|
34
|
+
## Features
|
|
35
|
+
- Array, an N-dimensional array object with broadcasting
|
|
36
|
+
- matrix operations and constructors
|
|
37
|
+
- KDTree, BallTree & VPTree with kNN, radius, KDE and approximate KDE queries
|
|
38
|
+
- Linear and Local Regression
|
|
39
|
+
- Decision trees, Random Forest and Isolation Forest
|
|
40
|
+
- cholesky, qr and eigen decomposition
|
|
41
|
+
- Least Squares & Weighted Least Squares Solver
|
|
42
|
+
- Random sampling from uniform, normal, lognormal, gamma and beta distributions.
|
|
43
|
+
- Statistical methods (mean, median, var, std, quantile)
|
|
44
|
+
- Pearson and Spearman correlation
|
|
45
|
+
|
|
46
|
+
### Top-level
|
|
47
|
+
- `ironforest.Array`
|
|
48
|
+
|
|
49
|
+
### Modules
|
|
50
|
+
- [ironforest.linalg](#linalg)
|
|
51
|
+
- [ironforest.stats](#stats)
|
|
52
|
+
- [ironforest.random](#random)
|
|
53
|
+
- [ironforest.spatial](#spatial)
|
|
54
|
+
- [ironforest.models](#model)
|
|
55
|
+
|
|
56
|
+
## Examples
|
|
57
|
+
```python
|
|
58
|
+
import ironforest as irn
|
|
59
|
+
|
|
60
|
+
a = irn.Array([2, 2], [1.0, 2.0, 3.0, 4.0])
|
|
61
|
+
b = irn.Array([2, 2], [5.0, 6.0, 7.0, 8.0])
|
|
62
|
+
|
|
63
|
+
print(f"a + b = {(a + b).tolist()}")
|
|
64
|
+
print(f"a * b = {(a * b).tolist()}")
|
|
65
|
+
```
|
|
66
|
+
Output:
|
|
67
|
+
`
|
|
68
|
+
a + b = [[6.0, 8.0], [10.0, 12.0]]
|
|
69
|
+
a * b = [[5.0, 12.0], [21.0, 32.0]]
|
|
70
|
+
`
|
|
71
|
+
```python
|
|
72
|
+
import ironforest as irn
|
|
73
|
+
|
|
74
|
+
gen = irn.Generator.from_seed(123)
|
|
75
|
+
|
|
76
|
+
uniform = gen.uniform(0.0, 1.0, [2, 3])
|
|
77
|
+
print(f"Uniform [0, 1): {uniform.tolist()}")
|
|
78
|
+
|
|
79
|
+
normal = gen.standard_normal([2, 3])
|
|
80
|
+
print(f"Standard normal: {normal.tolist()}")
|
|
81
|
+
```
|
|
82
|
+
Output:
|
|
83
|
+
`Uniform [0, 1): [0.19669435215621578, 0.9695722925002218, 0.46744032361670884, 0.12698379756585432]
|
|
84
|
+
Standard normal: [-0.0008585765206425146, 1.4733334715623352, -1.16180050645278, -0.772101732825336]`
|
|
85
|
+
|
|
86
|
+
## Modules
|
|
87
|
+
|
|
88
|
+
### Random
|
|
89
|
+
`Generator` object that can sample from uniform, normal, lognormal, gamma and beta distributions. Support for additional distributions is planned.
|
|
90
|
+
|
|
91
|
+
### Linalg
|
|
92
|
+
- Standard matrix methods and constructors.
|
|
93
|
+
- cholesky and eigen and qr decomposition.
|
|
94
|
+
- Least Squares & Weighted Least Squares solver.
|
|
95
|
+
|
|
96
|
+
### Stats
|
|
97
|
+
- Basic statistical methods for `Array` objects, mean, var and quantile.
|
|
98
|
+
- Pearson and Spearman correlation.
|
|
99
|
+
|
|
100
|
+
### Spatial
|
|
101
|
+
- `KDTree` kNN, Kernel Density Estimation and radius queries.
|
|
102
|
+
- `BallTree` with kNN, Kernel Density Estimation and radius queries.
|
|
103
|
+
- `VPTree` with kNN, Kernel Density Estimation and radius queries.
|
|
104
|
+
|
|
105
|
+
### Model
|
|
106
|
+
- Linear Regression
|
|
107
|
+
- Local Regression
|
|
108
|
+
- Decision Trees (soon)
|
|
109
|
+
- Random Forest (soon)
|
|
110
|
+
- Isolation Forest (soon)
|
|
111
|
+
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
ironforest/__init__.py,sha256=DEdGoxUrAA0bbFmIqlkPX3Egbmi8yCmJIQj84MlnWew,533
|
|
2
|
+
ironforest/_core.cpython-313-darwin.so,sha256=2hbExunAG5Ou_a9N0rbSAuFniGyCJkYn8FN5GPLwbek,1172604
|
|
3
|
+
ironforest/linear_regression.py,sha256=QQ-S44OIdrvSInsVknHFZ0zn95RwPsXt_M8MEvFzbM8,3183
|
|
4
|
+
ironforest/models.py,sha256=Nw9JCPkp-oxqs0T0tzq3sirHzZdkleYfAkE12cdqLVE,138
|
|
5
|
+
ironforest-0.2.dist-info/METADATA,sha256=mbAcSavMR1QC1dmbrlUyCKPb0lxd80fBbOUzxtzB_2M,4450
|
|
6
|
+
ironforest-0.2.dist-info/WHEEL,sha256=y-bBezkr9XqWB_KK0tUa5PfJn4Rf73obH4dbo4t0ZEc,107
|
|
7
|
+
ironforest-0.2.dist-info/RECORD,,
|