r-scikit-learn 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- r_scikit_learn-0.1.2/CHANGELOG.md +35 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/Cargo.lock +2 -1
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/Cargo.toml +3 -1
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/PKG-INFO +31 -13
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/README.md +28 -12
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/benches/benchmark_linear_models.py +6 -0
- r_scikit_learn-0.1.2/benches/benchmark_neighbors.py +124 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/benches/benchmark_preprocessing.py +31 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/pyproject.toml +3 -1
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/__init__.py +3 -1
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/linear_model/_least_squares.py +23 -1
- r_scikit_learn-0.1.2/python/rsklearn/neighbors/__init__.py +5 -0
- r_scikit_learn-0.1.2/python/rsklearn/neighbors/_classification.py +237 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/preprocessing/__init__.py +2 -0
- r_scikit_learn-0.1.2/python/rsklearn/preprocessing/_maxabs_scaler.py +138 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/preprocessing/_standard_scaler.py +75 -1
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/utils/__init__.py +8 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/utils/sparse.py +86 -2
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/src/lib.rs +391 -1
- r_scikit_learn-0.1.2/src/maxabs_scaler.rs +86 -0
- r_scikit_learn-0.1.2/src/neighbors.rs +921 -0
- r_scikit_learn-0.1.2/src/sparse.rs +302 -0
- r_scikit_learn-0.1.2/tests/release_smoke.py +28 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_estimator_compliance.py +4 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_linear_model_parity.py +82 -0
- r_scikit_learn-0.1.2/tests/test_maxabs_scaler.py +60 -0
- r_scikit_learn-0.1.2/tests/test_neighbors.py +68 -0
- r_scikit_learn-0.1.2/tests/test_neighbors_parity.py +50 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_scikit_learn_parity.py +48 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_sparse_infrastructure.py +2 -2
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_standard_scaler.py +41 -0
- r_scikit_learn-0.1.0/src/sparse.rs +0 -133
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/LICENSE +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/benches/benchmark_metrics.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/_validation.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/base.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/compose/__init__.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/compose/_column_transformer.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/impute/__init__.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/impute/_simple_imputer.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/linear_model/__init__.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/linear_model/_base.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/linear_model/_coordinate_descent.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/linear_model/_logistic.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/linear_model/_warnings.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/metrics/__init__.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/metrics/_classification.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/metrics/_regression.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/metrics/_validation.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/model_selection/__init__.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/model_selection/_split.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/model_selection/_utils.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/model_selection/_validation.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/pipeline.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/preprocessing/_base.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/preprocessing/_categorical.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/preprocessing/_label_encoder.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/preprocessing/_minmax_scaler.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/preprocessing/_normalizer.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/preprocessing/_one_hot_encoder.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/preprocessing/_ordinal_encoder.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/preprocessing/_robust_scaler.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/py.typed +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/utils/validation.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/src/categorical.rs +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/src/error.rs +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/src/label_encoder.rs +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/src/linear_model.rs +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/src/metrics.rs +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/src/minmax_scaler.rs +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/src/normalizer.rs +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/src/robust_scaler.rs +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/src/simple_imputer.rs +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/src/standard_scaler.rs +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_base.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_categorical_infrastructure.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_column_transformer.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_column_transformer_parity.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_label_encoder.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_linear_model.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_metrics.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_metrics_parity.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_minmax_scaler.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_model_selection.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_model_selection_parity.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_normalizer.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_one_hot_encoder.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_ordinal_encoder.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_pipeline.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_pipeline_parity.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_public_validation.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_robust_scaler.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_simple_imputer.py +0 -0
- {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_validation.py +0 -0
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to r-scikit-learn are documented here. Release tags and
|
|
4
|
+
published package versions are immutable.
|
|
5
|
+
|
|
6
|
+
## Unreleased
|
|
7
|
+
|
|
8
|
+
## 0.1.2 - 2026-06-24
|
|
9
|
+
|
|
10
|
+
- Added dense brute-force `KNeighborsClassifier` with Rust-backed neighbor
|
|
11
|
+
search, class voting, `predict`, `predict_proba`, and `kneighbors`.
|
|
12
|
+
- Added scikit-learn parity tests and benchmarks for nearest-neighbor
|
|
13
|
+
classification.
|
|
14
|
+
- Optimized the dense Euclidean neighbor search path with blocked dot products,
|
|
15
|
+
reusable work buffers, and macOS Accelerate/CBLAS acceleration with a portable
|
|
16
|
+
`matrixmultiply` fallback.
|
|
17
|
+
- Added sparse `StandardScaler(with_mean=False)` and `MaxAbsScaler` with
|
|
18
|
+
Rust-backed CSR/CSC reductions and column scaling.
|
|
19
|
+
|
|
20
|
+
## 0.1.1 - 2026-06-15
|
|
21
|
+
|
|
22
|
+
- Added wheel and source-distribution installation testing across supported
|
|
23
|
+
operating systems and Python versions.
|
|
24
|
+
- Added a numerical-safety fallback for ill-conditioned tall least-squares
|
|
25
|
+
problems.
|
|
26
|
+
- Added TestPyPI, cross-platform benchmark, and immutable manual release
|
|
27
|
+
workflows.
|
|
28
|
+
|
|
29
|
+
## 0.1.0
|
|
30
|
+
|
|
31
|
+
- Added Rust-powered preprocessing, categorical encoding, sparse
|
|
32
|
+
infrastructure, composition, metrics, model selection, and linear models.
|
|
33
|
+
- Added Linux, macOS, and Windows wheel builds for Python 3.10 through 3.13.
|
|
34
|
+
- Added Rust-native tall-matrix least squares and multinomial logistic
|
|
35
|
+
optimization.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "r-scikit-learn-core"
|
|
3
|
-
version = "0.1.0"
|
|
3
|
+
version = "0.1.2"
|
|
4
4
|
edition = "2021"
|
|
5
5
|
license = "MIT"
|
|
6
6
|
description = "Rust computational core for r-scikit-learn"
|
|
@@ -9,6 +9,7 @@ repository = "https://github.com/rishib42/r-scikit-learn"
|
|
|
9
9
|
include = [
|
|
10
10
|
"/Cargo.lock",
|
|
11
11
|
"/Cargo.toml",
|
|
12
|
+
"/CHANGELOG.md",
|
|
12
13
|
"/LICENSE",
|
|
13
14
|
"/README.md",
|
|
14
15
|
"/benches/*.py",
|
|
@@ -28,6 +29,7 @@ crate-type = ["cdylib", "rlib"]
|
|
|
28
29
|
|
|
29
30
|
[dependencies]
|
|
30
31
|
faer = { version = "0.24", default-features = false, features = ["std", "rayon", "linalg"] }
|
|
32
|
+
matrixmultiply = "0.3"
|
|
31
33
|
nalgebra = { version = "0.34", default-features = false, features = ["std"] }
|
|
32
34
|
numpy = "0.28"
|
|
33
35
|
pyo3 = "0.28"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: r-scikit-learn
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Classifier: Development Status :: 3 - Alpha
|
|
5
5
|
Classifier: License :: OSI Approved :: MIT License
|
|
6
6
|
Classifier: Programming Language :: Python :: 3
|
|
@@ -12,6 +12,7 @@ Classifier: Programming Language :: Rust
|
|
|
12
12
|
Classifier: Typing :: Typed
|
|
13
13
|
Requires-Dist: numpy>=1.23
|
|
14
14
|
Requires-Dist: scipy>=1.10
|
|
15
|
+
Requires-Dist: hypothesis>=6.100,<7 ; extra == 'dev'
|
|
15
16
|
Requires-Dist: maturin>=1.9,<2.0 ; extra == 'dev'
|
|
16
17
|
Requires-Dist: pytest>=8 ; extra == 'dev'
|
|
17
18
|
Requires-Dist: ruff>=0.11 ; extra == 'dev'
|
|
@@ -25,6 +26,7 @@ Author: r-scikit-learn contributors
|
|
|
25
26
|
License-Expression: MIT
|
|
26
27
|
Requires-Python: >=3.10
|
|
27
28
|
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
29
|
+
Project-URL: Changelog, https://github.com/rishib42/r-scikit-learn/blob/main/CHANGELOG.md
|
|
28
30
|
Project-URL: Homepage, https://github.com/rishib42/r-scikit-learn
|
|
29
31
|
Project-URL: Issues, https://github.com/rishib42/r-scikit-learn/issues
|
|
30
32
|
Project-URL: Repository, https://github.com/rishib42/r-scikit-learn
|
|
@@ -34,7 +36,7 @@ Project-URL: Repository, https://github.com/rishib42/r-scikit-learn
|
|
|
34
36
|
Fast, familiar machine-learning building blocks powered by safe Rust. 🦀
|
|
35
37
|
|
|
36
38
|
`r-scikit-learn` combines a Rust computational core with lightweight,
|
|
37
|
-
scikit-learn-style Python estimators. Version 0.1.0 includes:
|
|
39
|
+
scikit-learn-style Python estimators. Version 0.1.1 includes:
|
|
38
40
|
|
|
39
41
|
- Preprocessing, categorical encoding, and missing-value imputation
|
|
40
42
|
- Pipelines and column transformers
|
|
@@ -124,6 +126,13 @@ encoder = OneHotEncoder(handle_unknown="ignore")
|
|
|
124
126
|
X_one_hot = encoder.fit_transform([["small"], ["large"], ["small"]])
|
|
125
127
|
```
|
|
126
128
|
|
|
129
|
+
```python
|
|
130
|
+
from rsklearn.preprocessing import MaxAbsScaler, StandardScaler
|
|
131
|
+
|
|
132
|
+
X_sparse_scaled = StandardScaler(with_mean=False).fit_transform(X_one_hot)
|
|
133
|
+
X_sparse_maxabs = MaxAbsScaler().fit_transform(X_one_hot)
|
|
134
|
+
```
|
|
135
|
+
|
|
127
136
|
```python
|
|
128
137
|
import numpy as np
|
|
129
138
|
from rsklearn.impute import SimpleImputer
|
|
@@ -193,7 +202,10 @@ probabilities = classifier.predict_proba(X_test)
|
|
|
193
202
|
- Uses float64 fitted statistics and native float32 kernels where supported.
|
|
194
203
|
- Ignores NaNs while fitting, preserves them while transforming, and rejects
|
|
195
204
|
infinity.
|
|
196
|
-
- Supports incremental `partial_fit` for `StandardScaler` and `MinMaxScaler`.
|
|
205
|
+
- Supports incremental `partial_fit` for `StandardScaler`, `MaxAbsScaler`, and
|
|
206
|
+
`MinMaxScaler`.
|
|
207
|
+
- Supports CSR/CSC sparse `StandardScaler(with_mean=False)` and `MaxAbsScaler`
|
|
208
|
+
without densifying input.
|
|
197
209
|
- Supports L1, L2, and max row normalization.
|
|
198
210
|
- Provides quantile-based `RobustScaler` fitting and inverse transforms.
|
|
199
211
|
|
|
@@ -274,8 +286,6 @@ The core implemented behavior is tested and packaged across Linux, macOS, and
|
|
|
274
286
|
Windows, but the project remains alpha software. Before a stable 1.0 release,
|
|
275
287
|
the following compatibility and operational work remains:
|
|
276
288
|
|
|
277
|
-
- Sparse-aware estimator behavior, including non-centering `StandardScaler`
|
|
278
|
-
operation. Shared CSR/CSC validation and Rust kernels are implemented.
|
|
279
289
|
- `sample_weight` support for `StandardScaler.partial_fit`.
|
|
280
290
|
- Comprehensive `get_feature_names_out` support and configurable output
|
|
281
291
|
containers across estimators.
|
|
@@ -327,14 +337,22 @@ Substantial numerical loops release the Python GIL.
|
|
|
327
337
|
|
|
328
338
|
## Release
|
|
329
339
|
|
|
330
|
-
1.
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
340
|
+
1. Update the matching versions in `pyproject.toml`, `Cargo.toml`, and
|
|
341
|
+
`python/rsklearn/__init__.py`, then update `CHANGELOG.md`.
|
|
342
|
+
2. Push the release commit and wait for CI, including manylinux and sdist
|
|
343
|
+
installation checks, to pass.
|
|
344
|
+
3. Run the manual TestPyPI workflow and verify its distributions.
|
|
345
|
+
4. Run the manual Release workflow with the version number without a `v`
|
|
346
|
+
prefix.
|
|
347
|
+
5. Approve the PyPI environment if required.
|
|
348
|
+
|
|
349
|
+
The release workflow refuses existing versions, installs every wheel on
|
|
350
|
+
Python 3.10-3.13 across Linux, macOS, and Windows, verifies sdist installation,
|
|
351
|
+
publishes through PyPI Trusted Publishing, creates the immutable GitHub tag and
|
|
352
|
+
release, attaches artifacts, and verifies installation from PyPI. No API token
|
|
353
|
+
is stored in the repository. Configure separate `pypi` and `testpypi` GitHub
|
|
354
|
+
environments and matching Trusted Publishers for `release.yml` and
|
|
355
|
+
`test-pypi.yml`, respectively.
|
|
338
356
|
|
|
339
357
|
## Roadmap
|
|
340
358
|
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
Fast, familiar machine-learning building blocks powered by safe Rust. 🦀
|
|
4
4
|
|
|
5
5
|
`r-scikit-learn` combines a Rust computational core with lightweight,
|
|
6
|
-
scikit-learn-style Python estimators. Version 0.1.0 includes:
|
|
6
|
+
scikit-learn-style Python estimators. Version 0.1.1 includes:
|
|
7
7
|
|
|
8
8
|
- Preprocessing, categorical encoding, and missing-value imputation
|
|
9
9
|
- Pipelines and column transformers
|
|
@@ -93,6 +93,13 @@ encoder = OneHotEncoder(handle_unknown="ignore")
|
|
|
93
93
|
X_one_hot = encoder.fit_transform([["small"], ["large"], ["small"]])
|
|
94
94
|
```
|
|
95
95
|
|
|
96
|
+
```python
|
|
97
|
+
from rsklearn.preprocessing import MaxAbsScaler, StandardScaler
|
|
98
|
+
|
|
99
|
+
X_sparse_scaled = StandardScaler(with_mean=False).fit_transform(X_one_hot)
|
|
100
|
+
X_sparse_maxabs = MaxAbsScaler().fit_transform(X_one_hot)
|
|
101
|
+
```
|
|
102
|
+
|
|
96
103
|
```python
|
|
97
104
|
import numpy as np
|
|
98
105
|
from rsklearn.impute import SimpleImputer
|
|
@@ -162,7 +169,10 @@ probabilities = classifier.predict_proba(X_test)
|
|
|
162
169
|
- Uses float64 fitted statistics and native float32 kernels where supported.
|
|
163
170
|
- Ignores NaNs while fitting, preserves them while transforming, and rejects
|
|
164
171
|
infinity.
|
|
165
|
-
- Supports incremental `partial_fit` for `StandardScaler` and `MinMaxScaler`.
|
|
172
|
+
- Supports incremental `partial_fit` for `StandardScaler`, `MaxAbsScaler`, and
|
|
173
|
+
`MinMaxScaler`.
|
|
174
|
+
- Supports CSR/CSC sparse `StandardScaler(with_mean=False)` and `MaxAbsScaler`
|
|
175
|
+
without densifying input.
|
|
166
176
|
- Supports L1, L2, and max row normalization.
|
|
167
177
|
- Provides quantile-based `RobustScaler` fitting and inverse transforms.
|
|
168
178
|
|
|
@@ -243,8 +253,6 @@ The core implemented behavior is tested and packaged across Linux, macOS, and
|
|
|
243
253
|
Windows, but the project remains alpha software. Before a stable 1.0 release,
|
|
244
254
|
the following compatibility and operational work remains:
|
|
245
255
|
|
|
246
|
-
- Sparse-aware estimator behavior, including non-centering `StandardScaler`
|
|
247
|
-
operation. Shared CSR/CSC validation and Rust kernels are implemented.
|
|
248
256
|
- `sample_weight` support for `StandardScaler.partial_fit`.
|
|
249
257
|
- Comprehensive `get_feature_names_out` support and configurable output
|
|
250
258
|
containers across estimators.
|
|
@@ -296,14 +304,22 @@ Substantial numerical loops release the Python GIL.
|
|
|
296
304
|
|
|
297
305
|
## Release
|
|
298
306
|
|
|
299
|
-
1.
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
+
1. Update the matching versions in `pyproject.toml`, `Cargo.toml`, and
|
|
308
|
+
`python/rsklearn/__init__.py`, then update `CHANGELOG.md`.
|
|
309
|
+
2. Push the release commit and wait for CI, including manylinux and sdist
|
|
310
|
+
installation checks, to pass.
|
|
311
|
+
3. Run the manual TestPyPI workflow and verify its distributions.
|
|
312
|
+
4. Run the manual Release workflow with the version number without a `v`
|
|
313
|
+
prefix.
|
|
314
|
+
5. Approve the PyPI environment if required.
|
|
315
|
+
|
|
316
|
+
The release workflow refuses existing versions, installs every wheel on
|
|
317
|
+
Python 3.10-3.13 across Linux, macOS, and Windows, verifies sdist installation,
|
|
318
|
+
publishes through PyPI Trusted Publishing, creates the immutable GitHub tag and
|
|
319
|
+
release, attaches artifacts, and verifies installation from PyPI. No API token
|
|
320
|
+
is stored in the repository. Configure separate `pypi` and `testpypi` GitHub
|
|
321
|
+
environments and matching Trusted Publishers for `release.yml` and
|
|
322
|
+
`test-pypi.yml`, respectively.
|
|
307
323
|
|
|
308
324
|
## Roadmap
|
|
309
325
|
|
|
@@ -10,6 +10,8 @@ from collections.abc import Callable
|
|
|
10
10
|
|
|
11
11
|
import numpy as np
|
|
12
12
|
import rsklearn.linear_model as rlinear
|
|
13
|
+
import scipy
|
|
14
|
+
import sklearn
|
|
13
15
|
import sklearn.linear_model as slinear
|
|
14
16
|
from rsklearn import _core
|
|
15
17
|
|
|
@@ -65,6 +67,10 @@ def main() -> None:
|
|
|
65
67
|
)
|
|
66
68
|
print(f"Python: {sys.executable}")
|
|
67
69
|
print(f"Rust extension: {_core.__file__} ({profile})")
|
|
70
|
+
print(
|
|
71
|
+
f"Dependencies: numpy {np.__version__}, scipy {scipy.__version__}, "
|
|
72
|
+
f"scikit-learn {sklearn.__version__}"
|
|
73
|
+
)
|
|
68
74
|
rng = np.random.default_rng(20260614)
|
|
69
75
|
X = rng.normal(size=(args.samples, args.features))
|
|
70
76
|
coefficients = rng.normal(size=args.features)
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""Compare r-scikit-learn and scikit-learn nearest-neighbor performance."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import statistics
|
|
7
|
+
import sys
|
|
8
|
+
import time
|
|
9
|
+
from collections.abc import Callable
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
import rsklearn.neighbors as rneighbors
|
|
13
|
+
import scipy
|
|
14
|
+
import sklearn
|
|
15
|
+
import sklearn.neighbors as sneighbors
|
|
16
|
+
from rsklearn import _core
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def measure(
    function: Callable[[], object], repetitions: int, warmups: int
) -> tuple[float, float]:
    """Time ``function`` and return the mean and spread of its runtime.

    Args:
        function: Zero-argument callable to benchmark; its result is discarded.
        repetitions: Number of timed calls. Must be at least 1.
        warmups: Number of untimed calls made before timing starts.

    Returns:
        ``(mean, stdev)`` in seconds, both floats. ``stdev`` is the sample
        standard deviation, or ``0.0`` when only one repetition is timed
        because a single sample has no spread.

    Raises:
        statistics.StatisticsError: If ``repetitions`` is less than 1. This is
            a ``ValueError`` subclass and is the same exception type the mean
            of an empty sample would raise.
    """
    # Fail before running warm-ups so a bad argument does not cost a full
    # (possibly expensive) warm-up pass and then die with an opaque message.
    if repetitions < 1:
        raise statistics.StatisticsError(
            f"repetitions must be at least 1, got {repetitions}"
        )
    for _ in range(warmups):
        function()
    durations = []
    for _ in range(repetitions):
        started = time.perf_counter()
        function()
        durations.append(time.perf_counter() - started)
    # Use a float zero so the return type matches the annotation.
    spread = statistics.stdev(durations) if repetitions > 1 else 0.0
    return statistics.mean(durations), spread
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def report(
    name: str,
    ours: Callable[[], object],
    theirs: Callable[[], object],
    repetitions: int,
    warmups: int,
) -> None:
    """Benchmark two callables and print a single comparison row.

    Args:
        name: Label printed at the start of the row.
        ours: Callable exercising the r-scikit-learn implementation.
        theirs: Callable exercising the scikit-learn implementation.
        repetitions: Number of timed calls forwarded to ``measure``.
        warmups: Number of untimed calls forwarded to ``measure``.
    """
    # ``ours`` is always timed first, then ``theirs``.
    (ours_mean, ours_stdev), (theirs_mean, theirs_stdev) = [
        measure(candidate, repetitions, warmups) for candidate in (ours, theirs)
    ]
    # Positive values mean r-scikit-learn took less time than scikit-learn.
    improvement = (theirs_mean - ours_mean) / theirs_mean * 100
    row = (
        f"{name:<32} r-scikit-learn {ours_mean:9.6f}s ± {ours_stdev:9.6f}s "
        f"scikit-learn {theirs_mean:9.6f}s ± {theirs_stdev:9.6f}s "
        f"impr. {improvement:+7.2f}%"
    )
    print(row)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def main() -> None:
    """Run the nearest-neighbor benchmark from the command line.

    Parses the problem-size options, refuses to run against a non-release
    Rust extension unless ``--allow-debug`` is given, prints the environment,
    generates seeded random data, and reports timings for ``fit``,
    ``kneighbors``, ``predict`` and ``predict_proba``.
    """
    parser = argparse.ArgumentParser()
    # Integer options, registered in the order they appear in ``--help``.
    for flag, default in (
        ("--train-samples", 20_000),
        ("--query-samples", 1_000),
        ("--features", 20),
        ("--classes", 5),
        ("--neighbors", 5),
        ("--repetitions", 5),
        ("--warmups", 2),
    ):
        parser.add_argument(flag, type=int, default=default)
    parser.add_argument(
        "--allow-debug",
        action="store_true",
        help="run even when r-scikit-learn's Rust extension is a debug build",
    )
    args = parser.parse_args()

    profile = _core.build_profile()
    if not args.allow_debug and profile != "release":
        raise SystemExit(
            "Refusing to benchmark a debug Rust extension. Install a release build "
            "with `maturin develop --release`, then rerun. Pass --allow-debug only "
            "when intentionally measuring debug code."
        )

    print(f"Python: {sys.executable}")
    print(f"Rust extension: {_core.__file__} ({profile})")
    print(
        f"Dependencies: numpy {np.__version__}, scipy {scipy.__version__}, "
        f"scikit-learn {sklearn.__version__}"
    )

    # Draw order from the seeded generator is train, query, labels.
    rng = np.random.default_rng(20260616)
    X_train = rng.normal(size=(args.train_samples, args.features))
    X_query = rng.normal(size=(args.query_samples, args.features))
    y = rng.integers(0, args.classes, size=args.train_samples, dtype=np.int64)

    options = {
        "n_neighbors": args.neighbors,
        "weights": "uniform",
        "algorithm": "brute",
        "metric": "euclidean",
    }
    print(
        f"Train matrix: {args.train_samples:,} x {args.features:,}; "
        f"query matrix: {args.query_samples:,} x {args.features:,}"
    )

    report(
        "KNeighborsClassifier fit",
        lambda: rneighbors.KNeighborsClassifier(**options).fit(X_train, y),
        lambda: sneighbors.KNeighborsClassifier(**options).fit(X_train, y),
        args.repetitions,
        args.warmups,
    )

    # Fit once outside the timed region so query benchmarks exclude fitting.
    ours = rneighbors.KNeighborsClassifier(**options).fit(X_train, y)
    theirs = sneighbors.KNeighborsClassifier(**options).fit(X_train, y)
    for label, method in (
        ("KNeighborsClassifier kneighbors", "kneighbors"),
        ("KNeighborsClassifier predict", "predict"),
        ("KNeighborsClassifier proba", "predict_proba"),
    ):
        report(
            label,
            # Bind the method name per iteration; lookup stays inside the call.
            lambda method=method: getattr(ours, method)(X_query),
            lambda method=method: getattr(theirs, method)(X_query),
            args.repetitions,
            args.warmups,
        )
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
if __name__ == "__main__":
|
|
124
|
+
main()
|
|
@@ -13,6 +13,7 @@ from rsklearn.base import BaseEstimator
|
|
|
13
13
|
from rsklearn.impute import SimpleImputer
|
|
14
14
|
from rsklearn.preprocessing import (
|
|
15
15
|
LabelEncoder,
|
|
16
|
+
MaxAbsScaler,
|
|
16
17
|
MinMaxScaler,
|
|
17
18
|
Normalizer,
|
|
18
19
|
OneHotEncoder,
|
|
@@ -27,6 +28,7 @@ from sklearn.impute import SimpleImputer as ScikitSimpleImputer
|
|
|
27
28
|
|
|
28
29
|
# The scikit-learn distribution intentionally exposes the `sklearn` import package.
|
|
29
30
|
from sklearn.preprocessing import LabelEncoder as ScikitLabelEncoder
|
|
31
|
+
from sklearn.preprocessing import MaxAbsScaler as ScikitMaxAbsScaler
|
|
30
32
|
from sklearn.preprocessing import MinMaxScaler as ScikitMinMaxScaler
|
|
31
33
|
from sklearn.preprocessing import Normalizer as ScikitNormalizer
|
|
32
34
|
from sklearn.preprocessing import OneHotEncoder as ScikitOneHotEncoder
|
|
@@ -88,6 +90,7 @@ def benchmark_matrix(rows: int, columns: int, repetitions: int) -> None:
|
|
|
88
90
|
)
|
|
89
91
|
for name, ours, theirs in [
|
|
90
92
|
("StandardScaler", StandardScaler, ScikitStandardScaler),
|
|
93
|
+
("MaxAbsScaler", MaxAbsScaler, ScikitMaxAbsScaler),
|
|
91
94
|
("MinMaxScaler", MinMaxScaler, ScikitMinMaxScaler),
|
|
92
95
|
("Normalizer", Normalizer, ScikitNormalizer),
|
|
93
96
|
("RobustScaler", RobustScaler, ScikitRobustScaler),
|
|
@@ -294,6 +297,34 @@ def benchmark_sparse(repetitions: int) -> None:
|
|
|
294
297
|
scikit_scale,
|
|
295
298
|
repetitions,
|
|
296
299
|
)
|
|
300
|
+
ours_standard = StandardScaler(with_mean=False).fit(matrix)
|
|
301
|
+
theirs_standard = ScikitStandardScaler(with_mean=False).fit(matrix)
|
|
302
|
+
report_comparison(
|
|
303
|
+
"Sparse StandardScaler fit",
|
|
304
|
+
lambda: StandardScaler(with_mean=False).fit(matrix),
|
|
305
|
+
lambda: ScikitStandardScaler(with_mean=False).fit(matrix),
|
|
306
|
+
repetitions,
|
|
307
|
+
)
|
|
308
|
+
report_comparison(
|
|
309
|
+
"Sparse StandardScaler transform",
|
|
310
|
+
lambda: ours_standard.transform(matrix),
|
|
311
|
+
lambda: theirs_standard.transform(matrix),
|
|
312
|
+
repetitions,
|
|
313
|
+
)
|
|
314
|
+
ours_maxabs = MaxAbsScaler().fit(matrix)
|
|
315
|
+
theirs_maxabs = ScikitMaxAbsScaler().fit(matrix)
|
|
316
|
+
report_comparison(
|
|
317
|
+
"Sparse MaxAbsScaler fit",
|
|
318
|
+
lambda: MaxAbsScaler().fit(matrix),
|
|
319
|
+
lambda: ScikitMaxAbsScaler().fit(matrix),
|
|
320
|
+
repetitions,
|
|
321
|
+
)
|
|
322
|
+
report_comparison(
|
|
323
|
+
"Sparse MaxAbsScaler transform",
|
|
324
|
+
lambda: ours_maxabs.transform(matrix),
|
|
325
|
+
lambda: theirs_maxabs.transform(matrix),
|
|
326
|
+
repetitions,
|
|
327
|
+
)
|
|
297
328
|
|
|
298
329
|
|
|
299
330
|
def main() -> None:
|
|
@@ -4,7 +4,7 @@ build-backend = "maturin"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "r-scikit-learn"
|
|
7
|
-
version = "0.1.0"
|
|
7
|
+
version = "0.1.2"
|
|
8
8
|
description = "High-performance scikit-learn-style machine learning powered by safe Rust"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -26,6 +26,7 @@ dependencies = ["numpy>=1.23", "scipy>=1.10"]
|
|
|
26
26
|
|
|
27
27
|
[project.optional-dependencies]
|
|
28
28
|
dev = [
|
|
29
|
+
"hypothesis>=6.100,<7",
|
|
29
30
|
"maturin>=1.9,<2.0",
|
|
30
31
|
"pytest>=8",
|
|
31
32
|
"ruff>=0.11",
|
|
@@ -36,6 +37,7 @@ dev = [
|
|
|
36
37
|
Homepage = "https://github.com/rishib42/r-scikit-learn"
|
|
37
38
|
Repository = "https://github.com/rishib42/r-scikit-learn"
|
|
38
39
|
Issues = "https://github.com/rishib42/r-scikit-learn/issues"
|
|
40
|
+
Changelog = "https://github.com/rishib42/r-scikit-learn/blob/main/CHANGELOG.md"
|
|
39
41
|
|
|
40
42
|
[tool.maturin]
|
|
41
43
|
python-source = "python"
|
|
@@ -10,6 +10,7 @@ from .base import (
|
|
|
10
10
|
from .compose import ColumnTransformer, make_column_transformer
|
|
11
11
|
from .impute import SimpleImputer
|
|
12
12
|
from .linear_model import ElasticNet, Lasso, LinearRegression, LogisticRegression, Ridge
|
|
13
|
+
from .neighbors import KNeighborsClassifier
|
|
13
14
|
from .pipeline import Pipeline, make_pipeline
|
|
14
15
|
from .preprocessing import (
|
|
15
16
|
LabelEncoder,
|
|
@@ -26,6 +27,7 @@ __all__ = [
|
|
|
26
27
|
"ClassifierMixin",
|
|
27
28
|
"ColumnTransformer",
|
|
28
29
|
"ElasticNet",
|
|
30
|
+
"KNeighborsClassifier",
|
|
29
31
|
"LabelEncoder",
|
|
30
32
|
"Lasso",
|
|
31
33
|
"LinearRegression",
|
|
@@ -45,4 +47,4 @@ __all__ = [
|
|
|
45
47
|
"make_column_transformer",
|
|
46
48
|
"make_pipeline",
|
|
47
49
|
]
|
|
48
|
-
__version__ = "0.1.0"
|
|
50
|
+
__version__ = "0.1.2"
|
{r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/linear_model/_least_squares.py
RENAMED
|
@@ -12,6 +12,26 @@ from rsklearn.base import BaseEstimator, RegressorMixin
|
|
|
12
12
|
|
|
13
13
|
from ._base import LinearModel, validate_regression_fit
|
|
14
14
|
|
|
15
|
+
# Normal equations square the condition number. This cutoff limits the
|
|
16
|
+
# resulting float64 error amplification before selecting the fast Gram path.
|
|
17
|
+
_GRAM_MIN_SINGULAR_RATIO = np.finfo(np.float64).eps ** 0.25
|
|
18
|
+
_GRAM_RANK_RESOLUTION = np.sqrt(np.finfo(np.float64).eps)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _tall_solution_is_stable(singular: np.ndarray, rank: int, tolerance: float) -> bool:
    """Decide whether the fast tall-matrix result can be trusted.

    Args:
        singular: Singular values reported by the tall solver; the first entry
            is treated as the largest and entry ``rank - 1`` as the smallest
            retained one.
        rank: Numerical rank reported by the solver.
        tolerance: Rank tolerance that was passed to the solver.

    Returns:
        A true value only when the spectrum is non-empty and finite, any rank
        deficiency was detected with a tolerance of at least
        ``_GRAM_RANK_RESOLUTION``, and the ratio of the smallest retained to
        the largest singular value is at least ``_GRAM_MIN_SINGULAR_RATIO``.
    """
    count = singular.size
    if rank == 0 or count == 0:
        return False
    if not np.isfinite(singular).all():
        return False
    # A rank-deficient result is only believed when the tolerance is at least
    # the rank-resolution threshold.
    rank_deficient = rank < count
    if rank_deficient and tolerance < _GRAM_RANK_RESOLUTION:
        return False
    top = singular[0]
    floor = singular[rank - 1]
    return top > 0 and floor > 0 and floor / top >= _GRAM_MIN_SINGULAR_RATIO
|
|
34
|
+
|
|
15
35
|
|
|
16
36
|
def _fit_lstsq(
|
|
17
37
|
X: np.ndarray,
|
|
@@ -22,7 +42,9 @@ def _fit_lstsq(
|
|
|
22
42
|
) -> tuple[np.ndarray, np.ndarray, int, np.ndarray]:
|
|
23
43
|
"""Solve unregularized least squares through a shape-aware dense backend."""
|
|
24
44
|
if X.shape[0] >= 4 * X.shape[1]:
|
|
25
|
-
|
|
45
|
+
tall_fit = _core.linear_fit_tall(X, y, weights, fit_intercept, tolerance)
|
|
46
|
+
if _tall_solution_is_stable(tall_fit[3], tall_fit[2], tolerance):
|
|
47
|
+
return tall_fit
|
|
26
48
|
uniform_weights = np.all(weights == weights[0])
|
|
27
49
|
if fit_intercept:
|
|
28
50
|
if uniform_weights:
|