r-scikit-learn 0.1.0__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. r_scikit_learn-0.1.2/CHANGELOG.md +35 -0
  2. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/Cargo.lock +2 -1
  3. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/Cargo.toml +3 -1
  4. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/PKG-INFO +31 -13
  5. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/README.md +28 -12
  6. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/benches/benchmark_linear_models.py +6 -0
  7. r_scikit_learn-0.1.2/benches/benchmark_neighbors.py +124 -0
  8. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/benches/benchmark_preprocessing.py +31 -0
  9. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/pyproject.toml +3 -1
  10. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/__init__.py +3 -1
  11. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/linear_model/_least_squares.py +23 -1
  12. r_scikit_learn-0.1.2/python/rsklearn/neighbors/__init__.py +5 -0
  13. r_scikit_learn-0.1.2/python/rsklearn/neighbors/_classification.py +237 -0
  14. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/preprocessing/__init__.py +2 -0
  15. r_scikit_learn-0.1.2/python/rsklearn/preprocessing/_maxabs_scaler.py +138 -0
  16. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/preprocessing/_standard_scaler.py +75 -1
  17. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/utils/__init__.py +8 -0
  18. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/utils/sparse.py +86 -2
  19. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/src/lib.rs +391 -1
  20. r_scikit_learn-0.1.2/src/maxabs_scaler.rs +86 -0
  21. r_scikit_learn-0.1.2/src/neighbors.rs +921 -0
  22. r_scikit_learn-0.1.2/src/sparse.rs +302 -0
  23. r_scikit_learn-0.1.2/tests/release_smoke.py +28 -0
  24. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_estimator_compliance.py +4 -0
  25. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_linear_model_parity.py +82 -0
  26. r_scikit_learn-0.1.2/tests/test_maxabs_scaler.py +60 -0
  27. r_scikit_learn-0.1.2/tests/test_neighbors.py +68 -0
  28. r_scikit_learn-0.1.2/tests/test_neighbors_parity.py +50 -0
  29. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_scikit_learn_parity.py +48 -0
  30. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_sparse_infrastructure.py +2 -2
  31. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_standard_scaler.py +41 -0
  32. r_scikit_learn-0.1.0/src/sparse.rs +0 -133
  33. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/LICENSE +0 -0
  34. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/benches/benchmark_metrics.py +0 -0
  35. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/_validation.py +0 -0
  36. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/base.py +0 -0
  37. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/compose/__init__.py +0 -0
  38. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/compose/_column_transformer.py +0 -0
  39. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/impute/__init__.py +0 -0
  40. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/impute/_simple_imputer.py +0 -0
  41. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/linear_model/__init__.py +0 -0
  42. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/linear_model/_base.py +0 -0
  43. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/linear_model/_coordinate_descent.py +0 -0
  44. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/linear_model/_logistic.py +0 -0
  45. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/linear_model/_warnings.py +0 -0
  46. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/metrics/__init__.py +0 -0
  47. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/metrics/_classification.py +0 -0
  48. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/metrics/_regression.py +0 -0
  49. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/metrics/_validation.py +0 -0
  50. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/model_selection/__init__.py +0 -0
  51. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/model_selection/_split.py +0 -0
  52. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/model_selection/_utils.py +0 -0
  53. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/model_selection/_validation.py +0 -0
  54. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/pipeline.py +0 -0
  55. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/preprocessing/_base.py +0 -0
  56. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/preprocessing/_categorical.py +0 -0
  57. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/preprocessing/_label_encoder.py +0 -0
  58. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/preprocessing/_minmax_scaler.py +0 -0
  59. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/preprocessing/_normalizer.py +0 -0
  60. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/preprocessing/_one_hot_encoder.py +0 -0
  61. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/preprocessing/_ordinal_encoder.py +0 -0
  62. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/preprocessing/_robust_scaler.py +0 -0
  63. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/py.typed +0 -0
  64. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/python/rsklearn/utils/validation.py +0 -0
  65. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/src/categorical.rs +0 -0
  66. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/src/error.rs +0 -0
  67. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/src/label_encoder.rs +0 -0
  68. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/src/linear_model.rs +0 -0
  69. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/src/metrics.rs +0 -0
  70. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/src/minmax_scaler.rs +0 -0
  71. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/src/normalizer.rs +0 -0
  72. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/src/robust_scaler.rs +0 -0
  73. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/src/simple_imputer.rs +0 -0
  74. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/src/standard_scaler.rs +0 -0
  75. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_base.py +0 -0
  76. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_categorical_infrastructure.py +0 -0
  77. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_column_transformer.py +0 -0
  78. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_column_transformer_parity.py +0 -0
  79. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_label_encoder.py +0 -0
  80. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_linear_model.py +0 -0
  81. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_metrics.py +0 -0
  82. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_metrics_parity.py +0 -0
  83. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_minmax_scaler.py +0 -0
  84. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_model_selection.py +0 -0
  85. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_model_selection_parity.py +0 -0
  86. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_normalizer.py +0 -0
  87. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_one_hot_encoder.py +0 -0
  88. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_ordinal_encoder.py +0 -0
  89. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_pipeline.py +0 -0
  90. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_pipeline_parity.py +0 -0
  91. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_public_validation.py +0 -0
  92. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_robust_scaler.py +0 -0
  93. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_simple_imputer.py +0 -0
  94. {r_scikit_learn-0.1.0 → r_scikit_learn-0.1.2}/tests/test_validation.py +0 -0
@@ -0,0 +1,35 @@
1
+ # Changelog
2
+
3
+ All notable changes to r-scikit-learn are documented here. Release tags and
4
+ published package versions are immutable.
5
+
6
+ ## Unreleased
7
+
8
+ ## 0.1.2 - 2026-06-24
9
+
10
+ - Added dense brute-force `KNeighborsClassifier` with Rust-backed neighbor
11
+ search, class voting, `predict`, `predict_proba`, and `kneighbors`.
12
+ - Added scikit-learn parity tests and benchmarks for nearest-neighbor
13
+ classification.
14
+ - Optimized the dense Euclidean neighbor search path with blocked dot products,
15
+ reusable work buffers, and macOS Accelerate/CBLAS acceleration with a portable
16
+ `matrixmultiply` fallback.
17
+ - Added sparse `StandardScaler(with_mean=False)` and `MaxAbsScaler` with
18
+ Rust-backed CSR/CSC reductions and column scaling.
19
+
20
+ ## 0.1.1 - 2026-06-15
21
+
22
+ - Added wheel and source-distribution installation testing across supported
23
+ operating systems and Python versions.
24
+ - Added a numerical-safety fallback for ill-conditioned tall least-squares
25
+ problems.
26
+ - Added TestPyPI, cross-platform benchmark, and immutable manual release
27
+ workflows.
28
+
29
+ ## 0.1.0
30
+
31
+ - Added Rust-powered preprocessing, categorical encoding, sparse
32
+ infrastructure, composition, metrics, model selection, and linear models.
33
+ - Added Linux, macOS, and Windows wheel builds for Python 3.10 through 3.13.
34
+ - Added Rust-native tall-matrix least squares and multinomial logistic
35
+ optimization.
@@ -998,9 +998,10 @@ dependencies = [
998
998
 
999
999
  [[package]]
1000
1000
  name = "r-scikit-learn-core"
1001
- version = "0.1.0"
1001
+ version = "0.1.2"
1002
1002
  dependencies = [
1003
1003
  "faer",
1004
+ "matrixmultiply",
1004
1005
  "nalgebra",
1005
1006
  "numpy",
1006
1007
  "pyo3",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "r-scikit-learn-core"
3
- version = "0.1.0"
3
+ version = "0.1.2"
4
4
  edition = "2021"
5
5
  license = "MIT"
6
6
  description = "Rust computational core for r-scikit-learn"
@@ -9,6 +9,7 @@ repository = "https://github.com/rishib42/r-scikit-learn"
9
9
  include = [
10
10
  "/Cargo.lock",
11
11
  "/Cargo.toml",
12
+ "/CHANGELOG.md",
12
13
  "/LICENSE",
13
14
  "/README.md",
14
15
  "/benches/*.py",
@@ -28,6 +29,7 @@ crate-type = ["cdylib", "rlib"]
28
29
 
29
30
  [dependencies]
30
31
  faer = { version = "0.24", default-features = false, features = ["std", "rayon", "linalg"] }
32
+ matrixmultiply = "0.3"
31
33
  nalgebra = { version = "0.34", default-features = false, features = ["std"] }
32
34
  numpy = "0.28"
33
35
  pyo3 = "0.28"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: r-scikit-learn
3
- Version: 0.1.0
3
+ Version: 0.1.2
4
4
  Classifier: Development Status :: 3 - Alpha
5
5
  Classifier: License :: OSI Approved :: MIT License
6
6
  Classifier: Programming Language :: Python :: 3
@@ -12,6 +12,7 @@ Classifier: Programming Language :: Rust
12
12
  Classifier: Typing :: Typed
13
13
  Requires-Dist: numpy>=1.23
14
14
  Requires-Dist: scipy>=1.10
15
+ Requires-Dist: hypothesis>=6.100,<7 ; extra == 'dev'
15
16
  Requires-Dist: maturin>=1.9,<2.0 ; extra == 'dev'
16
17
  Requires-Dist: pytest>=8 ; extra == 'dev'
17
18
  Requires-Dist: ruff>=0.11 ; extra == 'dev'
@@ -25,6 +26,7 @@ Author: r-scikit-learn contributors
25
26
  License-Expression: MIT
26
27
  Requires-Python: >=3.10
27
28
  Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
29
+ Project-URL: Changelog, https://github.com/rishib42/r-scikit-learn/blob/main/CHANGELOG.md
28
30
  Project-URL: Homepage, https://github.com/rishib42/r-scikit-learn
29
31
  Project-URL: Issues, https://github.com/rishib42/r-scikit-learn/issues
30
32
  Project-URL: Repository, https://github.com/rishib42/r-scikit-learn
@@ -34,7 +36,7 @@ Project-URL: Repository, https://github.com/rishib42/r-scikit-learn
34
36
  Fast, familiar machine-learning building blocks powered by safe Rust. 🦀
35
37
 
36
38
  `r-scikit-learn` combines a Rust computational core with lightweight,
37
- scikit-learn-style Python estimators. Version 0.1.0 includes:
39
+ scikit-learn-style Python estimators. Version 0.1.2 includes:
38
40
 
39
41
  - Preprocessing, categorical encoding, and missing-value imputation
40
42
  - Pipelines and column transformers
@@ -124,6 +126,13 @@ encoder = OneHotEncoder(handle_unknown="ignore")
124
126
  X_one_hot = encoder.fit_transform([["small"], ["large"], ["small"]])
125
127
  ```
126
128
 
129
+ ```python
130
+ from rsklearn.preprocessing import MaxAbsScaler, StandardScaler
131
+
132
+ X_sparse_scaled = StandardScaler(with_mean=False).fit_transform(X_one_hot)
133
+ X_sparse_maxabs = MaxAbsScaler().fit_transform(X_one_hot)
134
+ ```
135
+
127
136
  ```python
128
137
  import numpy as np
129
138
  from rsklearn.impute import SimpleImputer
@@ -193,7 +202,10 @@ probabilities = classifier.predict_proba(X_test)
193
202
  - Uses float64 fitted statistics and native float32 kernels where supported.
194
203
  - Ignores NaNs while fitting, preserves them while transforming, and rejects
195
204
  infinity.
196
- - Supports incremental `partial_fit` for `StandardScaler` and `MinMaxScaler`.
205
+ - Supports incremental `partial_fit` for `StandardScaler`, `MaxAbsScaler`, and
206
+ `MinMaxScaler`.
207
+ - Supports CSR/CSC sparse `StandardScaler(with_mean=False)` and `MaxAbsScaler`
208
+ without densifying input.
197
209
  - Supports L1, L2, and max row normalization.
198
210
  - Provides quantile-based `RobustScaler` fitting and inverse transforms.
199
211
 
@@ -274,8 +286,6 @@ The core implemented behavior is tested and packaged across Linux, macOS, and
274
286
  Windows, but the project remains alpha software. Before a stable 1.0 release,
275
287
  the following compatibility and operational work remains:
276
288
 
277
- - Sparse-aware estimator behavior, including non-centering `StandardScaler`
278
- operation. Shared CSR/CSC validation and Rust kernels are implemented.
279
289
  - `sample_weight` support for `StandardScaler.partial_fit`.
280
290
  - Comprehensive `get_feature_names_out` support and configurable output
281
291
  containers across estimators.
@@ -327,14 +337,22 @@ Substantial numerical loops release the Python GIL.
327
337
 
328
338
  ## Release
329
339
 
330
- 1. Run all development checks and build a release wheel.
331
- 2. Install the wheel into a clean virtual environment and run the import smoke
332
- test.
333
- 3. Verify the distribution name on PyPI.
334
- 4. Tag the release as `v0.1.0` and push the tag.
335
- 5. Approve the GitHub Actions Trusted Publishing environment.
336
-
337
- The release workflow uses PyPI Trusted Publishing and contains no API token.
340
+ 1. Update the matching versions in `pyproject.toml`, `Cargo.toml`, and
341
+ `python/rsklearn/__init__.py`, then update `CHANGELOG.md`.
342
+ 2. Push the release commit and wait for CI, including manylinux and sdist
343
+ installation checks, to pass.
344
+ 3. Run the manual TestPyPI workflow and verify its distributions.
345
+ 4. Run the manual Release workflow with the version number without a `v`
346
+ prefix.
347
+ 5. Approve the PyPI environment if required.
348
+
349
+ The release workflow refuses existing versions, installs every wheel on
350
+ Python 3.10-3.13 across Linux, macOS, and Windows, verifies sdist installation,
351
+ publishes through PyPI Trusted Publishing, creates the immutable GitHub tag and
352
+ release, attaches artifacts, and verifies installation from PyPI. No API token
353
+ is stored in the repository. Configure separate `pypi` and `testpypi` GitHub
354
+ environments and matching Trusted Publishers for `release.yml` and
355
+ `test-pypi.yml`, respectively.
338
356
 
339
357
  ## Roadmap
340
358
 
@@ -3,7 +3,7 @@
3
3
  Fast, familiar machine-learning building blocks powered by safe Rust. 🦀
4
4
 
5
5
  `r-scikit-learn` combines a Rust computational core with lightweight,
6
- scikit-learn-style Python estimators. Version 0.1.0 includes:
6
+ scikit-learn-style Python estimators. Version 0.1.2 includes:
7
7
 
8
8
  - Preprocessing, categorical encoding, and missing-value imputation
9
9
  - Pipelines and column transformers
@@ -93,6 +93,13 @@ encoder = OneHotEncoder(handle_unknown="ignore")
93
93
  X_one_hot = encoder.fit_transform([["small"], ["large"], ["small"]])
94
94
  ```
95
95
 
96
+ ```python
97
+ from rsklearn.preprocessing import MaxAbsScaler, StandardScaler
98
+
99
+ X_sparse_scaled = StandardScaler(with_mean=False).fit_transform(X_one_hot)
100
+ X_sparse_maxabs = MaxAbsScaler().fit_transform(X_one_hot)
101
+ ```
102
+
96
103
  ```python
97
104
  import numpy as np
98
105
  from rsklearn.impute import SimpleImputer
@@ -162,7 +169,10 @@ probabilities = classifier.predict_proba(X_test)
162
169
  - Uses float64 fitted statistics and native float32 kernels where supported.
163
170
  - Ignores NaNs while fitting, preserves them while transforming, and rejects
164
171
  infinity.
165
- - Supports incremental `partial_fit` for `StandardScaler` and `MinMaxScaler`.
172
+ - Supports incremental `partial_fit` for `StandardScaler`, `MaxAbsScaler`, and
173
+ `MinMaxScaler`.
174
+ - Supports CSR/CSC sparse `StandardScaler(with_mean=False)` and `MaxAbsScaler`
175
+ without densifying input.
166
176
  - Supports L1, L2, and max row normalization.
167
177
  - Provides quantile-based `RobustScaler` fitting and inverse transforms.
168
178
 
@@ -243,8 +253,6 @@ The core implemented behavior is tested and packaged across Linux, macOS, and
243
253
  Windows, but the project remains alpha software. Before a stable 1.0 release,
244
254
  the following compatibility and operational work remains:
245
255
 
246
- - Sparse-aware estimator behavior, including non-centering `StandardScaler`
247
- operation. Shared CSR/CSC validation and Rust kernels are implemented.
248
256
  - `sample_weight` support for `StandardScaler.partial_fit`.
249
257
  - Comprehensive `get_feature_names_out` support and configurable output
250
258
  containers across estimators.
@@ -296,14 +304,22 @@ Substantial numerical loops release the Python GIL.
296
304
 
297
305
  ## Release
298
306
 
299
- 1. Run all development checks and build a release wheel.
300
- 2. Install the wheel into a clean virtual environment and run the import smoke
301
- test.
302
- 3. Verify the distribution name on PyPI.
303
- 4. Tag the release as `v0.1.0` and push the tag.
304
- 5. Approve the GitHub Actions Trusted Publishing environment.
305
-
306
- The release workflow uses PyPI Trusted Publishing and contains no API token.
307
+ 1. Update the matching versions in `pyproject.toml`, `Cargo.toml`, and
308
+ `python/rsklearn/__init__.py`, then update `CHANGELOG.md`.
309
+ 2. Push the release commit and wait for CI, including manylinux and sdist
310
+ installation checks, to pass.
311
+ 3. Run the manual TestPyPI workflow and verify its distributions.
312
+ 4. Run the manual Release workflow with the version number without a `v`
313
+ prefix.
314
+ 5. Approve the PyPI environment if required.
315
+
316
+ The release workflow refuses existing versions, installs every wheel on
317
+ Python 3.10-3.13 across Linux, macOS, and Windows, verifies sdist installation,
318
+ publishes through PyPI Trusted Publishing, creates the immutable GitHub tag and
319
+ release, attaches artifacts, and verifies installation from PyPI. No API token
320
+ is stored in the repository. Configure separate `pypi` and `testpypi` GitHub
321
+ environments and matching Trusted Publishers for `release.yml` and
322
+ `test-pypi.yml`, respectively.
307
323
 
308
324
  ## Roadmap
309
325
 
@@ -10,6 +10,8 @@ from collections.abc import Callable
10
10
 
11
11
  import numpy as np
12
12
  import rsklearn.linear_model as rlinear
13
+ import scipy
14
+ import sklearn
13
15
  import sklearn.linear_model as slinear
14
16
  from rsklearn import _core
15
17
 
@@ -65,6 +67,10 @@ def main() -> None:
65
67
  )
66
68
  print(f"Python: {sys.executable}")
67
69
  print(f"Rust extension: {_core.__file__} ({profile})")
70
+ print(
71
+ f"Dependencies: numpy {np.__version__}, scipy {scipy.__version__}, "
72
+ f"scikit-learn {sklearn.__version__}"
73
+ )
68
74
  rng = np.random.default_rng(20260614)
69
75
  X = rng.normal(size=(args.samples, args.features))
70
76
  coefficients = rng.normal(size=args.features)
@@ -0,0 +1,124 @@
1
+ """Compare r-scikit-learn and scikit-learn nearest-neighbor performance."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import statistics
7
+ import sys
8
+ import time
9
+ from collections.abc import Callable
10
+
11
+ import numpy as np
12
+ import rsklearn.neighbors as rneighbors
13
+ import scipy
14
+ import sklearn
15
+ import sklearn.neighbors as sneighbors
16
+ from rsklearn import _core
17
+
18
+
19
+ def measure(
20
+ function: Callable[[], object], repetitions: int, warmups: int
21
+ ) -> tuple[float, float]:
22
+ for _ in range(warmups):
23
+ function()
24
+ values = []
25
+ for _ in range(repetitions):
26
+ started = time.perf_counter()
27
+ function()
28
+ values.append(time.perf_counter() - started)
29
+ return statistics.mean(values), statistics.stdev(values) if repetitions > 1 else 0
30
+
31
+
32
+ def report(
33
+ name: str,
34
+ ours: Callable[[], object],
35
+ theirs: Callable[[], object],
36
+ repetitions: int,
37
+ warmups: int,
38
+ ) -> None:
39
+ ours_mean, ours_stdev = measure(ours, repetitions, warmups)
40
+ theirs_mean, theirs_stdev = measure(theirs, repetitions, warmups)
41
+ improvement = (theirs_mean - ours_mean) / theirs_mean * 100
42
+ print(
43
+ f"{name:<32} r-scikit-learn {ours_mean:9.6f}s ± {ours_stdev:9.6f}s "
44
+ f"scikit-learn {theirs_mean:9.6f}s ± {theirs_stdev:9.6f}s "
45
+ f"impr. {improvement:+7.2f}%"
46
+ )
47
+
48
+
49
+ def main() -> None:
50
+ parser = argparse.ArgumentParser()
51
+ parser.add_argument("--train-samples", type=int, default=20_000)
52
+ parser.add_argument("--query-samples", type=int, default=1_000)
53
+ parser.add_argument("--features", type=int, default=20)
54
+ parser.add_argument("--classes", type=int, default=5)
55
+ parser.add_argument("--neighbors", type=int, default=5)
56
+ parser.add_argument("--repetitions", type=int, default=5)
57
+ parser.add_argument("--warmups", type=int, default=2)
58
+ parser.add_argument(
59
+ "--allow-debug",
60
+ action="store_true",
61
+ help="run even when r-scikit-learn's Rust extension is a debug build",
62
+ )
63
+ args = parser.parse_args()
64
+ profile = _core.build_profile()
65
+ if profile != "release" and not args.allow_debug:
66
+ raise SystemExit(
67
+ "Refusing to benchmark a debug Rust extension. Install a release build "
68
+ "with `maturin develop --release`, then rerun. Pass --allow-debug only "
69
+ "when intentionally measuring debug code."
70
+ )
71
+ print(f"Python: {sys.executable}")
72
+ print(f"Rust extension: {_core.__file__} ({profile})")
73
+ print(
74
+ f"Dependencies: numpy {np.__version__}, scipy {scipy.__version__}, "
75
+ f"scikit-learn {sklearn.__version__}"
76
+ )
77
+ rng = np.random.default_rng(20260616)
78
+ X_train = rng.normal(size=(args.train_samples, args.features))
79
+ X_query = rng.normal(size=(args.query_samples, args.features))
80
+ y = rng.integers(0, args.classes, size=args.train_samples, dtype=np.int64)
81
+ options = {
82
+ "n_neighbors": args.neighbors,
83
+ "weights": "uniform",
84
+ "algorithm": "brute",
85
+ "metric": "euclidean",
86
+ }
87
+ print(
88
+ f"Train matrix: {args.train_samples:,} x {args.features:,}; "
89
+ f"query matrix: {args.query_samples:,} x {args.features:,}"
90
+ )
91
+ report(
92
+ "KNeighborsClassifier fit",
93
+ lambda: rneighbors.KNeighborsClassifier(**options).fit(X_train, y),
94
+ lambda: sneighbors.KNeighborsClassifier(**options).fit(X_train, y),
95
+ args.repetitions,
96
+ args.warmups,
97
+ )
98
+ ours = rneighbors.KNeighborsClassifier(**options).fit(X_train, y)
99
+ theirs = sneighbors.KNeighborsClassifier(**options).fit(X_train, y)
100
+ report(
101
+ "KNeighborsClassifier kneighbors",
102
+ lambda: ours.kneighbors(X_query),
103
+ lambda: theirs.kneighbors(X_query),
104
+ args.repetitions,
105
+ args.warmups,
106
+ )
107
+ report(
108
+ "KNeighborsClassifier predict",
109
+ lambda: ours.predict(X_query),
110
+ lambda: theirs.predict(X_query),
111
+ args.repetitions,
112
+ args.warmups,
113
+ )
114
+ report(
115
+ "KNeighborsClassifier proba",
116
+ lambda: ours.predict_proba(X_query),
117
+ lambda: theirs.predict_proba(X_query),
118
+ args.repetitions,
119
+ args.warmups,
120
+ )
121
+
122
+
123
+ if __name__ == "__main__":
124
+ main()
@@ -13,6 +13,7 @@ from rsklearn.base import BaseEstimator
13
13
  from rsklearn.impute import SimpleImputer
14
14
  from rsklearn.preprocessing import (
15
15
  LabelEncoder,
16
+ MaxAbsScaler,
16
17
  MinMaxScaler,
17
18
  Normalizer,
18
19
  OneHotEncoder,
@@ -27,6 +28,7 @@ from sklearn.impute import SimpleImputer as ScikitSimpleImputer
27
28
 
28
29
  # The scikit-learn distribution intentionally exposes the `sklearn` import package.
29
30
  from sklearn.preprocessing import LabelEncoder as ScikitLabelEncoder
31
+ from sklearn.preprocessing import MaxAbsScaler as ScikitMaxAbsScaler
30
32
  from sklearn.preprocessing import MinMaxScaler as ScikitMinMaxScaler
31
33
  from sklearn.preprocessing import Normalizer as ScikitNormalizer
32
34
  from sklearn.preprocessing import OneHotEncoder as ScikitOneHotEncoder
@@ -88,6 +90,7 @@ def benchmark_matrix(rows: int, columns: int, repetitions: int) -> None:
88
90
  )
89
91
  for name, ours, theirs in [
90
92
  ("StandardScaler", StandardScaler, ScikitStandardScaler),
93
+ ("MaxAbsScaler", MaxAbsScaler, ScikitMaxAbsScaler),
91
94
  ("MinMaxScaler", MinMaxScaler, ScikitMinMaxScaler),
92
95
  ("Normalizer", Normalizer, ScikitNormalizer),
93
96
  ("RobustScaler", RobustScaler, ScikitRobustScaler),
@@ -294,6 +297,34 @@ def benchmark_sparse(repetitions: int) -> None:
294
297
  scikit_scale,
295
298
  repetitions,
296
299
  )
300
+ ours_standard = StandardScaler(with_mean=False).fit(matrix)
301
+ theirs_standard = ScikitStandardScaler(with_mean=False).fit(matrix)
302
+ report_comparison(
303
+ "Sparse StandardScaler fit",
304
+ lambda: StandardScaler(with_mean=False).fit(matrix),
305
+ lambda: ScikitStandardScaler(with_mean=False).fit(matrix),
306
+ repetitions,
307
+ )
308
+ report_comparison(
309
+ "Sparse StandardScaler transform",
310
+ lambda: ours_standard.transform(matrix),
311
+ lambda: theirs_standard.transform(matrix),
312
+ repetitions,
313
+ )
314
+ ours_maxabs = MaxAbsScaler().fit(matrix)
315
+ theirs_maxabs = ScikitMaxAbsScaler().fit(matrix)
316
+ report_comparison(
317
+ "Sparse MaxAbsScaler fit",
318
+ lambda: MaxAbsScaler().fit(matrix),
319
+ lambda: ScikitMaxAbsScaler().fit(matrix),
320
+ repetitions,
321
+ )
322
+ report_comparison(
323
+ "Sparse MaxAbsScaler transform",
324
+ lambda: ours_maxabs.transform(matrix),
325
+ lambda: theirs_maxabs.transform(matrix),
326
+ repetitions,
327
+ )
297
328
 
298
329
 
299
330
  def main() -> None:
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "r-scikit-learn"
7
- version = "0.1.0"
7
+ version = "0.1.2"
8
8
  description = "High-performance scikit-learn-style machine learning powered by safe Rust"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -26,6 +26,7 @@ dependencies = ["numpy>=1.23", "scipy>=1.10"]
26
26
 
27
27
  [project.optional-dependencies]
28
28
  dev = [
29
+ "hypothesis>=6.100,<7",
29
30
  "maturin>=1.9,<2.0",
30
31
  "pytest>=8",
31
32
  "ruff>=0.11",
@@ -36,6 +37,7 @@ dev = [
36
37
  Homepage = "https://github.com/rishib42/r-scikit-learn"
37
38
  Repository = "https://github.com/rishib42/r-scikit-learn"
38
39
  Issues = "https://github.com/rishib42/r-scikit-learn/issues"
40
+ Changelog = "https://github.com/rishib42/r-scikit-learn/blob/main/CHANGELOG.md"
39
41
 
40
42
  [tool.maturin]
41
43
  python-source = "python"
@@ -10,6 +10,7 @@ from .base import (
10
10
  from .compose import ColumnTransformer, make_column_transformer
11
11
  from .impute import SimpleImputer
12
12
  from .linear_model import ElasticNet, Lasso, LinearRegression, LogisticRegression, Ridge
13
+ from .neighbors import KNeighborsClassifier
13
14
  from .pipeline import Pipeline, make_pipeline
14
15
  from .preprocessing import (
15
16
  LabelEncoder,
@@ -26,6 +27,7 @@ __all__ = [
26
27
  "ClassifierMixin",
27
28
  "ColumnTransformer",
28
29
  "ElasticNet",
30
+ "KNeighborsClassifier",
29
31
  "LabelEncoder",
30
32
  "Lasso",
31
33
  "LinearRegression",
@@ -45,4 +47,4 @@ __all__ = [
45
47
  "make_column_transformer",
46
48
  "make_pipeline",
47
49
  ]
48
- __version__ = "0.1.0"
50
+ __version__ = "0.1.2"
@@ -12,6 +12,26 @@ from rsklearn.base import BaseEstimator, RegressorMixin
12
12
 
13
13
  from ._base import LinearModel, validate_regression_fit
14
14
 
15
+ # Normal equations square the condition number. This cutoff limits the
16
+ # resulting float64 error amplification before selecting the fast Gram path.
17
+ _GRAM_MIN_SINGULAR_RATIO = np.finfo(np.float64).eps ** 0.25
18
+ _GRAM_RANK_RESOLUTION = np.sqrt(np.finfo(np.float64).eps)
19
+
20
+
21
+ def _tall_solution_is_stable(singular: np.ndarray, rank: int, tolerance: float) -> bool:
22
+ """Return whether normal-equation accuracy is reliable for this spectrum."""
23
+ if rank == 0 or singular.size == 0 or not np.isfinite(singular).all():
24
+ return False
25
+ if rank < singular.size and tolerance < _GRAM_RANK_RESOLUTION:
26
+ return False
27
+ largest = singular[0]
28
+ smallest_retained = singular[rank - 1]
29
+ return (
30
+ largest > 0
31
+ and smallest_retained > 0
32
+ and smallest_retained / largest >= _GRAM_MIN_SINGULAR_RATIO
33
+ )
34
+
15
35
 
16
36
  def _fit_lstsq(
17
37
  X: np.ndarray,
@@ -22,7 +42,9 @@ def _fit_lstsq(
22
42
  ) -> tuple[np.ndarray, np.ndarray, int, np.ndarray]:
23
43
  """Solve unregularized least squares through a shape-aware dense backend."""
24
44
  if X.shape[0] >= 4 * X.shape[1]:
25
- return _core.linear_fit_tall(X, y, weights, fit_intercept, tolerance)
45
+ tall_fit = _core.linear_fit_tall(X, y, weights, fit_intercept, tolerance)
46
+ if _tall_solution_is_stable(tall_fit[3], tall_fit[2], tolerance):
47
+ return tall_fit
26
48
  uniform_weights = np.all(weights == weights[0])
27
49
  if fit_intercept:
28
50
  if uniform_weights:
@@ -0,0 +1,5 @@
1
+ """Nearest-neighbor estimators."""
2
+
3
+ from ._classification import KNeighborsClassifier
4
+
5
+ __all__ = ["KNeighborsClassifier"]