scikit-learn-intelex 2025.4.0__py313-none-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-learn-intelex might be problematic. Click here for more details.
- daal4py/__init__.py +73 -0
- daal4py/__main__.py +58 -0
- daal4py/_daal4py.cpython-313-x86_64-linux-gnu.so +0 -0
- daal4py/doc/third-party-programs.txt +424 -0
- daal4py/mb/__init__.py +19 -0
- daal4py/mb/model_builders.py +377 -0
- daal4py/mpi_transceiver.cpython-313-x86_64-linux-gnu.so +0 -0
- daal4py/sklearn/__init__.py +40 -0
- daal4py/sklearn/_n_jobs_support.py +248 -0
- daal4py/sklearn/_utils.py +245 -0
- daal4py/sklearn/cluster/__init__.py +20 -0
- daal4py/sklearn/cluster/dbscan.py +165 -0
- daal4py/sklearn/cluster/k_means.py +597 -0
- daal4py/sklearn/cluster/tests/test_dbscan.py +109 -0
- daal4py/sklearn/decomposition/__init__.py +19 -0
- daal4py/sklearn/decomposition/_pca.py +524 -0
- daal4py/sklearn/ensemble/AdaBoostClassifier.py +196 -0
- daal4py/sklearn/ensemble/GBTDAAL.py +337 -0
- daal4py/sklearn/ensemble/__init__.py +27 -0
- daal4py/sklearn/ensemble/_forest.py +1397 -0
- daal4py/sklearn/ensemble/tests/test_decision_forest.py +206 -0
- daal4py/sklearn/linear_model/__init__.py +29 -0
- daal4py/sklearn/linear_model/_coordinate_descent.py +848 -0
- daal4py/sklearn/linear_model/_linear.py +272 -0
- daal4py/sklearn/linear_model/_ridge.py +325 -0
- daal4py/sklearn/linear_model/coordinate_descent.py +17 -0
- daal4py/sklearn/linear_model/linear.py +17 -0
- daal4py/sklearn/linear_model/logistic_loss.py +195 -0
- daal4py/sklearn/linear_model/logistic_path.py +1026 -0
- daal4py/sklearn/linear_model/ridge.py +17 -0
- daal4py/sklearn/linear_model/tests/test_linear.py +208 -0
- daal4py/sklearn/linear_model/tests/test_ridge.py +69 -0
- daal4py/sklearn/manifold/__init__.py +19 -0
- daal4py/sklearn/manifold/_t_sne.py +405 -0
- daal4py/sklearn/metrics/__init__.py +20 -0
- daal4py/sklearn/metrics/_pairwise.py +236 -0
- daal4py/sklearn/metrics/_ranking.py +210 -0
- daal4py/sklearn/model_selection/__init__.py +19 -0
- daal4py/sklearn/model_selection/_split.py +309 -0
- daal4py/sklearn/model_selection/tests/test_split.py +56 -0
- daal4py/sklearn/monkeypatch/__init__.py +0 -0
- daal4py/sklearn/monkeypatch/dispatcher.py +232 -0
- daal4py/sklearn/monkeypatch/tests/_models_info.py +161 -0
- daal4py/sklearn/monkeypatch/tests/test_monkeypatch.py +71 -0
- daal4py/sklearn/monkeypatch/tests/test_patching.py +90 -0
- daal4py/sklearn/monkeypatch/tests/utils/_launch_algorithms.py +117 -0
- daal4py/sklearn/neighbors/__init__.py +21 -0
- daal4py/sklearn/neighbors/_base.py +503 -0
- daal4py/sklearn/neighbors/_classification.py +139 -0
- daal4py/sklearn/neighbors/_regression.py +74 -0
- daal4py/sklearn/neighbors/_unsupervised.py +55 -0
- daal4py/sklearn/neighbors/tests/test_kneighbors.py +113 -0
- daal4py/sklearn/svm/__init__.py +19 -0
- daal4py/sklearn/svm/svm.py +734 -0
- daal4py/sklearn/utils/__init__.py +21 -0
- daal4py/sklearn/utils/base.py +75 -0
- daal4py/sklearn/utils/tests/test_utils.py +51 -0
- daal4py/sklearn/utils/validation.py +696 -0
- onedal/__init__.py +83 -0
- onedal/_config.py +54 -0
- onedal/_device_offload.py +204 -0
- onedal/_onedal_py_dpc.cpython-313-x86_64-linux-gnu.so +0 -0
- onedal/_onedal_py_host.cpython-313-x86_64-linux-gnu.so +0 -0
- onedal/_onedal_py_spmd_dpc.cpython-313-x86_64-linux-gnu.so +0 -0
- onedal/basic_statistics/__init__.py +20 -0
- onedal/basic_statistics/basic_statistics.py +107 -0
- onedal/basic_statistics/incremental_basic_statistics.py +175 -0
- onedal/basic_statistics/tests/test_basic_statistics.py +242 -0
- onedal/basic_statistics/tests/test_incremental_basic_statistics.py +279 -0
- onedal/basic_statistics/tests/utils.py +50 -0
- onedal/cluster/__init__.py +27 -0
- onedal/cluster/dbscan.py +105 -0
- onedal/cluster/kmeans.py +557 -0
- onedal/cluster/kmeans_init.py +112 -0
- onedal/cluster/tests/test_dbscan.py +125 -0
- onedal/cluster/tests/test_kmeans.py +88 -0
- onedal/cluster/tests/test_kmeans_init.py +93 -0
- onedal/common/_base.py +38 -0
- onedal/common/_estimator_checks.py +47 -0
- onedal/common/_mixin.py +62 -0
- onedal/common/_policy.py +55 -0
- onedal/common/_spmd_policy.py +30 -0
- onedal/common/hyperparameters.py +125 -0
- onedal/common/tests/test_policy.py +76 -0
- onedal/common/tests/test_sycl.py +128 -0
- onedal/covariance/__init__.py +20 -0
- onedal/covariance/covariance.py +122 -0
- onedal/covariance/incremental_covariance.py +161 -0
- onedal/covariance/tests/test_covariance.py +50 -0
- onedal/covariance/tests/test_incremental_covariance.py +190 -0
- onedal/datatypes/__init__.py +19 -0
- onedal/datatypes/_data_conversion.py +121 -0
- onedal/datatypes/tests/common.py +126 -0
- onedal/datatypes/tests/test_data.py +475 -0
- onedal/decomposition/__init__.py +20 -0
- onedal/decomposition/incremental_pca.py +214 -0
- onedal/decomposition/pca.py +186 -0
- onedal/decomposition/tests/test_incremental_pca.py +285 -0
- onedal/ensemble/__init__.py +29 -0
- onedal/ensemble/forest.py +736 -0
- onedal/ensemble/tests/test_random_forest.py +97 -0
- onedal/linear_model/__init__.py +27 -0
- onedal/linear_model/incremental_linear_model.py +292 -0
- onedal/linear_model/linear_model.py +325 -0
- onedal/linear_model/logistic_regression.py +247 -0
- onedal/linear_model/tests/test_incremental_linear_regression.py +213 -0
- onedal/linear_model/tests/test_incremental_ridge_regression.py +171 -0
- onedal/linear_model/tests/test_linear_regression.py +259 -0
- onedal/linear_model/tests/test_logistic_regression.py +95 -0
- onedal/linear_model/tests/test_ridge.py +95 -0
- onedal/neighbors/__init__.py +19 -0
- onedal/neighbors/neighbors.py +763 -0
- onedal/neighbors/tests/test_knn_classification.py +49 -0
- onedal/primitives/__init__.py +27 -0
- onedal/primitives/get_tree.py +25 -0
- onedal/primitives/kernel_functions.py +152 -0
- onedal/primitives/tests/test_kernel_functions.py +159 -0
- onedal/spmd/__init__.py +25 -0
- onedal/spmd/_base.py +30 -0
- onedal/spmd/basic_statistics/__init__.py +20 -0
- onedal/spmd/basic_statistics/basic_statistics.py +30 -0
- onedal/spmd/basic_statistics/incremental_basic_statistics.py +71 -0
- onedal/spmd/cluster/__init__.py +28 -0
- onedal/spmd/cluster/dbscan.py +23 -0
- onedal/spmd/cluster/kmeans.py +56 -0
- onedal/spmd/covariance/__init__.py +20 -0
- onedal/spmd/covariance/covariance.py +26 -0
- onedal/spmd/covariance/incremental_covariance.py +83 -0
- onedal/spmd/decomposition/__init__.py +20 -0
- onedal/spmd/decomposition/incremental_pca.py +124 -0
- onedal/spmd/decomposition/pca.py +26 -0
- onedal/spmd/ensemble/__init__.py +19 -0
- onedal/spmd/ensemble/forest.py +28 -0
- onedal/spmd/linear_model/__init__.py +21 -0
- onedal/spmd/linear_model/incremental_linear_model.py +101 -0
- onedal/spmd/linear_model/linear_model.py +30 -0
- onedal/spmd/linear_model/logistic_regression.py +38 -0
- onedal/spmd/neighbors/__init__.py +19 -0
- onedal/spmd/neighbors/neighbors.py +75 -0
- onedal/svm/__init__.py +19 -0
- onedal/svm/svm.py +556 -0
- onedal/svm/tests/test_csr_svm.py +351 -0
- onedal/svm/tests/test_nusvc.py +204 -0
- onedal/svm/tests/test_nusvr.py +210 -0
- onedal/svm/tests/test_svc.py +176 -0
- onedal/svm/tests/test_svr.py +243 -0
- onedal/tests/test_common.py +57 -0
- onedal/tests/utils/_dataframes_support.py +162 -0
- onedal/tests/utils/_device_selection.py +102 -0
- onedal/utils/__init__.py +49 -0
- onedal/utils/_array_api.py +81 -0
- onedal/utils/_dpep_helpers.py +56 -0
- onedal/utils/tests/test_validation.py +142 -0
- onedal/utils/validation.py +464 -0
- scikit_learn_intelex-2025.4.0.dist-info/LICENSE.txt +202 -0
- scikit_learn_intelex-2025.4.0.dist-info/METADATA +190 -0
- scikit_learn_intelex-2025.4.0.dist-info/RECORD +282 -0
- scikit_learn_intelex-2025.4.0.dist-info/WHEEL +5 -0
- scikit_learn_intelex-2025.4.0.dist-info/top_level.txt +3 -0
- sklearnex/__init__.py +66 -0
- sklearnex/__main__.py +58 -0
- sklearnex/_config.py +116 -0
- sklearnex/_device_offload.py +126 -0
- sklearnex/_utils.py +177 -0
- sklearnex/basic_statistics/__init__.py +20 -0
- sklearnex/basic_statistics/basic_statistics.py +261 -0
- sklearnex/basic_statistics/incremental_basic_statistics.py +352 -0
- sklearnex/basic_statistics/tests/test_basic_statistics.py +405 -0
- sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +455 -0
- sklearnex/cluster/__init__.py +20 -0
- sklearnex/cluster/dbscan.py +197 -0
- sklearnex/cluster/k_means.py +397 -0
- sklearnex/cluster/tests/test_dbscan.py +38 -0
- sklearnex/cluster/tests/test_kmeans.py +157 -0
- sklearnex/conftest.py +82 -0
- sklearnex/covariance/__init__.py +19 -0
- sklearnex/covariance/incremental_covariance.py +405 -0
- sklearnex/covariance/tests/test_incremental_covariance.py +287 -0
- sklearnex/decomposition/__init__.py +19 -0
- sklearnex/decomposition/pca.py +427 -0
- sklearnex/decomposition/tests/test_pca.py +58 -0
- sklearnex/dispatcher.py +534 -0
- sklearnex/doc/third-party-programs.txt +424 -0
- sklearnex/ensemble/__init__.py +29 -0
- sklearnex/ensemble/_forest.py +2029 -0
- sklearnex/ensemble/tests/test_forest.py +140 -0
- sklearnex/glob/__main__.py +72 -0
- sklearnex/glob/dispatcher.py +101 -0
- sklearnex/linear_model/__init__.py +32 -0
- sklearnex/linear_model/coordinate_descent.py +30 -0
- sklearnex/linear_model/incremental_linear.py +495 -0
- sklearnex/linear_model/incremental_ridge.py +432 -0
- sklearnex/linear_model/linear.py +346 -0
- sklearnex/linear_model/logistic_regression.py +415 -0
- sklearnex/linear_model/ridge.py +390 -0
- sklearnex/linear_model/tests/test_incremental_linear.py +267 -0
- sklearnex/linear_model/tests/test_incremental_ridge.py +214 -0
- sklearnex/linear_model/tests/test_linear.py +142 -0
- sklearnex/linear_model/tests/test_logreg.py +134 -0
- sklearnex/linear_model/tests/test_ridge.py +256 -0
- sklearnex/manifold/__init__.py +19 -0
- sklearnex/manifold/t_sne.py +26 -0
- sklearnex/manifold/tests/test_tsne.py +250 -0
- sklearnex/metrics/__init__.py +23 -0
- sklearnex/metrics/pairwise.py +22 -0
- sklearnex/metrics/ranking.py +20 -0
- sklearnex/metrics/tests/test_metrics.py +39 -0
- sklearnex/model_selection/__init__.py +21 -0
- sklearnex/model_selection/split.py +22 -0
- sklearnex/model_selection/tests/test_model_selection.py +34 -0
- sklearnex/neighbors/__init__.py +27 -0
- sklearnex/neighbors/_lof.py +236 -0
- sklearnex/neighbors/common.py +310 -0
- sklearnex/neighbors/knn_classification.py +231 -0
- sklearnex/neighbors/knn_regression.py +207 -0
- sklearnex/neighbors/knn_unsupervised.py +178 -0
- sklearnex/neighbors/tests/test_neighbors.py +82 -0
- sklearnex/preview/__init__.py +17 -0
- sklearnex/preview/covariance/__init__.py +19 -0
- sklearnex/preview/covariance/covariance.py +142 -0
- sklearnex/preview/covariance/tests/test_covariance.py +66 -0
- sklearnex/preview/decomposition/__init__.py +19 -0
- sklearnex/preview/decomposition/incremental_pca.py +244 -0
- sklearnex/preview/decomposition/tests/test_incremental_pca.py +336 -0
- sklearnex/spmd/__init__.py +25 -0
- sklearnex/spmd/basic_statistics/__init__.py +20 -0
- sklearnex/spmd/basic_statistics/basic_statistics.py +21 -0
- sklearnex/spmd/basic_statistics/incremental_basic_statistics.py +30 -0
- sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
- sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +306 -0
- sklearnex/spmd/cluster/__init__.py +30 -0
- sklearnex/spmd/cluster/dbscan.py +50 -0
- sklearnex/spmd/cluster/kmeans.py +21 -0
- sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
- sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +173 -0
- sklearnex/spmd/covariance/__init__.py +20 -0
- sklearnex/spmd/covariance/covariance.py +21 -0
- sklearnex/spmd/covariance/incremental_covariance.py +37 -0
- sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
- sklearnex/spmd/covariance/tests/test_incremental_covariance_spmd.py +184 -0
- sklearnex/spmd/decomposition/__init__.py +20 -0
- sklearnex/spmd/decomposition/incremental_pca.py +30 -0
- sklearnex/spmd/decomposition/pca.py +21 -0
- sklearnex/spmd/decomposition/tests/test_incremental_pca_spmd.py +269 -0
- sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
- sklearnex/spmd/ensemble/__init__.py +19 -0
- sklearnex/spmd/ensemble/forest.py +71 -0
- sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
- sklearnex/spmd/linear_model/__init__.py +21 -0
- sklearnex/spmd/linear_model/incremental_linear_model.py +35 -0
- sklearnex/spmd/linear_model/linear_model.py +21 -0
- sklearnex/spmd/linear_model/logistic_regression.py +21 -0
- sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +331 -0
- sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
- sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +162 -0
- sklearnex/spmd/neighbors/__init__.py +19 -0
- sklearnex/spmd/neighbors/neighbors.py +25 -0
- sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
- sklearnex/svm/__init__.py +29 -0
- sklearnex/svm/_common.py +339 -0
- sklearnex/svm/nusvc.py +371 -0
- sklearnex/svm/nusvr.py +170 -0
- sklearnex/svm/svc.py +399 -0
- sklearnex/svm/svr.py +167 -0
- sklearnex/svm/tests/test_svm.py +93 -0
- sklearnex/tests/test_common.py +491 -0
- sklearnex/tests/test_config.py +123 -0
- sklearnex/tests/test_hyperparameters.py +43 -0
- sklearnex/tests/test_memory_usage.py +347 -0
- sklearnex/tests/test_monkeypatch.py +269 -0
- sklearnex/tests/test_n_jobs_support.py +108 -0
- sklearnex/tests/test_parallel.py +48 -0
- sklearnex/tests/test_patching.py +377 -0
- sklearnex/tests/test_run_to_run_stability.py +326 -0
- sklearnex/tests/utils/__init__.py +48 -0
- sklearnex/tests/utils/base.py +436 -0
- sklearnex/tests/utils/spmd.py +198 -0
- sklearnex/utils/__init__.py +19 -0
- sklearnex/utils/_array_api.py +82 -0
- sklearnex/utils/parallel.py +59 -0
- sklearnex/utils/tests/test_validation.py +238 -0
- sklearnex/utils/validation.py +208 -0
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
# ==============================================================================
|
|
2
|
+
# Copyright 2024 Intel Corporation
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# ==============================================================================
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
import pytest
|
|
19
|
+
from numpy.testing import assert_allclose
|
|
20
|
+
from sklearn.datasets import load_diabetes
|
|
21
|
+
from sklearn.metrics import mean_squared_error
|
|
22
|
+
from sklearn.model_selection import train_test_split
|
|
23
|
+
|
|
24
|
+
from onedal.datatypes import from_table
|
|
25
|
+
from onedal.linear_model import IncrementalLinearRegression
|
|
26
|
+
from onedal.tests.utils._device_selection import get_queues
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@pytest.mark.parametrize("queue", get_queues())
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_diabetes(queue, dtype):
    """Fit the diabetes data in two batches and bound the test-set MSE."""
    features, target = load_diabetes(return_X_y=True)
    features, target = features.astype(dtype), target.astype(dtype)
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, train_size=0.8, random_state=777
    )

    # Feed the training set to the estimator as two consecutive batches.
    X_batches = np.array_split(X_train, 2)
    y_batches = np.array_split(y_train, 2)
    model = IncrementalLinearRegression(fit_intercept=True)
    for X_batch, y_batch in zip(X_batches, y_batches):
        model.partial_fit(X_batch, y_batch, queue=queue)
    model.finalize_fit()

    y_pred = model.predict(X_test, queue=queue)
    mse = mean_squared_error(y_test, y_pred)
    assert mse < 2396
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@pytest.mark.parametrize("queue", get_queues())
@pytest.mark.parametrize("num_blocks", [1, 2, 10])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_full_results(queue, num_blocks, dtype):
    """Recover known coefficients and intercept from noiseless synthetic data.

    The training data is fed in ``num_blocks`` batches; the fitted
    coefficients, the intercept and the predictions on fresh data are then
    compared with the generating model.
    """
    n_features, n_targets = 19, 7
    n_train, n_test = 3500, 1999

    # A single generator produces every random array, so the draw order
    # (intercept, coef, train X, test X) is part of the test data definition.
    rng = np.random.default_rng(42)
    intercept = rng.random(size=n_targets, dtype=dtype)
    coef = rng.random(size=(n_targets, n_features), dtype=dtype).T

    X = rng.random(size=(n_train, n_features), dtype=dtype)
    y = X @ coef + intercept[np.newaxis, :]
    X_blocks = np.array_split(X, num_blocks)
    y_blocks = np.array_split(y, num_blocks)

    model = IncrementalLinearRegression(fit_intercept=True)
    for X_block, y_block in zip(X_blocks, y_blocks):
        model.partial_fit(X_block, y_block, queue=queue)
    model.finalize_fit()

    # Coefficient tolerances: float32 gets a looser bound, looser still on GPU.
    on_gpu = bool(queue and queue.sycl_device.is_gpu)
    coef_is_f32 = model.coef_.dtype == np.float32
    if coef_is_f32:
        coef_rtol = 5e-3 if on_gpu else 3e-3
    else:
        coef_rtol = 1e-5
    coef_atol = 1e-4 if coef_is_f32 else 1e-6
    assert_allclose(coef, model.coef_.T, rtol=coef_rtol, atol=coef_atol)

    intercept_rtol = 3e-3 if model.intercept_.dtype == np.float32 else 1e-5
    assert_allclose(intercept, model.intercept_, rtol=intercept_rtol)

    X_new = rng.random(size=(n_test, n_features), dtype=dtype)
    expected = X_new @ coef + intercept[np.newaxis, :]

    predicted = model.predict(X_new, queue=queue)

    predict_rtol = 2e-4 if predicted.dtype == np.float32 else 1e-7
    assert_allclose(expected, predicted, rtol=predict_rtol)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@pytest.mark.parametrize("queue", get_queues())
@pytest.mark.parametrize("num_blocks", [1, 2, 10])
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_no_intercept_results(queue, num_blocks, dtype):
    """Recover known coefficients from noiseless data without an intercept.

    The training data is fed in ``num_blocks`` batches to an estimator built
    with ``fit_intercept=False``; coefficients and predictions on fresh data
    are compared with the generating model.
    """
    n_features, n_targets = 19, 7
    n_train, n_test = 3500, 1999

    # A single generator produces every random array, so the draw order
    # (coef, train X, test X) is part of the test data definition.
    rng = np.random.default_rng(42)
    coef = rng.random(size=(n_targets, n_features), dtype=dtype).T

    X = rng.random(size=(n_train, n_features), dtype=dtype)
    y = X @ coef

    X_blocks = np.array_split(X, num_blocks)
    y_blocks = np.array_split(y, num_blocks)

    model = IncrementalLinearRegression(fit_intercept=False)
    for X_block, y_block in zip(X_blocks, y_blocks):
        model.partial_fit(X_block, y_block, queue=queue)
    model.finalize_fit()

    # TODO: find out whether the accuracy really has to differ this much
    # between float32 and float64.
    on_gpu = bool(queue and queue.sycl_device.is_gpu)
    if model.coef_.dtype == np.float32:
        coef_rtol = 3e-3 if on_gpu else 2e-3
    else:
        coef_rtol = 1e-7
    assert_allclose(coef, model.coef_.T, rtol=coef_rtol)

    X_new = rng.random(size=(n_test, n_features), dtype=dtype)
    expected = X_new @ coef

    predicted = model.predict(X_new, queue=queue)

    predict_rtol = 5e-5 if predicted.dtype == np.float32 else 1e-7
    assert_allclose(expected, predicted, rtol=predict_rtol)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@pytest.mark.parametrize("queue", get_queues())
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_reconstruct_model(queue, dtype):
    """Predict with an estimator whose parameters were assigned by hand."""
    n_samples = 3500
    n_features, n_targets = 14, 9

    # Draw order (intercept, coef, X) from the shared generator is preserved.
    rng = np.random.default_rng(42)
    intercept = rng.random(size=n_targets, dtype=dtype)
    coef = rng.random(size=(n_targets, n_features), dtype=dtype).T

    X = rng.random(size=(n_samples, n_features), dtype=dtype)
    expected = X @ coef + intercept[np.newaxis, :]

    # No fitting takes place: coef_ and intercept_ are set directly.
    model = IncrementalLinearRegression(fit_intercept=True)
    model.coef_ = coef.T
    model.intercept_ = intercept

    predicted = model.predict(X, queue=queue)

    rtol = 1e-5 if predicted.dtype == np.float32 else 1e-7
    assert_allclose(expected, predicted, rtol=rtol)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
@pytest.mark.parametrize("queue", get_queues())
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_incremental_estimator_pickle(queue, dtype):
    """Check that the estimator survives pickling at each stage of fitting.

    Stages covered: a fresh estimator, an estimator with pending partial
    results, and a finalized estimator. Along the way the test tracks the
    private ``_need_to_finalize`` flag and compares the partial results and
    the final ``coef_`` / ``intercept_`` of the original and the round-tripped
    estimator.
    """
    import pickle

    inclr = IncrementalLinearRegression()

    # Check that estimator can be serialized without any data.
    dump = pickle.dumps(inclr)
    inclr_loaded = pickle.loads(dump)

    seed = 77
    gen = np.random.default_rng(seed)
    X = gen.uniform(low=-0.3, high=+0.7, size=(10, 10))
    X = X.astype(dtype)
    coef = gen.random(size=(1, 10), dtype=dtype).T
    y = X @ coef
    X_split = np.array_split(X, 2)
    y_split = np.array_split(y, 2)
    inclr.partial_fit(X_split[0], y_split[0], queue=queue)
    inclr_loaded.partial_fit(X_split[0], y_split[0], queue=queue)

    # finalize_fit() is deliberately not called here, so both estimators
    # must still report pending partial results.
    assert inclr._need_to_finalize
    assert inclr_loaded._need_to_finalize

    # Check that estimator can be serialized after partial_fit call.
    dump = pickle.dumps(inclr)
    inclr_loaded = pickle.loads(dump)

    partial_xtx = from_table(inclr._partial_result.partial_xtx)
    partial_xtx_loaded = from_table(inclr_loaded._partial_result.partial_xtx)
    assert_allclose(partial_xtx, partial_xtx_loaded)

    partial_xty = from_table(inclr._partial_result.partial_xty)
    partial_xty_loaded = from_table(inclr_loaded._partial_result.partial_xty)
    assert_allclose(partial_xty, partial_xty_loaded)

    # Finalize is called during serialization to make sure partial results
    # are finalized correctly, so the flag is cleared on both the pickled
    # estimator and its loaded copy.
    assert not inclr._need_to_finalize
    assert not inclr_loaded._need_to_finalize

    inclr.partial_fit(X_split[1], y_split[1], queue=queue)
    inclr_loaded.partial_fit(X_split[1], y_split[1], queue=queue)
    assert inclr._need_to_finalize
    assert inclr_loaded._need_to_finalize

    dump = pickle.dumps(inclr_loaded)
    inclr_loaded = pickle.loads(dump)

    # Only the round-tripped estimator went through serialization this time;
    # `inclr` itself was not pickled and keeps its pending state.
    assert inclr._need_to_finalize
    assert not inclr_loaded._need_to_finalize

    inclr.finalize_fit()
    inclr_loaded.finalize_fit()

    # Check that finalized estimator can be serialized.
    dump = pickle.dumps(inclr_loaded)
    inclr_loaded = pickle.loads(dump)

    assert_allclose(inclr.coef_, inclr_loaded.coef_, atol=1e-6)
    assert_allclose(inclr.intercept_, inclr_loaded.intercept_, atol=1e-6)
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# ==============================================================================
|
|
2
|
+
# Copyright 2024 Intel Corporation
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# ==============================================================================
|
|
16
|
+
|
|
17
|
+
from daal4py.sklearn._utils import daal_check_version
|
|
18
|
+
|
|
19
|
+
if daal_check_version((2024, "P", 600)):
    # The imports and every test below are only defined when the version
    # check above passes; otherwise this module contributes no tests.
    import numpy as np
    import pytest
    from numpy.testing import assert_allclose, assert_array_equal
    from sklearn.datasets import load_diabetes
    from sklearn.metrics import mean_squared_error
    from sklearn.model_selection import train_test_split

    from onedal.datatypes import from_table
    from onedal.linear_model import IncrementalRidge
    from onedal.tests.utils._device_selection import get_queues

    @pytest.mark.parametrize("queue", get_queues())
    @pytest.mark.parametrize("dtype", [np.float32, np.float64])
    def test_diabetes(queue, dtype):
        """Fit the diabetes data in two batches and pin the test-set MSE."""
        X, y = load_diabetes(return_X_y=True)
        X, y = X.astype(dtype), y.astype(dtype)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, train_size=0.8, random_state=777
        )
        X_train_split = np.array_split(X_train, 2)
        y_train_split = np.array_split(y_train, 2)
        model = IncrementalRidge(fit_intercept=True, alpha=0.1)
        for i in range(2):
            model.partial_fit(X_train_split[i], y_train_split[i], queue=queue)
        model.finalize_fit()
        y_pred = model.predict(X_test, queue=queue)
        assert_allclose(mean_squared_error(y_test, y_pred), 2388.775, rtol=1e-5)

    @pytest.mark.parametrize("queue", get_queues())
    @pytest.mark.parametrize("dtype", [np.float32, np.float64])
    @pytest.mark.skip(reason="pickling not implemented for oneDAL entities")
    def test_pickle(queue, dtype):
        """Round-trip a fitted model through pickle and compare predictions.

        Currently skipped unconditionally (see the ``skip`` marker above).
        """
        import pickle

        # TODO Implement pickling for oneDAL entities
        X, y = load_diabetes(return_X_y=True)
        X, y = X.astype(dtype), y.astype(dtype)
        model = IncrementalRidge(fit_intercept=True, alpha=0.5)
        model.partial_fit(X, y, queue=queue)
        model.finalize_fit()
        expected = model.predict(X, queue=queue)

        dump = pickle.dumps(model)
        model2 = pickle.loads(dump)

        assert isinstance(model2, model.__class__)
        result = model2.predict(X, queue=queue)

        assert_array_equal(expected, result)

    @pytest.mark.parametrize("queue", get_queues())
    @pytest.mark.parametrize("num_blocks", [1, 2, 10])
    @pytest.mark.parametrize("dtype", [np.float32, np.float64])
    def test_no_intercept_results(queue, num_blocks, dtype):
        """Compare intercept-free ridge results with the closed-form solution.

        The reference coefficients are computed with NumPy as
        ``inv(X.T @ X + alpha * I) @ X.T @ y``; the estimator is fitted on the
        same data fed in ``num_blocks`` batches.
        """
        seed = 42
        n_features, n_targets = 19, 7
        n_train_samples, n_test_samples = 3500, 1999

        gen = np.random.default_rng(seed)

        X = gen.random(size=(n_train_samples, n_features), dtype=dtype)
        y = gen.random(size=(n_train_samples, n_targets), dtype=dtype)
        X_split = np.array_split(X, num_blocks)
        y_split = np.array_split(y, num_blocks)
        alpha = 0.5

        # Closed-form ridge solution used as the ground truth.
        lambda_identity = alpha * np.eye(X.shape[1])
        inverse_term = np.linalg.inv(np.dot(X.T, X) + lambda_identity)
        xt_y = np.dot(X.T, y)
        coef = np.dot(inverse_term, xt_y)

        model = IncrementalRidge(fit_intercept=False, alpha=alpha)
        for i in range(num_blocks):
            model.partial_fit(X_split[i], y_split[i], queue=queue)
        model.finalize_fit()

        # float32 gets a looser relative tolerance, looser still on GPU.
        if queue and queue.sycl_device.is_gpu:
            tol = 5e-3 if model.coef_.dtype == np.float32 else 1e-5
        else:
            tol = 2e-3 if model.coef_.dtype == np.float32 else 1e-5
        assert_allclose(coef, model.coef_.T, rtol=tol)

        Xt = gen.random(size=(n_test_samples, n_features), dtype=dtype)
        gtr = Xt @ coef

        res = model.predict(Xt, queue=queue)

        tol = 2e-4 if res.dtype == np.float32 else 1e-7
        assert_allclose(gtr, res, rtol=tol)

    @pytest.mark.parametrize("queue", get_queues())
    @pytest.mark.parametrize("dtype", [np.float32, np.float64])
    def test_incremental_estimator_pickle(queue, dtype):
        """Check that the estimator survives pickling at each stage of fitting.

        Stages covered: a fresh estimator, an estimator with pending partial
        results, and a finalized estimator. The private ``_need_to_finalize``
        flag, the partial results and the final ``coef_`` / ``intercept_`` are
        compared between the original and the round-tripped estimator.
        """
        import pickle

        model = IncrementalRidge()

        # Check that estimator can be serialized without any data.
        dump = pickle.dumps(model)
        model_loaded = pickle.loads(dump)
        seed = 77
        gen = np.random.default_rng(seed)
        X = gen.uniform(low=-0.3, high=+0.7, size=(10, 10))
        X = X.astype(dtype)
        coef = gen.random(size=(1, 10), dtype=dtype).T
        y = X @ coef
        X_split = np.array_split(X, 2)
        y_split = np.array_split(y, 2)
        model.partial_fit(X_split[0], y_split[0], queue=queue)
        model_loaded.partial_fit(X_split[0], y_split[0], queue=queue)

        # finalize_fit() is deliberately not called here, so both estimators
        # must still report pending partial results.
        assert model._need_to_finalize
        assert model_loaded._need_to_finalize

        # Check that estimator can be serialized after partial_fit call.
        dump = pickle.dumps(model)
        model_loaded = pickle.loads(dump)

        partial_xtx = from_table(model._partial_result.partial_xtx)
        partial_xtx_loaded = from_table(model_loaded._partial_result.partial_xtx)
        assert_allclose(partial_xtx, partial_xtx_loaded)

        partial_xty = from_table(model._partial_result.partial_xty)
        partial_xty_loaded = from_table(model_loaded._partial_result.partial_xty)
        assert_allclose(partial_xty, partial_xty_loaded)

        # Finalize is called during serialization to make sure partial results
        # are finalized correctly, so the flag is cleared on both the pickled
        # estimator and its loaded copy.
        assert not model._need_to_finalize
        assert not model_loaded._need_to_finalize

        model.partial_fit(X_split[1], y_split[1], queue=queue)
        model_loaded.partial_fit(X_split[1], y_split[1], queue=queue)
        assert model._need_to_finalize
        assert model_loaded._need_to_finalize

        dump = pickle.dumps(model_loaded)
        model_loaded = pickle.loads(dump)

        # Only the round-tripped estimator went through serialization this
        # time; `model` itself was not pickled and keeps its pending state.
        assert model._need_to_finalize
        assert not model_loaded._need_to_finalize

        model.finalize_fit()
        model_loaded.finalize_fit()

        # Check that finalized estimator can be serialized.
        dump = pickle.dumps(model_loaded)
        model_loaded = pickle.loads(dump)

        assert_allclose(model.coef_, model_loaded.coef_, atol=1e-6)
        assert_allclose(model.intercept_, model_loaded.intercept_, atol=1e-6)
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
# ===============================================================================
|
|
2
|
+
# Copyright 2023 Intel Corporation
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# ===============================================================================
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
import pytest
|
|
19
|
+
from numpy.testing import assert_allclose, assert_array_equal
|
|
20
|
+
from sklearn.datasets import load_diabetes
|
|
21
|
+
from sklearn.metrics import mean_squared_error
|
|
22
|
+
from sklearn.model_selection import train_test_split
|
|
23
|
+
|
|
24
|
+
from daal4py.sklearn._utils import daal_check_version
|
|
25
|
+
from onedal.linear_model import LinearRegression
|
|
26
|
+
from onedal.tests.utils._device_selection import get_queues
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@pytest.mark.parametrize("queue", get_queues())
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_diabetes(queue, dtype):
    """Fit on an 80% split of the diabetes data and check the held-out MSE.

    The data is cast to ``dtype`` before splitting, and both ``fit`` and
    ``predict`` are dispatched with the parametrized ``queue``.
    """
    features, target = load_diabetes(return_X_y=True)
    features = features.astype(dtype)
    target = target.astype(dtype)

    split = train_test_split(features, target, train_size=0.8, random_state=777)
    X_train, X_test, y_train, y_test = split

    model = LinearRegression(fit_intercept=True)
    model.fit(X_train, y_train, queue=queue)
    predictions = model.predict(X_test, queue=queue)

    # The reference value is hard-coded for this exact split (random_state=777).
    observed_mse = mean_squared_error(y_test, predictions)
    assert_allclose(observed_mse, 2395.567, rtol=1e-5)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@pytest.mark.parametrize("queue", get_queues())
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_pickle(queue, dtype):
    """Check that a fitted model predicts identically after a pickle round trip."""
    import pickle

    features, target = load_diabetes(return_X_y=True)
    features = features.astype(dtype)
    target = target.astype(dtype)

    fitted = LinearRegression(fit_intercept=True)
    fitted.fit(features, target, queue=queue)
    expected = fitted.predict(features, queue=queue)

    # Serialize and immediately restore the fitted estimator.
    restored = pickle.loads(pickle.dumps(fitted))
    assert isinstance(restored, fitted.__class__)

    # Predictions must match exactly, not just within a tolerance.
    result = restored.predict(features, queue=queue)
    assert_array_equal(expected, result)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@pytest.mark.parametrize("queue", get_queues())
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_full_results(queue, dtype):
    """Recover known coefficients and intercepts from noiseless synthetic data.

    Targets are generated as ``X @ coef + intercept``; the fitted ``coef_``,
    ``intercept_`` and the predictions on fresh inputs are compared against
    the generating values with dtype-dependent relative tolerances.
    """
    n_features, n_targets = 19, 7
    n_train, n_test = 3500, 1999

    rng = np.random.default_rng(42)
    # Draw order (intercepts, coefficients, training data, test data) is part
    # of the fixture: changing it changes every generated value.
    true_intercept = rng.random(size=n_targets, dtype=dtype)
    true_coef = rng.random(size=(n_targets, n_features), dtype=dtype).T

    X = rng.random(size=(n_train, n_features), dtype=dtype)
    y = X @ true_coef + true_intercept[np.newaxis, :]

    model = LinearRegression(fit_intercept=True)
    model.fit(X, y, queue=queue)

    # Coefficient tolerance is looser for float32, and looser still on GPU.
    on_gpu = queue and queue.sycl_device.is_gpu
    if model.coef_.dtype == np.float32:
        coef_tol = 5e-3 if on_gpu else 2e-3
    else:
        coef_tol = 1e-5
    assert_allclose(true_coef, model.coef_.T, rtol=coef_tol)

    if model.intercept_.dtype == np.float32:
        intercept_tol = 2e-3
    else:
        intercept_tol = 1e-5
    assert_allclose(true_intercept, model.intercept_, rtol=intercept_tol)

    X_new = rng.random(size=(n_test, n_features), dtype=dtype)
    ground_truth = X_new @ true_coef + true_intercept[np.newaxis, :]

    predicted = model.predict(X_new, queue=queue)

    if predicted.dtype == np.float32:
        pred_tol = 2e-4
    else:
        pred_tol = 1e-7
    assert_allclose(ground_truth, predicted, rtol=pred_tol)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
@pytest.mark.parametrize("queue", get_queues())
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_no_intercept_results(queue, dtype):
    """Recover known coefficients when the model is fitted without intercept.

    Targets are generated as ``X @ coef``; the fitted ``coef_`` and the
    predictions on fresh inputs are compared against the generating values.
    """
    n_features, n_targets = 19, 7
    n_train, n_test = 3500, 1999

    rng = np.random.default_rng(42)
    # Draw order (coefficients, training data, test data) fixes the values.
    true_coef = rng.random(size=(n_targets, n_features), dtype=dtype).T

    X = rng.random(size=(n_train, n_features), dtype=dtype)
    y = X @ true_coef

    model = LinearRegression(fit_intercept=False)
    model.fit(X, y, queue=queue)

    # Coefficient tolerance is looser for float32, and looser still on GPU.
    on_gpu = queue and queue.sycl_device.is_gpu
    if model.coef_.dtype == np.float32:
        coef_tol = 3e-3 if on_gpu else 2e-3
    else:
        coef_tol = 1e-7
    assert_allclose(true_coef, model.coef_.T, rtol=coef_tol)

    X_new = rng.random(size=(n_test, n_features), dtype=dtype)
    ground_truth = X_new @ true_coef

    predicted = model.predict(X_new, queue=queue)

    if predicted.dtype == np.float32:
        pred_tol = 5e-5
    else:
        pred_tol = 1e-7
    assert_allclose(ground_truth, predicted, rtol=pred_tol)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
@pytest.mark.parametrize("queue", get_queues())
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
def test_reconstruct_model(queue, dtype):
    """Predict with a model whose ``coef_``/``intercept_`` were set by hand.

    No ``fit`` call is made: the attributes are assigned directly and the
    prediction is compared with ``X @ coef + intercept`` computed in NumPy.
    """
    n_samples = 3500
    n_features, n_targets = 14, 9

    rng = np.random.default_rng(42)
    # Draw order (intercepts, coefficients, data) fixes the generated values.
    intercept = rng.random(size=n_targets, dtype=dtype)
    coef = rng.random(size=(n_targets, n_features), dtype=dtype).T

    X = rng.random(size=(n_samples, n_features), dtype=dtype)
    ground_truth = X @ coef + intercept[np.newaxis, :]

    model = LinearRegression(fit_intercept=True)
    # Assign the parameters directly; coef_ is stored as (targets, features).
    model.coef_ = coef.T
    model.intercept_ = intercept

    predicted = model.predict(X, queue=queue)

    if predicted.dtype == np.float32:
        pred_tol = 1e-5
    else:
        pred_tol = 1e-7
    assert_allclose(ground_truth, predicted, rtol=pred_tol)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
@pytest.mark.parametrize("queue", get_queues())
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("fit_intercept", [False, True])
@pytest.mark.skipif(
    not daal_check_version((2025, "P", 1)),
    reason="Functionality introduced in later versions",
)
def test_overdetermined_system(queue, dtype, fit_intercept):
    """Check the fitted solution satisfies the normal equations for a wide X.

    ``X`` has 10 rows and 20 columns (more features than samples). The test
    verifies that ``A @ x == b`` holds to within ``1e-6``, where ``A`` and
    ``b`` are the normal-equation matrix and right-hand side (with a column
    of ones appended to ``X`` when an intercept is fitted).

    The model is fitted on the parametrized ``queue`` so that the GPU skip
    guard below corresponds to a computation that actually runs on the device.
    """
    if queue and queue.sycl_device.is_gpu and not daal_check_version((2025, "P", 200)):
        pytest.skip("Functionality introduced in later versions")
    gen = np.random.default_rng(seed=123)
    # NOTE(review): `dtype` is parametrized but the data stays at NumPy's
    # default float64; casting to float32 would need a looser tolerance than
    # the 1e-6 used below -- confirm the intended coverage.
    X = gen.standard_normal(size=(10, 20))
    y = gen.standard_normal(size=X.shape[0])

    # Pass the queue explicitly; without it every device variant of this test
    # would repeat the same host computation.
    model = LinearRegression(fit_intercept=fit_intercept).fit(X, y, queue=queue)
    if not fit_intercept:
        A = X.T @ X
        b = X.T @ y
        x = model.coef_
    else:
        # Augment with a column of ones so the intercept is the last unknown.
        Xi = np.c_[X, np.ones((X.shape[0], 1))]
        A = Xi.T @ Xi
        b = Xi.T @ y
        x = np.r_[model.coef_, model.intercept_]
    residual = A @ x - b
    assert np.all(np.abs(residual) < 1e-6)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
@pytest.mark.parametrize("queue", get_queues())
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("fit_intercept", [False, True])
@pytest.mark.skipif(
    not daal_check_version((2025, "P", 1)),
    reason="Functionality introduced in later versions",
)
def test_singular_matrix(queue, dtype, fit_intercept):
    """Check the fitted solution satisfies the normal equations for singular X.

    ``X`` is 20x4 with column 2 overwritten by column 3, so ``X.T @ X`` is
    rank-deficient. The test verifies that ``A @ x == b`` holds to within
    ``1e-6``, where ``A`` and ``b`` are the normal-equation matrix and
    right-hand side (with a column of ones appended to ``X`` when an
    intercept is fitted).

    The model is fitted on the parametrized ``queue`` so that the GPU skip
    guard below corresponds to a computation that actually runs on the device.
    """
    if queue and queue.sycl_device.is_gpu and not daal_check_version((2025, "P", 200)):
        pytest.skip("Functionality introduced in later versions")
    gen = np.random.default_rng(seed=123)
    # NOTE(review): `dtype` is parametrized but the data stays at NumPy's
    # default float64; casting to float32 would need a looser tolerance than
    # the 1e-6 used below -- confirm the intended coverage.
    X = gen.standard_normal(size=(20, 4))
    # Duplicate a column to make the feature matrix rank-deficient.
    X[:, 2] = X[:, 3]
    y = gen.standard_normal(size=X.shape[0])

    # Pass the queue explicitly; without it every device variant of this test
    # would repeat the same host computation.
    model = LinearRegression(fit_intercept=fit_intercept).fit(X, y, queue=queue)
    if not fit_intercept:
        A = X.T @ X
        b = X.T @ y
        x = model.coef_
    else:
        # Augment with a column of ones so the intercept is the last unknown.
        Xi = np.c_[X, np.ones((X.shape[0], 1))]
        A = Xi.T @ Xi
        b = Xi.T @ y
        x = np.r_[model.coef_, model.intercept_]
    residual = A @ x - b
    assert np.all(np.abs(residual) < 1e-6)
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
@pytest.mark.parametrize("queue", get_queues())
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("fit_intercept", [False, True])
@pytest.mark.parametrize("problem_type", ["regular", "overdetermined", "singular"])
@pytest.mark.skipif(
    not daal_check_version((2025, "P", 1)),
    reason="Functionality introduced in the versions >= 2025.0",
)
def test_multioutput_regression(queue, dtype, fit_intercept, problem_type):
    """Check multi-target fitting on regular, wide and singular inputs.

    For each problem type the test verifies that:

    * the fitted parameters satisfy the normal equations to within ``1e-5``;
    * ``predict`` agrees with ``X @ coef_.T + intercept_`` computed in NumPy;
    * fitting with ``Y`` given as a list of lists yields the same parameters.

    Both ``fit`` calls use the parametrized ``queue``, matching ``predict``
    and the GPU skip guard below.
    """
    if (
        problem_type != "regular"
        and queue
        and queue.sycl_device.is_gpu
        and not daal_check_version((2025, "P", 200))
    ):
        pytest.skip("Functionality introduced in later versions")
    gen = np.random.default_rng(seed=123)
    if problem_type == "regular":
        X = gen.standard_normal(size=(20, 5))
    elif problem_type == "singular":
        X = gen.standard_normal(size=(20, 4))
        # Duplicate a column to make the feature matrix rank-deficient.
        X[:, 3] = X[:, 2]
    else:
        # "overdetermined" case: 10 samples, 20 features.
        X = gen.standard_normal(size=(10, 20))
    # Only Y is generated in the parametrized dtype; X keeps NumPy's default.
    Y = gen.standard_normal(size=(X.shape[0], 3), dtype=dtype)

    # Fit on the same queue that predict() uses below; without it the fit
    # would run on the host for every device variant.
    model = LinearRegression(fit_intercept=fit_intercept).fit(X, Y, queue=queue)
    if not fit_intercept:
        A = X.T @ X
        b = X.T @ Y
        x = model.coef_.T
    else:
        # Augment with a column of ones so the intercepts form the last row.
        Xi = np.c_[X, np.ones((X.shape[0], 1))]
        A = Xi.T @ Xi
        b = Xi.T @ Y
        x = np.r_[model.coef_.T, model.intercept_.reshape((1, -1))]
    residual = A @ x - b
    assert np.all(np.abs(residual) < 1e-5)

    pred = model.predict(X, queue=queue)
    expected_pred = X @ model.coef_.T + model.intercept_.reshape((1, -1))
    tol = 1e-5 if pred.dtype == np.float32 else 1e-7
    assert_allclose(pred, expected_pred, rtol=tol)

    # check that it also works when 'y' is a list of lists
    Y_lists = Y.tolist()
    model_lists = LinearRegression(fit_intercept=fit_intercept).fit(
        X, Y_lists, queue=queue
    )
    assert_allclose(model.coef_, model_lists.coef_)
    if fit_intercept:
        assert_allclose(model.intercept_, model_lists.intercept_)
|