scikit-learn-intelex 2025.1.0__py312-none-manylinux_2_28_x86_64.whl → 2025.4.0__py312-none-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scikit-learn-intelex might be problematic. Click here for more details.
- daal4py/_daal4py.cpython-312-x86_64-linux-gnu.so +0 -0
- daal4py/mpi_transceiver.cpython-312-x86_64-linux-gnu.so +0 -0
- daal4py/sklearn/utils/validation.py +6 -3
- onedal/_device_offload.py +10 -28
- onedal/_onedal_py_dpc.cpython-312-x86_64-linux-gnu.so +0 -0
- onedal/_onedal_py_host.cpython-312-x86_64-linux-gnu.so +0 -0
- onedal/_onedal_py_spmd_dpc.cpython-312-x86_64-linux-gnu.so +0 -0
- onedal/basic_statistics/basic_statistics.py +5 -5
- onedal/basic_statistics/incremental_basic_statistics.py +34 -19
- onedal/basic_statistics/tests/test_basic_statistics.py +16 -72
- onedal/basic_statistics/tests/test_incremental_basic_statistics.py +100 -17
- onedal/basic_statistics/tests/utils.py +50 -0
- onedal/cluster/dbscan.py +5 -10
- onedal/cluster/kmeans.py +9 -16
- onedal/cluster/kmeans_init.py +7 -10
- onedal/common/_policy.py +0 -4
- onedal/common/tests/test_sycl.py +128 -0
- onedal/covariance/covariance.py +6 -9
- onedal/covariance/incremental_covariance.py +41 -26
- onedal/covariance/tests/test_incremental_covariance.py +69 -1
- onedal/datatypes/__init__.py +2 -2
- onedal/datatypes/_data_conversion.py +10 -43
- onedal/datatypes/tests/test_data.py +83 -22
- onedal/decomposition/incremental_pca.py +42 -32
- onedal/decomposition/pca.py +7 -7
- onedal/decomposition/tests/test_incremental_pca.py +87 -0
- onedal/ensemble/forest.py +20 -11
- onedal/linear_model/incremental_linear_model.py +86 -52
- onedal/linear_model/linear_model.py +19 -23
- onedal/linear_model/logistic_regression.py +9 -11
- onedal/linear_model/tests/test_incremental_linear_regression.py +70 -25
- onedal/linear_model/tests/test_incremental_ridge_regression.py +64 -0
- onedal/linear_model/tests/test_linear_regression.py +13 -4
- onedal/neighbors/neighbors.py +21 -25
- onedal/primitives/kernel_functions.py +3 -4
- onedal/spmd/basic_statistics/incremental_basic_statistics.py +7 -5
- onedal/spmd/covariance/incremental_covariance.py +6 -5
- onedal/spmd/decomposition/incremental_pca.py +14 -7
- onedal/spmd/linear_model/incremental_linear_model.py +12 -8
- onedal/svm/svm.py +9 -9
- onedal/utils/tests/test_validation.py +142 -0
- onedal/utils/validation.py +38 -14
- {scikit_learn_intelex-2025.1.0.dist-info → scikit_learn_intelex-2025.4.0.dist-info}/METADATA +39 -80
- {scikit_learn_intelex-2025.1.0.dist-info → scikit_learn_intelex-2025.4.0.dist-info}/RECORD +94 -92
- sklearnex/_utils.py +58 -13
- sklearnex/basic_statistics/basic_statistics.py +39 -8
- sklearnex/basic_statistics/incremental_basic_statistics.py +7 -0
- sklearnex/basic_statistics/tests/test_basic_statistics.py +171 -36
- sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +79 -28
- sklearnex/cluster/dbscan.py +2 -2
- sklearnex/cluster/k_means.py +4 -2
- sklearnex/cluster/tests/test_kmeans.py +0 -2
- sklearnex/covariance/incremental_covariance.py +7 -0
- sklearnex/covariance/tests/test_incremental_covariance.py +50 -0
- sklearnex/decomposition/pca.py +3 -1
- sklearnex/dispatcher.py +1 -10
- sklearnex/ensemble/_forest.py +2 -2
- sklearnex/ensemble/tests/test_forest.py +23 -18
- sklearnex/linear_model/incremental_linear.py +25 -12
- sklearnex/linear_model/incremental_ridge.py +15 -8
- sklearnex/linear_model/linear.py +8 -3
- sklearnex/linear_model/logistic_regression.py +15 -13
- sklearnex/linear_model/ridge.py +374 -8
- sklearnex/linear_model/tests/test_incremental_linear.py +65 -5
- sklearnex/linear_model/tests/test_incremental_ridge.py +61 -0
- sklearnex/linear_model/tests/test_linear.py +11 -36
- sklearnex/linear_model/tests/test_ridge.py +256 -0
- sklearnex/manifold/t_sne.py +5 -0
- sklearnex/manifold/tests/test_tsne.py +226 -2
- sklearnex/neighbors/common.py +2 -2
- sklearnex/preview/__init__.py +1 -1
- sklearnex/preview/covariance/covariance.py +6 -2
- sklearnex/preview/covariance/tests/test_covariance.py +1 -1
- sklearnex/preview/decomposition/incremental_pca.py +14 -3
- sklearnex/preview/decomposition/tests/test_incremental_pca.py +70 -0
- sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +3 -3
- sklearnex/spmd/basic_statistics/tests/test_incremental_basic_statistics_spmd.py +6 -7
- sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +3 -2
- sklearnex/spmd/linear_model/tests/test_incremental_linear_spmd.py +16 -14
- sklearnex/svm/_common.py +2 -2
- sklearnex/tests/test_common.py +130 -29
- sklearnex/tests/test_hyperparameters.py +43 -0
- sklearnex/tests/test_memory_usage.py +7 -39
- sklearnex/tests/test_monkeypatch.py +4 -11
- sklearnex/tests/test_patching.py +0 -8
- sklearnex/tests/test_run_to_run_stability.py +7 -2
- sklearnex/tests/utils/__init__.py +4 -0
- sklearnex/tests/utils/base.py +65 -0
- sklearnex/utils/__init__.py +2 -2
- sklearnex/utils/tests/test_validation.py +238 -0
- sklearnex/utils/validation.py +192 -1
- sklearnex/preview/linear_model/__init__.py +0 -19
- sklearnex/preview/linear_model/ridge.py +0 -424
- sklearnex/preview/linear_model/tests/test_ridge.py +0 -102
- sklearnex/utils/tests/test_finite.py +0 -89
- {scikit_learn_intelex-2025.1.0.dist-info → scikit_learn_intelex-2025.4.0.dist-info}/LICENSE.txt +0 -0
- {scikit_learn_intelex-2025.1.0.dist-info → scikit_learn_intelex-2025.4.0.dist-info}/WHEEL +0 -0
- {scikit_learn_intelex-2025.1.0.dist-info → scikit_learn_intelex-2025.4.0.dist-info}/top_level.txt +0 -0
|
Binary file
|
|
Binary file
|
|
@@ -98,9 +98,12 @@ def _assert_all_finite(
|
|
|
98
98
|
)
|
|
99
99
|
_dal_ready = _patching_status.and_conditions(
|
|
100
100
|
[
|
|
101
|
-
(X.ndim in [1, 2], "
|
|
102
|
-
(not np.any(np.equal(X.shape, 0)), "
|
|
103
|
-
(
|
|
101
|
+
(X.ndim in [1, 2], f"Input {input_name} does not have 1 or 2 dimensions."),
|
|
102
|
+
(not np.any(np.equal(X.shape, 0)), f"Input {input_name} shape contains a 0."),
|
|
103
|
+
(
|
|
104
|
+
dt in [np.float32, np.float64],
|
|
105
|
+
f"Input {input_name} dtype is not float32 or float64.",
|
|
106
|
+
),
|
|
104
107
|
]
|
|
105
108
|
)
|
|
106
109
|
_patching_status.write_log()
|
onedal/_device_offload.py
CHANGED
|
@@ -29,6 +29,14 @@ if dpctl_available:
|
|
|
29
29
|
from dpctl import SyclQueue
|
|
30
30
|
from dpctl.memory import MemoryUSMDevice, as_usm_memory
|
|
31
31
|
from dpctl.tensor import usm_ndarray
|
|
32
|
+
else:
|
|
33
|
+
import onedal
|
|
34
|
+
|
|
35
|
+
# setting fallback to `object` will make if isinstance call
|
|
36
|
+
# in _get_global_queue always true for situations without the
|
|
37
|
+
# dpc backend when `device_offload` is used. Instead, it will
|
|
38
|
+
# fail at the policy check phase yielding a RuntimeError
|
|
39
|
+
SyclQueue = getattr(onedal._backend, "SyclQueue", object)
|
|
32
40
|
|
|
33
41
|
if dpnp_available:
|
|
34
42
|
import dpnp
|
|
@@ -36,30 +44,6 @@ if dpnp_available:
|
|
|
36
44
|
from .utils._array_api import _convert_to_dpnp
|
|
37
45
|
|
|
38
46
|
|
|
39
|
-
class DummySyclQueue:
|
|
40
|
-
"""This class is designed to act like dpctl.SyclQueue
|
|
41
|
-
to allow device dispatching in scenarios when dpctl is not available"""
|
|
42
|
-
|
|
43
|
-
class DummySyclDevice:
|
|
44
|
-
def __init__(self, filter_string):
|
|
45
|
-
self._filter_string = filter_string
|
|
46
|
-
self.is_cpu = "cpu" in filter_string
|
|
47
|
-
self.is_gpu = "gpu" in filter_string
|
|
48
|
-
self.has_aspect_fp64 = self.is_cpu
|
|
49
|
-
|
|
50
|
-
if not (self.is_cpu):
|
|
51
|
-
logging.warning(
|
|
52
|
-
"Device support is limited. "
|
|
53
|
-
"Please install dpctl for full experience"
|
|
54
|
-
)
|
|
55
|
-
|
|
56
|
-
def get_filter_string(self):
|
|
57
|
-
return self._filter_string
|
|
58
|
-
|
|
59
|
-
def __init__(self, filter_string):
|
|
60
|
-
self.sycl_device = self.DummySyclDevice(filter_string)
|
|
61
|
-
|
|
62
|
-
|
|
63
47
|
def _copy_to_usm(queue, array):
|
|
64
48
|
if not dpctl_available:
|
|
65
49
|
raise RuntimeError(
|
|
@@ -139,12 +123,10 @@ def _transfer_to_host(queue, *data):
|
|
|
139
123
|
def _get_global_queue():
|
|
140
124
|
target = _get_config()["target_offload"]
|
|
141
125
|
|
|
142
|
-
QueueClass = DummySyclQueue if not dpctl_available else SyclQueue
|
|
143
|
-
|
|
144
126
|
if target != "auto":
|
|
145
|
-
if isinstance(target,
|
|
127
|
+
if isinstance(target, SyclQueue):
|
|
146
128
|
return target
|
|
147
|
-
return
|
|
129
|
+
return SyclQueue(target)
|
|
148
130
|
return None
|
|
149
131
|
|
|
150
132
|
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -20,7 +20,7 @@ from abc import ABCMeta, abstractmethod
|
|
|
20
20
|
import numpy as np
|
|
21
21
|
|
|
22
22
|
from ..common._base import BaseEstimator
|
|
23
|
-
from ..datatypes import
|
|
23
|
+
from ..datatypes import from_table, to_table
|
|
24
24
|
from ..utils import _is_csr
|
|
25
25
|
from ..utils.validation import _check_array
|
|
26
26
|
|
|
@@ -57,7 +57,7 @@ class BaseBasicStatistics(BaseEstimator, metaclass=ABCMeta):
|
|
|
57
57
|
def _get_onedal_params(self, is_csr, dtype=np.float32):
|
|
58
58
|
options = self._get_result_options(self.options)
|
|
59
59
|
return {
|
|
60
|
-
"fptype":
|
|
60
|
+
"fptype": dtype,
|
|
61
61
|
"method": "sparse" if is_csr else self.algorithm,
|
|
62
62
|
"result_option": options,
|
|
63
63
|
}
|
|
@@ -81,11 +81,11 @@ class BasicStatistics(BaseBasicStatistics):
|
|
|
81
81
|
if sample_weight is not None:
|
|
82
82
|
sample_weight = _check_array(sample_weight, ensure_2d=False)
|
|
83
83
|
|
|
84
|
-
data, sample_weight = _convert_to_supported(policy, data, sample_weight)
|
|
85
84
|
is_single_dim = data.ndim == 1
|
|
86
|
-
data_table, weights_table = to_table(data, sample_weight)
|
|
87
85
|
|
|
88
|
-
|
|
86
|
+
data_table, weights_table = to_table(data, sample_weight, queue=queue)
|
|
87
|
+
|
|
88
|
+
dtype = data_table.dtype
|
|
89
89
|
raw_result = self._compute_raw(data_table, weights_table, policy, dtype, is_csr)
|
|
90
90
|
for opt, raw_value in raw_result.items():
|
|
91
91
|
value = from_table(raw_value).ravel()
|
|
@@ -18,7 +18,7 @@ import numpy as np
|
|
|
18
18
|
|
|
19
19
|
from daal4py.sklearn._utils import get_dtype
|
|
20
20
|
|
|
21
|
-
from ..datatypes import
|
|
21
|
+
from ..datatypes import from_table, to_table
|
|
22
22
|
from ..utils import _check_array
|
|
23
23
|
from .basic_statistics import BaseBasicStatistics
|
|
24
24
|
|
|
@@ -70,10 +70,21 @@ class IncrementalBasicStatistics(BaseBasicStatistics):
|
|
|
70
70
|
self._reset()
|
|
71
71
|
|
|
72
72
|
def _reset(self):
|
|
73
|
+
self._need_to_finalize = False
|
|
73
74
|
self._partial_result = self._get_backend(
|
|
74
75
|
"basic_statistics", None, "partial_compute_result"
|
|
75
76
|
)
|
|
76
77
|
|
|
78
|
+
def __getstate__(self):
|
|
79
|
+
# Since finalize_fit can't be dispatched without directly provided queue
|
|
80
|
+
# and the dispatching policy can't be serialized, the computation is finalized
|
|
81
|
+
# here and the policy is not saved in serialized data.
|
|
82
|
+
self.finalize_fit()
|
|
83
|
+
data = self.__dict__.copy()
|
|
84
|
+
data.pop("_queue", None)
|
|
85
|
+
|
|
86
|
+
return data
|
|
87
|
+
|
|
77
88
|
def partial_fit(self, X, weights=None, queue=None):
|
|
78
89
|
"""
|
|
79
90
|
Computes partial data for basic statistics
|
|
@@ -95,7 +106,6 @@ class IncrementalBasicStatistics(BaseBasicStatistics):
|
|
|
95
106
|
"""
|
|
96
107
|
self._queue = queue
|
|
97
108
|
policy = self._get_policy(queue, X)
|
|
98
|
-
X, weights = _convert_to_supported(policy, X, weights)
|
|
99
109
|
|
|
100
110
|
X = _check_array(
|
|
101
111
|
X, dtype=[np.float64, np.float32], ensure_2d=False, force_all_finite=False
|
|
@@ -112,7 +122,7 @@ class IncrementalBasicStatistics(BaseBasicStatistics):
|
|
|
112
122
|
dtype = get_dtype(X)
|
|
113
123
|
self._onedal_params = self._get_onedal_params(False, dtype=dtype)
|
|
114
124
|
|
|
115
|
-
X_table, weights_table = to_table(X, weights)
|
|
125
|
+
X_table, weights_table = to_table(X, weights, queue=queue)
|
|
116
126
|
self._partial_result = self._get_backend(
|
|
117
127
|
"basic_statistics",
|
|
118
128
|
None,
|
|
@@ -124,6 +134,9 @@ class IncrementalBasicStatistics(BaseBasicStatistics):
|
|
|
124
134
|
weights_table,
|
|
125
135
|
)
|
|
126
136
|
|
|
137
|
+
self._need_to_finalize = True
|
|
138
|
+
return self
|
|
139
|
+
|
|
127
140
|
def finalize_fit(self, queue=None):
|
|
128
141
|
"""
|
|
129
142
|
Finalizes basic statistics computation and obtains result
|
|
@@ -139,22 +152,24 @@ class IncrementalBasicStatistics(BaseBasicStatistics):
|
|
|
139
152
|
self : object
|
|
140
153
|
Returns the instance itself.
|
|
141
154
|
"""
|
|
155
|
+
if self._need_to_finalize:
|
|
156
|
+
if queue is not None:
|
|
157
|
+
policy = self._get_policy(queue)
|
|
158
|
+
else:
|
|
159
|
+
policy = self._get_policy(self._queue)
|
|
160
|
+
|
|
161
|
+
result = self._get_backend(
|
|
162
|
+
"basic_statistics",
|
|
163
|
+
None,
|
|
164
|
+
"finalize_compute",
|
|
165
|
+
policy,
|
|
166
|
+
self._onedal_params,
|
|
167
|
+
self._partial_result,
|
|
168
|
+
)
|
|
169
|
+
options = self._get_result_options(self.options).split("|")
|
|
170
|
+
for opt in options:
|
|
171
|
+
setattr(self, opt, from_table(getattr(result, opt)).ravel())
|
|
142
172
|
|
|
143
|
-
|
|
144
|
-
policy = self._get_policy(queue)
|
|
145
|
-
else:
|
|
146
|
-
policy = self._get_policy(self._queue)
|
|
147
|
-
|
|
148
|
-
result = self._get_backend(
|
|
149
|
-
"basic_statistics",
|
|
150
|
-
None,
|
|
151
|
-
"finalize_compute",
|
|
152
|
-
policy,
|
|
153
|
-
self._onedal_params,
|
|
154
|
-
self._partial_result,
|
|
155
|
-
)
|
|
156
|
-
options = self._get_result_options(self.options).split("|")
|
|
157
|
-
for opt in options:
|
|
158
|
-
setattr(self, opt, from_table(getattr(result, opt)).ravel())
|
|
173
|
+
self._need_to_finalize = False
|
|
159
174
|
|
|
160
175
|
return self
|
|
@@ -21,66 +21,9 @@ from scipy import sparse as sp
|
|
|
21
21
|
|
|
22
22
|
from daal4py.sklearn._utils import daal_check_version
|
|
23
23
|
from onedal.basic_statistics import BasicStatistics
|
|
24
|
+
from onedal.basic_statistics.tests.utils import options_and_tests
|
|
24
25
|
from onedal.tests.utils._device_selection import get_queues
|
|
25
26
|
|
|
26
|
-
|
|
27
|
-
def expected_sum(X):
|
|
28
|
-
return np.sum(X, axis=0)
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def expected_max(X):
|
|
32
|
-
return np.max(X, axis=0)
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def expected_min(X):
|
|
36
|
-
return np.min(X, axis=0)
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
def expected_mean(X):
|
|
40
|
-
return np.mean(X, axis=0)
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def expected_standard_deviation(X):
|
|
44
|
-
return np.std(X, axis=0)
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
def expected_variance(X):
|
|
48
|
-
return np.var(X, axis=0)
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
def expected_variation(X):
|
|
52
|
-
return expected_standard_deviation(X) / expected_mean(X)
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
def expected_sum_squares(X):
|
|
56
|
-
return np.sum(np.square(X), axis=0)
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
def expected_sum_squares_centered(X):
|
|
60
|
-
return np.sum(np.square(X - expected_mean(X)), axis=0)
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
def expected_standard_deviation(X):
|
|
64
|
-
return np.sqrt(expected_variance(X))
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
def expected_second_order_raw_moment(X):
|
|
68
|
-
return np.mean(np.square(X), axis=0)
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
options_and_tests = [
|
|
72
|
-
("sum", expected_sum, (5e-4, 1e-7)),
|
|
73
|
-
("min", expected_min, (1e-7, 1e-7)),
|
|
74
|
-
("max", expected_max, (1e-7, 1e-7)),
|
|
75
|
-
("mean", expected_mean, (5e-7, 1e-7)),
|
|
76
|
-
("variance", expected_variance, (2e-3, 2e-3)),
|
|
77
|
-
("variation", expected_variation, (5e-2, 5e-2)),
|
|
78
|
-
("sum_squares", expected_sum_squares, (2e-4, 1e-7)),
|
|
79
|
-
("sum_squares_centered", expected_sum_squares_centered, (2e-4, 1e-7)),
|
|
80
|
-
("standard_deviation", expected_standard_deviation, (2e-3, 2e-3)),
|
|
81
|
-
("second_order_raw_moment", expected_second_order_raw_moment, (1e-6, 1e-7)),
|
|
82
|
-
]
|
|
83
|
-
|
|
84
27
|
options_and_tests_csr = [
|
|
85
28
|
("sum", "sum", (5e-6, 1e-9)),
|
|
86
29
|
("min", "min", (0, 0)),
|
|
@@ -90,15 +33,15 @@ options_and_tests_csr = [
|
|
|
90
33
|
|
|
91
34
|
|
|
92
35
|
@pytest.mark.parametrize("queue", get_queues())
|
|
93
|
-
@pytest.mark.parametrize("
|
|
36
|
+
@pytest.mark.parametrize("result_option", options_and_tests.keys())
|
|
94
37
|
@pytest.mark.parametrize("row_count", [100, 1000])
|
|
95
38
|
@pytest.mark.parametrize("column_count", [10, 100])
|
|
96
39
|
@pytest.mark.parametrize("weighted", [True, False])
|
|
97
40
|
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
|
|
98
41
|
def test_single_option_on_random_data(
|
|
99
|
-
queue,
|
|
42
|
+
queue, result_option, row_count, column_count, weighted, dtype
|
|
100
43
|
):
|
|
101
|
-
|
|
44
|
+
function, tols = options_and_tests[result_option]
|
|
102
45
|
fp32tol, fp64tol = tols
|
|
103
46
|
seed = 77
|
|
104
47
|
gen = np.random.default_rng(seed)
|
|
@@ -150,15 +93,15 @@ def test_multiple_options_on_random_data(queue, row_count, column_count, weighte
|
|
|
150
93
|
if weighted:
|
|
151
94
|
weighted_data = np.diag(weights) @ data
|
|
152
95
|
gtr_mean, gtr_max, gtr_sum = (
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
96
|
+
options_and_tests["mean"][0](weighted_data),
|
|
97
|
+
options_and_tests["max"][0](weighted_data),
|
|
98
|
+
options_and_tests["sum"][0](weighted_data),
|
|
156
99
|
)
|
|
157
100
|
else:
|
|
158
101
|
gtr_mean, gtr_max, gtr_sum = (
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
102
|
+
options_and_tests["mean"][0](data),
|
|
103
|
+
options_and_tests["max"][0](data),
|
|
104
|
+
options_and_tests["sum"][0](data),
|
|
162
105
|
)
|
|
163
106
|
|
|
164
107
|
tol = 5e-4 if res_mean.dtype == np.float32 else 1e-7
|
|
@@ -190,8 +133,8 @@ def test_all_option_on_random_data(queue, row_count, column_count, weighted, dty
|
|
|
190
133
|
if weighted:
|
|
191
134
|
weighted_data = np.diag(weights) @ data
|
|
192
135
|
|
|
193
|
-
for
|
|
194
|
-
|
|
136
|
+
for result_option in options_and_tests:
|
|
137
|
+
function, tols = options_and_tests[result_option]
|
|
195
138
|
fp32tol, fp64tol = tols
|
|
196
139
|
res = getattr(result, result_option)
|
|
197
140
|
if weighted:
|
|
@@ -203,12 +146,13 @@ def test_all_option_on_random_data(queue, row_count, column_count, weighted, dty
|
|
|
203
146
|
|
|
204
147
|
|
|
205
148
|
@pytest.mark.parametrize("queue", get_queues())
|
|
206
|
-
@pytest.mark.parametrize("
|
|
149
|
+
@pytest.mark.parametrize("result_option", options_and_tests.keys())
|
|
207
150
|
@pytest.mark.parametrize("data_size", [100, 1000])
|
|
208
151
|
@pytest.mark.parametrize("weighted", [True, False])
|
|
209
152
|
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
|
|
210
|
-
def test_1d_input_on_random_data(queue,
|
|
211
|
-
|
|
153
|
+
def test_1d_input_on_random_data(queue, result_option, data_size, weighted, dtype):
|
|
154
|
+
|
|
155
|
+
function, tols = options_and_tests[result_option]
|
|
212
156
|
fp32tol, fp64tol = tols
|
|
213
157
|
seed = 77
|
|
214
158
|
gen = np.random.default_rng(seed)
|
|
@@ -19,12 +19,8 @@ import pytest
|
|
|
19
19
|
from numpy.testing import assert_allclose
|
|
20
20
|
|
|
21
21
|
from onedal.basic_statistics import IncrementalBasicStatistics
|
|
22
|
-
from onedal.basic_statistics.tests.
|
|
23
|
-
|
|
24
|
-
expected_mean,
|
|
25
|
-
expected_sum,
|
|
26
|
-
options_and_tests,
|
|
27
|
-
)
|
|
22
|
+
from onedal.basic_statistics.tests.utils import options_and_tests
|
|
23
|
+
from onedal.datatypes import from_table
|
|
28
24
|
from onedal.tests.utils._device_selection import get_queues
|
|
29
25
|
|
|
30
26
|
|
|
@@ -67,15 +63,15 @@ def test_multiple_options_on_gold_data(queue, weighted, dtype):
|
|
|
67
63
|
|
|
68
64
|
@pytest.mark.parametrize("queue", get_queues())
|
|
69
65
|
@pytest.mark.parametrize("num_batches", [2, 10])
|
|
70
|
-
@pytest.mark.parametrize("
|
|
66
|
+
@pytest.mark.parametrize("result_option", options_and_tests.keys())
|
|
71
67
|
@pytest.mark.parametrize("row_count", [100, 1000])
|
|
72
68
|
@pytest.mark.parametrize("column_count", [10, 100])
|
|
73
69
|
@pytest.mark.parametrize("weighted", [True, False])
|
|
74
70
|
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
|
|
75
71
|
def test_single_option_on_random_data(
|
|
76
|
-
queue, num_batches,
|
|
72
|
+
queue, num_batches, result_option, row_count, column_count, weighted, dtype
|
|
77
73
|
):
|
|
78
|
-
|
|
74
|
+
function, tols = options_and_tests[result_option]
|
|
79
75
|
fp32tol, fp64tol = tols
|
|
80
76
|
seed = 77
|
|
81
77
|
gen = np.random.default_rng(seed)
|
|
@@ -137,15 +133,15 @@ def test_multiple_options_on_random_data(
|
|
|
137
133
|
if weighted:
|
|
138
134
|
weighted_data = np.diag(weights) @ data
|
|
139
135
|
gtr_mean, gtr_max, gtr_sum = (
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
136
|
+
options_and_tests["mean"][0](weighted_data),
|
|
137
|
+
options_and_tests["max"][0](weighted_data),
|
|
138
|
+
options_and_tests["sum"][0](weighted_data),
|
|
143
139
|
)
|
|
144
140
|
else:
|
|
145
141
|
gtr_mean, gtr_max, gtr_sum = (
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
142
|
+
options_and_tests["mean"][0](data),
|
|
143
|
+
options_and_tests["max"][0](data),
|
|
144
|
+
options_and_tests["sum"][0](data),
|
|
149
145
|
)
|
|
150
146
|
|
|
151
147
|
tol = 3e-4 if res_mean.dtype == np.float32 else 1e-7
|
|
@@ -184,8 +180,8 @@ def test_all_option_on_random_data(
|
|
|
184
180
|
if weighted:
|
|
185
181
|
weighted_data = np.diag(weights) @ data
|
|
186
182
|
|
|
187
|
-
for
|
|
188
|
-
|
|
183
|
+
for result_option in options_and_tests:
|
|
184
|
+
function, tols = options_and_tests[result_option]
|
|
189
185
|
fp32tol, fp64tol = tols
|
|
190
186
|
res = getattr(result, result_option)
|
|
191
187
|
if weighted:
|
|
@@ -194,3 +190,90 @@ def test_all_option_on_random_data(
|
|
|
194
190
|
gtr = function(data)
|
|
195
191
|
tol = fp32tol if res.dtype == np.float32 else fp64tol
|
|
196
192
|
assert_allclose(gtr, res, atol=tol)
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
@pytest.mark.parametrize("queue", get_queues())
|
|
196
|
+
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
|
|
197
|
+
def test_incremental_estimator_pickle(queue, dtype):
|
|
198
|
+
import pickle
|
|
199
|
+
|
|
200
|
+
from onedal.basic_statistics import IncrementalBasicStatistics
|
|
201
|
+
|
|
202
|
+
incbs = IncrementalBasicStatistics()
|
|
203
|
+
|
|
204
|
+
# Check that estimator can be serialized without any data.
|
|
205
|
+
dump = pickle.dumps(incbs)
|
|
206
|
+
incbs_loaded = pickle.loads(dump)
|
|
207
|
+
seed = 77
|
|
208
|
+
gen = np.random.default_rng(seed)
|
|
209
|
+
X = gen.uniform(low=-0.3, high=+0.7, size=(10, 10))
|
|
210
|
+
X = X.astype(dtype)
|
|
211
|
+
X_split = np.array_split(X, 2)
|
|
212
|
+
incbs.partial_fit(X_split[0], queue=queue)
|
|
213
|
+
incbs_loaded.partial_fit(X_split[0], queue=queue)
|
|
214
|
+
|
|
215
|
+
assert incbs._need_to_finalize == True
|
|
216
|
+
assert incbs_loaded._need_to_finalize == True
|
|
217
|
+
|
|
218
|
+
# Check that estimator can be serialized after partial_fit call.
|
|
219
|
+
dump = pickle.dumps(incbs)
|
|
220
|
+
incbs_loaded = pickle.loads(dump)
|
|
221
|
+
assert incbs._need_to_finalize == False
|
|
222
|
+
# Finalize is called during serialization to make sure partial results are finalized correctly.
|
|
223
|
+
assert incbs_loaded._need_to_finalize == False
|
|
224
|
+
|
|
225
|
+
partial_n_rows = from_table(incbs._partial_result.partial_n_rows)
|
|
226
|
+
partial_n_rows_loaded = from_table(incbs_loaded._partial_result.partial_n_rows)
|
|
227
|
+
assert_allclose(partial_n_rows, partial_n_rows_loaded)
|
|
228
|
+
|
|
229
|
+
partial_min = from_table(incbs._partial_result.partial_min)
|
|
230
|
+
partial_min_loaded = from_table(incbs_loaded._partial_result.partial_min)
|
|
231
|
+
assert_allclose(partial_min, partial_min_loaded)
|
|
232
|
+
|
|
233
|
+
partial_max = from_table(incbs._partial_result.partial_max)
|
|
234
|
+
partial_max_loaded = from_table(incbs_loaded._partial_result.partial_max)
|
|
235
|
+
assert_allclose(partial_max, partial_max_loaded)
|
|
236
|
+
|
|
237
|
+
partial_sum = from_table(incbs._partial_result.partial_sum)
|
|
238
|
+
partial_sum_loaded = from_table(incbs_loaded._partial_result.partial_sum)
|
|
239
|
+
assert_allclose(partial_sum, partial_sum_loaded)
|
|
240
|
+
|
|
241
|
+
partial_sum_squares = from_table(incbs._partial_result.partial_sum_squares)
|
|
242
|
+
partial_sum_squares_loaded = from_table(
|
|
243
|
+
incbs_loaded._partial_result.partial_sum_squares
|
|
244
|
+
)
|
|
245
|
+
assert_allclose(partial_sum_squares, partial_sum_squares_loaded)
|
|
246
|
+
|
|
247
|
+
partial_sum_squares_centered = from_table(
|
|
248
|
+
incbs._partial_result.partial_sum_squares_centered
|
|
249
|
+
)
|
|
250
|
+
partial_sum_squares_centered_loaded = from_table(
|
|
251
|
+
incbs_loaded._partial_result.partial_sum_squares_centered
|
|
252
|
+
)
|
|
253
|
+
assert_allclose(partial_sum_squares_centered, partial_sum_squares_centered_loaded)
|
|
254
|
+
|
|
255
|
+
incbs.partial_fit(X_split[1], queue=queue)
|
|
256
|
+
incbs_loaded.partial_fit(X_split[1], queue=queue)
|
|
257
|
+
assert incbs._need_to_finalize == True
|
|
258
|
+
assert incbs_loaded._need_to_finalize == True
|
|
259
|
+
|
|
260
|
+
dump = pickle.dumps(incbs_loaded)
|
|
261
|
+
incbs_loaded = pickle.loads(dump)
|
|
262
|
+
|
|
263
|
+
assert incbs._need_to_finalize == True
|
|
264
|
+
assert incbs_loaded._need_to_finalize == False
|
|
265
|
+
|
|
266
|
+
incbs.finalize_fit()
|
|
267
|
+
incbs_loaded.finalize_fit()
|
|
268
|
+
|
|
269
|
+
# Check that finalized estimator can be serialized.
|
|
270
|
+
dump = pickle.dumps(incbs_loaded)
|
|
271
|
+
incbs_loaded = pickle.loads(dump)
|
|
272
|
+
|
|
273
|
+
for result_option in options_and_tests:
|
|
274
|
+
_, tols = options_and_tests[result_option]
|
|
275
|
+
fp32tol, fp64tol = tols
|
|
276
|
+
res = getattr(incbs, result_option)
|
|
277
|
+
res_loaded = getattr(incbs_loaded, result_option)
|
|
278
|
+
tol = fp32tol if res.dtype == np.float32 else fp64tol
|
|
279
|
+
assert_allclose(res, res_loaded, atol=tol)
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# ===============================================================================
|
|
2
|
+
# Copyright 2024 Intel Corporation
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
# ===============================================================================
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Compute unbiased variation for the columns of array-like X
|
|
21
|
+
def variation(X):
|
|
22
|
+
X_mean = np.mean(X, axis=0)
|
|
23
|
+
if np.all(X_mean):
|
|
24
|
+
# Avoid division by zero
|
|
25
|
+
return np.std(X, axis=0, ddof=1) / X_mean
|
|
26
|
+
else:
|
|
27
|
+
return np.array(
|
|
28
|
+
[
|
|
29
|
+
x / y if y != 0 else np.nan
|
|
30
|
+
for x, y in zip(np.std(X, axis=0, ddof=1), X_mean)
|
|
31
|
+
]
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
options_and_tests = {
|
|
36
|
+
"sum": (lambda X: np.sum(X, axis=0), (5e-4, 1e-7)),
|
|
37
|
+
"min": (lambda X: np.min(X, axis=0), (1e-7, 1e-7)),
|
|
38
|
+
"max": (lambda X: np.max(X, axis=0), (1e-7, 1e-7)),
|
|
39
|
+
"mean": (lambda X: np.mean(X, axis=0), (5e-7, 1e-7)),
|
|
40
|
+
# sklearnex computes unbiased variance and standard deviation that is why ddof=1
|
|
41
|
+
"variance": (lambda X: np.var(X, axis=0, ddof=1), (2e-4, 1e-7)),
|
|
42
|
+
"variation": (lambda X: variation(X), (1e-3, 1e-6)),
|
|
43
|
+
"sum_squares": (lambda X: np.sum(np.square(X), axis=0), (2e-4, 1e-7)),
|
|
44
|
+
"sum_squares_centered": (
|
|
45
|
+
lambda X: np.sum(np.square(X - np.mean(X, axis=0)), axis=0),
|
|
46
|
+
(1e-3, 1e-7),
|
|
47
|
+
),
|
|
48
|
+
"standard_deviation": (lambda X: np.std(X, axis=0, ddof=1), (2e-3, 1e-7)),
|
|
49
|
+
"second_order_raw_moment": (lambda X: np.mean(np.square(X), axis=0), (1e-6, 1e-7)),
|
|
50
|
+
}
|
onedal/cluster/dbscan.py
CHANGED
|
@@ -20,7 +20,7 @@ from daal4py.sklearn._utils import get_dtype, make2d
|
|
|
20
20
|
|
|
21
21
|
from ..common._base import BaseEstimator
|
|
22
22
|
from ..common._mixin import ClusterMixin
|
|
23
|
-
from ..datatypes import
|
|
23
|
+
from ..datatypes import from_table, to_table
|
|
24
24
|
from ..utils import _check_array
|
|
25
25
|
|
|
26
26
|
|
|
@@ -48,7 +48,7 @@ class BaseDBSCAN(BaseEstimator, ClusterMixin):
|
|
|
48
48
|
|
|
49
49
|
def _get_onedal_params(self, dtype=np.float32):
|
|
50
50
|
return {
|
|
51
|
-
"fptype":
|
|
51
|
+
"fptype": dtype,
|
|
52
52
|
"method": "by_default",
|
|
53
53
|
"min_observations": int(self.min_samples),
|
|
54
54
|
"epsilon": float(self.eps),
|
|
@@ -60,15 +60,10 @@ class BaseDBSCAN(BaseEstimator, ClusterMixin):
|
|
|
60
60
|
policy = self._get_policy(queue, X)
|
|
61
61
|
X = _check_array(X, accept_sparse="csr", dtype=[np.float64, np.float32])
|
|
62
62
|
sample_weight = make2d(sample_weight) if sample_weight is not None else None
|
|
63
|
-
|
|
63
|
+
X_table, sample_weight_table = to_table(X, sample_weight, queue=queue)
|
|
64
64
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
X = X.astype(np.float64)
|
|
68
|
-
X = _convert_to_supported(policy, X)
|
|
69
|
-
dtype = get_dtype(X)
|
|
70
|
-
params = self._get_onedal_params(dtype)
|
|
71
|
-
result = module.compute(policy, params, to_table(X), to_table(sample_weight))
|
|
65
|
+
params = self._get_onedal_params(X_table.dtype)
|
|
66
|
+
result = module.compute(policy, params, X_table, sample_weight_table)
|
|
72
67
|
|
|
73
68
|
self.labels_ = from_table(result.responses).ravel()
|
|
74
69
|
if result.core_observation_indices is not None:
|
onedal/cluster/kmeans.py
CHANGED
|
@@ -34,7 +34,7 @@ from sklearn.utils import check_random_state
|
|
|
34
34
|
|
|
35
35
|
from ..common._base import BaseEstimator as onedal_BaseEstimator
|
|
36
36
|
from ..common._mixin import ClusterMixin, TransformerMixin
|
|
37
|
-
from ..datatypes import
|
|
37
|
+
from ..datatypes import from_table, to_table
|
|
38
38
|
from ..utils import _check_array, _is_arraylike_not_scalar, _is_csr
|
|
39
39
|
|
|
40
40
|
|
|
@@ -145,7 +145,7 @@ class _BaseKMeans(onedal_BaseEstimator, TransformerMixin, ClusterMixin, ABC):
|
|
|
145
145
|
def _get_onedal_params(self, is_csr=False, dtype=np.float32, result_options=None):
|
|
146
146
|
thr = self._tol if hasattr(self, "_tol") else self.tol
|
|
147
147
|
return {
|
|
148
|
-
"fptype":
|
|
148
|
+
"fptype": dtype,
|
|
149
149
|
"method": "lloyd_csr" if is_csr else "by_default",
|
|
150
150
|
"seed": -1,
|
|
151
151
|
"max_iteration_count": self.max_iter,
|
|
@@ -205,8 +205,7 @@ class _BaseKMeans(onedal_BaseEstimator, TransformerMixin, ClusterMixin, ABC):
|
|
|
205
205
|
assert centers.shape[1] == X_table.column_count
|
|
206
206
|
# KMeans is implemented on both CPU and GPU for Dense and CSR data
|
|
207
207
|
# The original policy can be used here
|
|
208
|
-
|
|
209
|
-
centers_table = to_table(centers)
|
|
208
|
+
centers_table = to_table(centers, queue=getattr(policy, "_queue", None))
|
|
210
209
|
else:
|
|
211
210
|
raise TypeError("Unsupported type of the `init` value")
|
|
212
211
|
|
|
@@ -240,16 +239,14 @@ class _BaseKMeans(onedal_BaseEstimator, TransformerMixin, ClusterMixin, ABC):
|
|
|
240
239
|
f"callable, got '{ init }' instead."
|
|
241
240
|
)
|
|
242
241
|
|
|
243
|
-
centers =
|
|
244
|
-
return to_table(centers)
|
|
242
|
+
return to_table(centers, queue=getattr(policy, "_queue", None))
|
|
245
243
|
|
|
246
244
|
def _fit_backend(
|
|
247
245
|
self, X_table, centroids_table, module, policy, dtype=np.float32, is_csr=False
|
|
248
246
|
):
|
|
249
247
|
params = self._get_onedal_params(is_csr, dtype)
|
|
250
248
|
|
|
251
|
-
|
|
252
|
-
assert meta.get_npy_dtype(0) == dtype
|
|
249
|
+
assert X_table.dtype == dtype
|
|
253
250
|
|
|
254
251
|
result = module.train(policy, params, X_table, centroids_table)
|
|
255
252
|
|
|
@@ -266,14 +263,11 @@ class _BaseKMeans(onedal_BaseEstimator, TransformerMixin, ClusterMixin, ABC):
|
|
|
266
263
|
X = _check_array(
|
|
267
264
|
X, dtype=[np.float64, np.float32], accept_sparse="csr", force_all_finite=False
|
|
268
265
|
)
|
|
269
|
-
|
|
270
|
-
dtype =
|
|
271
|
-
X_table = to_table(X)
|
|
266
|
+
X_table = to_table(X, queue=queue)
|
|
267
|
+
dtype = X_table.dtype
|
|
272
268
|
|
|
273
269
|
self._check_params_vs_input(X_table, is_csr, policy, dtype=dtype)
|
|
274
270
|
|
|
275
|
-
params = self._get_onedal_params(is_csr, dtype)
|
|
276
|
-
|
|
277
271
|
self.n_features_in_ = X_table.column_count
|
|
278
272
|
|
|
279
273
|
best_model, best_n_iter = None, None
|
|
@@ -381,9 +375,8 @@ class _BaseKMeans(onedal_BaseEstimator, TransformerMixin, ClusterMixin, ABC):
|
|
|
381
375
|
is_csr = _is_csr(X)
|
|
382
376
|
|
|
383
377
|
policy = self._get_policy(queue, X)
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
params = self._get_onedal_params(is_csr, dtype, result_options)
|
|
378
|
+
X_table = to_table(X, queue=queue)
|
|
379
|
+
params = self._get_onedal_params(is_csr, X_table.dtype, result_options)
|
|
387
380
|
|
|
388
381
|
result = module.infer(policy, params, self.model_, X_table)
|
|
389
382
|
|