scikit-learn-intelex 2024.1.0-py311-none-manylinux1_x86_64.whl → 2024.4.0-py311-none-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of scikit-learn-intelex has been flagged as possibly problematic.
Files changed (62)
  1. {scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.4.0.dist-info}/METADATA +2 -2
  2. scikit_learn_intelex-2024.4.0.dist-info/RECORD +101 -0
  3. sklearnex/__init__.py +9 -7
  4. sklearnex/_device_offload.py +31 -4
  5. sklearnex/basic_statistics/__init__.py +2 -1
  6. sklearnex/basic_statistics/incremental_basic_statistics.py +288 -0
  7. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +386 -0
  8. sklearnex/cluster/dbscan.py +6 -4
  9. sklearnex/conftest.py +63 -0
  10. sklearnex/{preview/decomposition → covariance}/__init__.py +19 -19
  11. sklearnex/covariance/incremental_covariance.py +130 -0
  12. sklearnex/covariance/tests/test_incremental_covariance.py +143 -0
  13. sklearnex/decomposition/pca.py +319 -1
  14. sklearnex/decomposition/tests/test_pca.py +34 -5
  15. sklearnex/dispatcher.py +93 -61
  16. sklearnex/ensemble/_forest.py +81 -97
  17. sklearnex/ensemble/tests/test_forest.py +15 -19
  18. sklearnex/linear_model/__init__.py +1 -2
  19. sklearnex/linear_model/linear.py +275 -347
  20. sklearnex/{preview/linear_model → linear_model}/logistic_regression.py +83 -50
  21. sklearnex/linear_model/tests/test_linear.py +40 -5
  22. sklearnex/linear_model/tests/test_logreg.py +70 -7
  23. sklearnex/neighbors/__init__.py +1 -1
  24. sklearnex/neighbors/_lof.py +221 -0
  25. sklearnex/neighbors/common.py +4 -1
  26. sklearnex/neighbors/knn_classification.py +47 -137
  27. sklearnex/neighbors/knn_regression.py +20 -132
  28. sklearnex/neighbors/knn_unsupervised.py +16 -93
  29. sklearnex/neighbors/tests/test_neighbors.py +12 -16
  30. sklearnex/preview/__init__.py +1 -1
  31. sklearnex/preview/cluster/k_means.py +8 -81
  32. sklearnex/preview/covariance/covariance.py +51 -16
  33. sklearnex/preview/covariance/tests/test_covariance.py +18 -5
  34. sklearnex/spmd/__init__.py +1 -0
  35. sklearnex/{preview/linear_model → spmd/covariance}/__init__.py +5 -5
  36. sklearnex/spmd/covariance/covariance.py +21 -0
  37. sklearnex/spmd/ensemble/forest.py +4 -12
  38. sklearnex/spmd/linear_model/__init__.py +2 -1
  39. sklearnex/spmd/linear_model/logistic_regression.py +21 -0
  40. sklearnex/svm/_common.py +4 -7
  41. sklearnex/svm/nusvc.py +74 -55
  42. sklearnex/svm/nusvr.py +9 -56
  43. sklearnex/svm/svc.py +74 -56
  44. sklearnex/svm/svr.py +6 -53
  45. sklearnex/tests/_utils.py +164 -0
  46. sklearnex/tests/test_memory_usage.py +9 -7
  47. sklearnex/tests/test_monkeypatch.py +179 -138
  48. sklearnex/tests/test_n_jobs_support.py +77 -9
  49. sklearnex/tests/test_parallel.py +6 -8
  50. sklearnex/tests/test_patching.py +338 -89
  51. sklearnex/utils/__init__.py +2 -1
  52. sklearnex/utils/_namespace.py +97 -0
  53. scikit_learn_intelex-2024.1.0.dist-info/RECORD +0 -97
  54. sklearnex/neighbors/lof.py +0 -436
  55. sklearnex/preview/decomposition/pca.py +0 -376
  56. sklearnex/preview/decomposition/tests/test_preview_pca.py +0 -42
  57. sklearnex/preview/linear_model/tests/test_preview_logistic_regression.py +0 -59
  58. sklearnex/tests/_models_info.py +0 -170
  59. sklearnex/tests/utils/_launch_algorithms.py +0 -118
  60. {scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.4.0.dist-info}/LICENSE.txt +0 -0
  61. {scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.4.0.dist-info}/WHEEL +0 -0
  62. {scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.4.0.dist-info}/top_level.txt +0 -0
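The headline changes in 2024.4.0 are new incremental estimators (IncrementalBasicStatistics, IncrementalEmpiricalCovariance), the promotion of LogisticRegression out of preview, and a reworked LOF implementation. As a quick orientation before the per-file diffs, here is a minimal sketch of the new IncrementalBasicStatistics API, inferred from the test suite shown below rather than from separate documentation:

    import numpy as np
    from sklearnex.basic_statistics import IncrementalBasicStatistics

    X = np.random.default_rng(0).uniform(size=(1000, 10))

    # Statistics accumulate across partial_fit calls on successive batches.
    incbs = IncrementalBasicStatistics(result_options=["mean", "max", "sum"])
    for X_batch in np.array_split(X, 4):
        incbs.partial_fit(X_batch)

    # Each requested option is exposed as an attribute on the fitted estimator.
    print(incbs.mean, incbs.max, incbs.sum)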
sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py ADDED
@@ -0,0 +1,386 @@
+ # ===============================================================================
+ # Copyright 2024 Intel Corporation
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ===============================================================================
+
+ import numpy as np
+ import pytest
+ from numpy.testing import assert_allclose
+
+ from onedal.basic_statistics.tests.test_incremental_basic_statistics import (
+     expected_max,
+     expected_mean,
+     expected_sum,
+     options_and_tests,
+ )
+ from onedal.tests.utils._dataframes_support import (
+     _convert_to_dataframe,
+     get_dataframes_and_queues,
+ )
+ from sklearnex.basic_statistics import IncrementalBasicStatistics
+
+
+ @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+ @pytest.mark.parametrize("weighted", [True, False])
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+ def test_partial_fit_multiple_options_on_gold_data(dataframe, queue, weighted, dtype):
+     X = np.array([[0, 0], [1, 1]])
+     X = X.astype(dtype=dtype)
+     X_split = np.array_split(X, 2)
+     if weighted:
+         weights = np.array([1, 0.5])
+         weights = weights.astype(dtype=dtype)
+         weights_split = np.array_split(weights, 2)
+
+     incbs = IncrementalBasicStatistics()
+     for i in range(2):
+         X_split_df = _convert_to_dataframe(
+             X_split[i], sycl_queue=queue, target_df=dataframe
+         )
+         if weighted:
+             weights_split_df = _convert_to_dataframe(
+                 weights_split[i], sycl_queue=queue, target_df=dataframe
+             )
+             result = incbs.partial_fit(X_split_df, sample_weight=weights_split_df)
+         else:
+             result = incbs.partial_fit(X_split_df)
+
+     if weighted:
+         expected_weighted_mean = np.array([0.25, 0.25])
+         expected_weighted_min = np.array([0, 0])
+         expected_weighted_max = np.array([0.5, 0.5])
+         assert_allclose(expected_weighted_mean, result.mean)
+         assert_allclose(expected_weighted_max, result.max)
+         assert_allclose(expected_weighted_min, result.min)
+     else:
+         expected_mean = np.array([0.5, 0.5])
+         expected_min = np.array([0, 0])
+         expected_max = np.array([1, 1])
+         assert_allclose(expected_mean, result.mean)
+         assert_allclose(expected_max, result.max)
+         assert_allclose(expected_min, result.min)
+
+
+ @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+ @pytest.mark.parametrize("num_batches", [2, 10])
+ @pytest.mark.parametrize("option", options_and_tests)
+ @pytest.mark.parametrize("row_count", [100, 1000])
+ @pytest.mark.parametrize("column_count", [10, 100])
+ @pytest.mark.parametrize("weighted", [True, False])
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+ def test_partial_fit_single_option_on_random_data(
+     dataframe, queue, num_batches, option, row_count, column_count, weighted, dtype
+ ):
+     result_option, function, tols = option
+     fp32tol, fp64tol = tols
+     seed = 77
+     gen = np.random.default_rng(seed)
+     X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+     X = X.astype(dtype=dtype)
+     X_split = np.array_split(X, num_batches)
+     if weighted:
+         weights = gen.uniform(low=-0.5, high=+1.0, size=row_count)
+         weights = weights.astype(dtype=dtype)
+         weights_split = np.array_split(weights, num_batches)
+     incbs = IncrementalBasicStatistics(result_options=result_option)
+
+     for i in range(num_batches):
+         X_split_df = _convert_to_dataframe(
+             X_split[i], sycl_queue=queue, target_df=dataframe
+         )
+         if weighted:
+             weights_split_df = _convert_to_dataframe(
+                 weights_split[i], sycl_queue=queue, target_df=dataframe
+             )
+             result = incbs.partial_fit(X_split_df, sample_weight=weights_split_df)
+         else:
+             result = incbs.partial_fit(X_split_df)
+
+     res = getattr(result, result_option)
+     if weighted:
+         weighted_data = np.diag(weights) @ X
+         gtr = function(weighted_data)
+     else:
+         gtr = function(X)
+
+     tol = fp32tol if res.dtype == np.float32 else fp64tol
+     assert_allclose(gtr, res, atol=tol)
+
+
+ @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+ @pytest.mark.parametrize("num_batches", [2, 10])
+ @pytest.mark.parametrize("row_count", [100, 1000])
+ @pytest.mark.parametrize("column_count", [10, 100])
+ @pytest.mark.parametrize("weighted", [True, False])
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+ def test_partial_fit_multiple_options_on_random_data(
+     dataframe, queue, num_batches, row_count, column_count, weighted, dtype
+ ):
+     seed = 42
+     gen = np.random.default_rng(seed)
+     X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+     X = X.astype(dtype=dtype)
+     X_split = np.array_split(X, num_batches)
+     if weighted:
+         weights = gen.uniform(low=-0.5, high=+1.0, size=row_count)
+         weights = weights.astype(dtype=dtype)
+         weights_split = np.array_split(weights, num_batches)
+     incbs = IncrementalBasicStatistics(result_options=["mean", "max", "sum"])
+
+     for i in range(num_batches):
+         X_split_df = _convert_to_dataframe(
+             X_split[i], sycl_queue=queue, target_df=dataframe
+         )
+         if weighted:
+             weights_split_df = _convert_to_dataframe(
+                 weights_split[i], sycl_queue=queue, target_df=dataframe
+             )
+             result = incbs.partial_fit(X_split_df, sample_weight=weights_split_df)
+         else:
+             result = incbs.partial_fit(X_split_df)
+
+     res_mean, res_max, res_sum = result.mean, result.max, result.sum
+     if weighted:
+         weighted_data = np.diag(weights) @ X
+         gtr_mean, gtr_max, gtr_sum = (
+             expected_mean(weighted_data),
+             expected_max(weighted_data),
+             expected_sum(weighted_data),
+         )
+     else:
+         gtr_mean, gtr_max, gtr_sum = (
+             expected_mean(X),
+             expected_max(X),
+             expected_sum(X),
+         )
+
+     tol = 1e-5 if res_mean.dtype == np.float32 else 1e-7
+     assert_allclose(gtr_mean, res_mean, atol=tol)
+     assert_allclose(gtr_max, res_max, atol=tol)
+     assert_allclose(gtr_sum, res_sum, atol=tol)
+
+
+ @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+ @pytest.mark.parametrize("num_batches", [2, 10])
+ @pytest.mark.parametrize("row_count", [100, 1000])
+ @pytest.mark.parametrize("column_count", [10, 100])
+ @pytest.mark.parametrize("weighted", [True, False])
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+ def test_partial_fit_all_option_on_random_data(
+     dataframe, queue, num_batches, row_count, column_count, weighted, dtype
+ ):
+     seed = 77
+     gen = np.random.default_rng(seed)
+     X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+     X = X.astype(dtype=dtype)
+     X_split = np.array_split(X, num_batches)
+     if weighted:
+         weights = gen.uniform(low=-0.5, high=+1.0, size=row_count)
+         weights = weights.astype(dtype=dtype)
+         weights_split = np.array_split(weights, num_batches)
+     incbs = IncrementalBasicStatistics(result_options="all")
+
+     for i in range(num_batches):
+         X_split_df = _convert_to_dataframe(
+             X_split[i], sycl_queue=queue, target_df=dataframe
+         )
+         if weighted:
+             weights_split_df = _convert_to_dataframe(
+                 weights_split[i], sycl_queue=queue, target_df=dataframe
+             )
+             result = incbs.partial_fit(X_split_df, sample_weight=weights_split_df)
+         else:
+             result = incbs.partial_fit(X_split_df)
+
+     if weighted:
+         weighted_data = np.diag(weights) @ X
+
+     for option in options_and_tests:
+         result_option, function, tols = option
+         print(result_option)
+         fp32tol, fp64tol = tols
+         res = getattr(result, result_option)
+         if weighted:
+             gtr = function(weighted_data)
+         else:
+             gtr = function(X)
+         tol = fp32tol if res.dtype == np.float32 else fp64tol
+         assert_allclose(gtr, res, atol=tol)
+
+
+ @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+ @pytest.mark.parametrize("weighted", [True, False])
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+ def test_fit_multiple_options_on_gold_data(dataframe, queue, weighted, dtype):
+     X = np.array([[0, 0], [1, 1]])
+     X = X.astype(dtype=dtype)
+     X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+     if weighted:
+         weights = np.array([1, 0.5])
+         weights = weights.astype(dtype=dtype)
+         weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+     incbs = IncrementalBasicStatistics(batch_size=1)
+
+     if weighted:
+         result = incbs.fit(X_df, sample_weight=weights_df)
+     else:
+         result = incbs.fit(X_df)
+
+     if weighted:
+         expected_weighted_mean = np.array([0.25, 0.25])
+         expected_weighted_min = np.array([0, 0])
+         expected_weighted_max = np.array([0.5, 0.5])
+         assert_allclose(expected_weighted_mean, result.mean)
+         assert_allclose(expected_weighted_max, result.max)
+         assert_allclose(expected_weighted_min, result.min)
+     else:
+         expected_mean = np.array([0.5, 0.5])
+         expected_min = np.array([0, 0])
+         expected_max = np.array([1, 1])
+         assert_allclose(expected_mean, result.mean)
+         assert_allclose(expected_max, result.max)
+         assert_allclose(expected_min, result.min)
+
+
+ @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+ @pytest.mark.parametrize("num_batches", [2, 10])
+ @pytest.mark.parametrize("option", options_and_tests)
+ @pytest.mark.parametrize("row_count", [100, 1000])
+ @pytest.mark.parametrize("column_count", [10, 100])
+ @pytest.mark.parametrize("weighted", [True, False])
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+ def test_fit_single_option_on_random_data(
+     dataframe, queue, num_batches, option, row_count, column_count, weighted, dtype
+ ):
+     result_option, function, tols = option
+     fp32tol, fp64tol = tols
+     seed = 77
+     gen = np.random.default_rng(seed)
+     batch_size = row_count // num_batches
+     X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+     X = X.astype(dtype=dtype)
+     X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+     if weighted:
+         weights = gen.uniform(low=-0.5, high=1.0, size=row_count)
+         weights = weights.astype(dtype=dtype)
+         weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+     incbs = IncrementalBasicStatistics(
+         result_options=result_option, batch_size=batch_size
+     )
+
+     if weighted:
+         result = incbs.fit(X_df, sample_weight=weights_df)
+     else:
+         result = incbs.fit(X_df)
+
+     res = getattr(result, result_option)
+     if weighted:
+         weighted_data = np.diag(weights) @ X
+         gtr = function(weighted_data)
+     else:
+         gtr = function(X)
+
+     tol = fp32tol if res.dtype == np.float32 else fp64tol
+     assert_allclose(gtr, res, atol=tol)
+
+
+ @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+ @pytest.mark.parametrize("num_batches", [2, 10])
+ @pytest.mark.parametrize("row_count", [100, 1000])
+ @pytest.mark.parametrize("column_count", [10, 100])
+ @pytest.mark.parametrize("weighted", [True, False])
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+ def test_partial_fit_multiple_options_on_random_data(
+     dataframe, queue, num_batches, row_count, column_count, weighted, dtype
+ ):
+     seed = 77
+     gen = np.random.default_rng(seed)
+     batch_size = row_count // num_batches
+     X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+     X = X.astype(dtype=dtype)
+     X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+     if weighted:
+         weights = gen.uniform(low=-0.5, high=1.0, size=row_count)
+         weights = weights.astype(dtype=dtype)
+         weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+     incbs = IncrementalBasicStatistics(
+         result_options=["mean", "max", "sum"], batch_size=batch_size
+     )
+
+     if weighted:
+         result = incbs.fit(X_df, sample_weight=weights_df)
+     else:
+         result = incbs.fit(X_df)
+
+     res_mean, res_max, res_sum = result.mean, result.max, result.sum
+     if weighted:
+         weighted_data = np.diag(weights) @ X
+         gtr_mean, gtr_max, gtr_sum = (
+             expected_mean(weighted_data),
+             expected_max(weighted_data),
+             expected_sum(weighted_data),
+         )
+     else:
+         gtr_mean, gtr_max, gtr_sum = (
+             expected_mean(X),
+             expected_max(X),
+             expected_sum(X),
+         )
+
+     tol = 3e-4 if res_mean.dtype == np.float32 else 1e-7
+     assert_allclose(gtr_mean, res_mean, atol=tol)
+     assert_allclose(gtr_max, res_max, atol=tol)
+     assert_allclose(gtr_sum, res_sum, atol=tol)
+
+
+ @pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
+ @pytest.mark.parametrize("num_batches", [2, 10])
+ @pytest.mark.parametrize("row_count", [100, 1000])
+ @pytest.mark.parametrize("column_count", [10, 100])
+ @pytest.mark.parametrize("weighted", [True, False])
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
+ def test_fit_all_option_on_random_data(
+     dataframe, queue, num_batches, row_count, column_count, weighted, dtype
+ ):
+     seed = 77
+     gen = np.random.default_rng(seed)
+     batch_size = row_count // num_batches
+     X = gen.uniform(low=-0.3, high=+0.7, size=(row_count, column_count))
+     X = X.astype(dtype=dtype)
+     X_df = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe)
+     if weighted:
+         weights = gen.uniform(low=-0.5, high=+1.0, size=row_count)
+         weights = weights.astype(dtype=dtype)
+         weights_df = _convert_to_dataframe(weights, sycl_queue=queue, target_df=dataframe)
+     incbs = IncrementalBasicStatistics(result_options="all", batch_size=batch_size)
+
+     if weighted:
+         result = incbs.fit(X_df, sample_weight=weights_df)
+     else:
+         result = incbs.fit(X_df)
+
+     if weighted:
+         weighted_data = np.diag(weights) @ X
+
+     for option in options_and_tests:
+         result_option, function, tols = option
+         print(result_option)
+         fp32tol, fp64tol = tols
+         res = getattr(result, result_option)
+         if weighted:
+             gtr = function(weighted_data)
+         else:
+             gtr = function(X)
+         tol = fp32tol if res.dtype == np.float32 else fp64tol
+         assert_allclose(gtr, res, atol=tol)
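A note on the ground truth used throughout these tests: the weighted reference is built as np.diag(weights) @ X, which materializes an n_samples × n_samples matrix. That is harmless at these sizes, but anyone adapting the tests to larger inputs can get the identical result with broadcasting (this is an observation about the test code, not a change shipped in the release):

    import numpy as np

    rng = np.random.default_rng(77)
    X = rng.uniform(low=-0.3, high=0.7, size=(1000, 10))
    weights = rng.uniform(low=-0.5, high=1.0, size=1000)

    # Row-wise scaling is equivalent to the dense diagonal product,
    # without the 1000 x 1000 intermediate matrix.
    weighted_data = weights[:, np.newaxis] * X
    np.testing.assert_allclose(weighted_data, np.diag(weights) @ X)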
sklearnex/cluster/dbscan.py CHANGED
@@ -22,10 +22,11 @@ from scipy import sparse as sp
  from sklearn.cluster import DBSCAN as sklearn_DBSCAN
  from sklearn.utils.validation import _check_sample_weight

- from daal4py.sklearn._utils import control_n_jobs, run_with_n_jobs, sklearn_check_version
+ from daal4py.sklearn._n_jobs_support import control_n_jobs
+ from daal4py.sklearn._utils import sklearn_check_version
  from onedal.cluster import DBSCAN as onedal_DBSCAN

- from .._device_offload import dispatch, wrap_output_data
+ from .._device_offload import dispatch
  from .._utils import PatchingConditionsChain

  if sklearn_check_version("1.1") and not sklearn_check_version("1.2"):
@@ -45,7 +46,7 @@ class BaseDBSCAN(ABC):
          self.n_features_in_ = self._onedal_estimator.n_features_in_


- @control_n_jobs
+ @control_n_jobs(decorated_methods=["fit"])
  class DBSCAN(sklearn_DBSCAN, BaseDBSCAN):
      __doc__ = sklearn_DBSCAN.__doc__

@@ -83,7 +84,6 @@ class DBSCAN(sklearn_DBSCAN, BaseDBSCAN):
          self.p = p
          self.n_jobs = n_jobs

-     @run_with_n_jobs
      def _onedal_fit(self, X, y, sample_weight=None, queue=None):
          onedal_params = {
              "eps": self.eps,
@@ -186,3 +186,5 @@ class DBSCAN(sklearn_DBSCAN, BaseDBSCAN):
          )

          return self
+
+     fit.__doc__ = sklearn_DBSCAN.fit.__doc__
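The n_jobs plumbing changes shape here: the old pair of a bare @control_n_jobs class decorator plus @run_with_n_jobs on individual methods is replaced by a single class decorator that names the threaded methods explicitly. The decorator and its decorated_methods argument are exactly as in the diff above; the estimator usage below is an illustrative sketch:

    from sklearn.datasets import make_blobs
    from sklearnex.cluster import DBSCAN

    X, _ = make_blobs(n_samples=500, centers=3, random_state=0)

    # n_jobs now applies to the methods listed in decorated_methods=["fit"].
    labels = DBSCAN(eps=0.5, min_samples=5, n_jobs=4).fit(X).labels_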
sklearnex/conftest.py ADDED
@@ -0,0 +1,63 @@
+ # ==============================================================================
+ # Copyright 2024 Intel Corporation
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+
+ import io
+ import logging
+
+ import pytest
+
+ from sklearnex import patch_sklearn, unpatch_sklearn
+
+
+ def pytest_configure(config):
+     config.addinivalue_line(
+         "markers", "allow_sklearn_fallback: mark test to not check for sklearnex usage"
+     )
+
+
+ @pytest.hookimpl(hookwrapper=True)
+ def pytest_runtest_call(item):
+     # setup logger to check for sklearn fallback
+     if not item.get_closest_marker("allow_sklearn_fallback"):
+         log_stream = io.StringIO()
+         log_handler = logging.StreamHandler(log_stream)
+         sklearnex_logger = logging.getLogger("sklearnex")
+         level = sklearnex_logger.level
+         sklearnex_stderr_handler = sklearnex_logger.handlers
+         sklearnex_logger.handlers = []
+         sklearnex_logger.addHandler(log_handler)
+         sklearnex_logger.setLevel(logging.INFO)
+         log_handler.setLevel(logging.INFO)
+
+         yield
+
+         sklearnex_logger.handlers = sklearnex_stderr_handler
+         sklearnex_logger.setLevel(level)
+         sklearnex_logger.removeHandler(log_handler)
+         text = log_stream.getvalue()
+         if "fallback to original Scikit-learn" in text:
+             raise TypeError(
+                 f"test did not properly evaluate sklearnex functionality and fell back to sklearn:\n{text}"
+             )
+     else:
+         yield
+
+
+ @pytest.fixture
+ def with_sklearnex():
+     patch_sklearn()
+     yield
+     unpatch_sklearn()
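The hookwrapper above turns any silent fallback to stock scikit-learn into a hard test failure by scanning the sklearnex log for the fallback message. Tests that legitimately exercise unsupported paths opt out via the marker registered in pytest_configure; a hypothetical test using it would look like:

    import pytest

    @pytest.mark.allow_sklearn_fallback
    def test_unsupported_parameter_falls_back():
        # Exempt from the fallback check in pytest_runtest_call, so reaching
        # stock scikit-learn inside this test is acceptable.
        ...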
sklearnex/{preview/decomposition → covariance}/__init__.py RENAMED
@@ -1,19 +1,19 @@
- # ===============================================================================
- # Copyright 2023 Intel Corporation
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #     http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ===============================================================================
-
- from .pca import PCA
-
- __all__ = ["PCA"]
+ # ===============================================================================
+ # Copyright 2024 Intel Corporation
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ===============================================================================
+
+ from .incremental_covariance import IncrementalEmpiricalCovariance
+
+ __all__ = ["IncrementalEmpiricalCovariance"]
sklearnex/covariance/incremental_covariance.py ADDED
@@ -0,0 +1,130 @@
+ # ===============================================================================
+ # Copyright 2024 Intel Corporation
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ===============================================================================
+
+ import numpy as np
+ from sklearn.utils import check_array, gen_batches
+
+ from daal4py.sklearn._n_jobs_support import control_n_jobs
+ from onedal._device_offload import support_usm_ndarray
+ from onedal.covariance import (
+     IncrementalEmpiricalCovariance as onedal_IncrementalEmpiricalCovariance,
+ )
+
+
+ @control_n_jobs(decorated_methods=["partial_fit"])
+ class IncrementalEmpiricalCovariance:
+     """
+     Incremental estimator for covariance.
+     Allows to compute empirical covariance estimated by maximum
+     likelihood method if data are splitted into batches.
+
+     Parameters
+     ----------
+     batch_size : int, default=None
+         The number of samples to use for each batch. Only used when calling
+         ``fit``. If ``batch_size`` is ``None``, then ``batch_size``
+         is inferred from the data and set to ``5 * n_features``, to provide a
+         balance between approximation accuracy and memory consumption.
+
+     Attributes
+     ----------
+     location_ : ndarray of shape (n_features,)
+         Estimated location, i.e. the estimated mean.
+
+     covariance_ : ndarray of shape (n_features, n_features)
+         Estimated covariance matrix
+     """
+
+     _onedal_incremental_covariance = staticmethod(onedal_IncrementalEmpiricalCovariance)
+
+     def __init__(self, batch_size=None):
+         self._need_to_finalize = False  # If True then finalize compute should
+         # be called to obtain covariance_ or location_ from partial compute data
+         self.batch_size = batch_size
+
+     def _onedal_finalize_fit(self):
+         assert hasattr(self, "_onedal_estimator")
+         self._onedal_estimator.finalize_fit()
+         self._need_to_finalize = False
+
+     def _onedal_partial_fit(self, X, queue):
+         onedal_params = {
+             "method": "dense",
+             "bias": True,
+         }
+         if not hasattr(self, "_onedal_estimator"):
+             self._onedal_estimator = self._onedal_incremental_covariance(**onedal_params)
+         self._onedal_estimator.partial_fit(X, queue)
+         self._need_to_finalize = True
+
+     @property
+     def covariance_(self):
+         if self._need_to_finalize:
+             self._onedal_finalize_fit()
+         return self._onedal_estimator.covariance_
+
+     @property
+     def location_(self):
+         if self._need_to_finalize:
+             self._onedal_finalize_fit()
+         return self._onedal_estimator.location_
+
+     @support_usm_ndarray()
+     def partial_fit(self, X, queue=None):
+         """
+         Incremental fit with X. All of X is processed as a single batch.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Training data, where `n_samples` is the number of samples and
+             `n_features` is the number of features.
+
+         Returns
+         -------
+         self : object
+             Returns the instance itself.
+         """
+         X = check_array(X, dtype=[np.float64, np.float32])
+         self._onedal_partial_fit(X, queue)
+         return self
+
+     def fit(self, X, queue=None):
+         """
+         Fit the model with X, using minibatches of size batch_size.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Training data, where `n_samples` is the number of samples and
+             `n_features` is the number of features.
+
+         Returns
+         -------
+         self : object
+             Returns the instance itself.
+         """
+         n_samples, n_features = X.shape
+         if self.batch_size is None:
+             batch_size_ = 5 * n_features
+         else:
+             batch_size_ = self.batch_size
+         for batch in gen_batches(n_samples, batch_size_):
+             X_batch = X[batch]
+             self.partial_fit(X_batch, queue=queue)
+
+         self._onedal_finalize_fit()
+         return self
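Since the oneDAL backend is configured with bias=True, the finalized estimate is the maximum-likelihood (biased) covariance. A quick sanity check against NumPy (an illustrative sketch; that the backend's bias=True matches np.cov(..., bias=True) is an assumption consistent with the docstring above):

    import numpy as np
    from sklearnex.covariance import IncrementalEmpiricalCovariance

    X = np.random.default_rng(0).standard_normal((200, 5))

    est = IncrementalEmpiricalCovariance(batch_size=50)
    est.fit(X)  # internally loops partial_fit over gen_batches(200, 50)

    # Biased (maximum-likelihood) covariance and the mean vector.
    np.testing.assert_allclose(
        est.covariance_, np.cov(X, rowvar=False, bias=True), atol=1e-10
    )
    np.testing.assert_allclose(est.location_, X.mean(axis=0))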