scikit-learn-intelex 2024.1.0__py311-none-manylinux1_x86_64.whl → 2024.2.0__py311-none-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. (More details are available via the link on the original page.)

Files changed (40):
  1. {scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.2.0.dist-info}/METADATA +2 -2
  2. {scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.2.0.dist-info}/RECORD +38 -34
  3. sklearnex/cluster/dbscan.py +3 -3
  4. sklearnex/{preview/linear_model → covariance}/__init__.py +3 -3
  5. sklearnex/covariance/incremental_covariance.py +130 -0
  6. sklearnex/covariance/tests/test_incremental_covariance.py +143 -0
  7. sklearnex/dispatcher.py +19 -18
  8. sklearnex/ensemble/_forest.py +5 -10
  9. sklearnex/linear_model/__init__.py +1 -2
  10. sklearnex/linear_model/linear.py +3 -10
  11. sklearnex/{preview/linear_model → linear_model}/logistic_regression.py +19 -38
  12. sklearnex/linear_model/tests/test_logreg.py +70 -5
  13. sklearnex/neighbors/__init__.py +1 -1
  14. sklearnex/neighbors/_lof.py +167 -0
  15. sklearnex/neighbors/knn_classification.py +6 -9
  16. sklearnex/neighbors/knn_regression.py +6 -8
  17. sklearnex/neighbors/knn_unsupervised.py +5 -7
  18. sklearnex/neighbors/tests/test_neighbors.py +12 -11
  19. sklearnex/preview/__init__.py +1 -1
  20. sklearnex/preview/cluster/k_means.py +3 -8
  21. sklearnex/preview/covariance/covariance.py +46 -12
  22. sklearnex/preview/decomposition/pca.py +3 -5
  23. sklearnex/spmd/__init__.py +1 -0
  24. sklearnex/spmd/covariance/__init__.py +19 -0
  25. sklearnex/spmd/covariance/covariance.py +21 -0
  26. sklearnex/spmd/linear_model/__init__.py +2 -1
  27. sklearnex/spmd/linear_model/logistic_regression.py +21 -0
  28. sklearnex/svm/nusvc.py +5 -6
  29. sklearnex/svm/nusvr.py +3 -4
  30. sklearnex/svm/svc.py +5 -6
  31. sklearnex/svm/svr.py +3 -4
  32. sklearnex/tests/test_memory_usage.py +1 -4
  33. sklearnex/tests/test_monkeypatch.py +33 -20
  34. sklearnex/tests/test_n_jobs_support.py +71 -9
  35. sklearnex/tests/test_patching.py +19 -5
  36. sklearnex/neighbors/lof.py +0 -436
  37. sklearnex/preview/linear_model/tests/test_preview_logistic_regression.py +0 -59
  38. {scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.2.0.dist-info}/LICENSE.txt +0 -0
  39. {scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.2.0.dist-info}/WHEEL +0 -0
  40. {scikit_learn_intelex-2024.1.0.dist-info → scikit_learn_intelex-2024.2.0.dist-info}/top_level.txt +0 -0
@@ -14,6 +14,7 @@
14
14
  # limitations under the License.
15
15
  # ==============================================================================
16
16
 
17
+ import inspect
17
18
  import os
18
19
  import pathlib
19
20
  import re
@@ -96,15 +97,15 @@ def _load_all_models(patched):
96
97
  if patched:
97
98
  patch_sklearn()
98
99
 
99
- models = []
100
+ models = {}
100
101
  for patch_infos in get_patch_map().values():
101
- maybe_class = getattr(patch_infos[0][0][0], patch_infos[0][0][1])
102
+ maybe_class = getattr(patch_infos[0][0][0], patch_infos[0][0][1], None)
102
103
  if (
103
104
  maybe_class is not None
104
105
  and isclass(maybe_class)
105
106
  and issubclass(maybe_class, BaseEstimator)
106
107
  ):
107
- models.append(maybe_class())
108
+ models[patch_infos[0][0][1]] = maybe_class
108
109
 
109
110
  if patched:
110
111
  unpatch_sklearn()
@@ -116,7 +117,20 @@ PATCHED_MODELS = _load_all_models(patched=True)
116
117
  UNPATCHED_MODELS = _load_all_models(patched=False)
117
118
 
118
119
 
119
- @pytest.mark.parametrize(("patched", "unpatched"), zip(PATCHED_MODELS, UNPATCHED_MODELS))
120
- def test_is_patched_instance(patched, unpatched):
120
+ @pytest.mark.parametrize("estimator", UNPATCHED_MODELS.keys())
121
+ def test_is_patched_instance(estimator):
122
+ patched = PATCHED_MODELS[estimator]
123
+ unpatched = UNPATCHED_MODELS[estimator]
121
124
  assert is_patched_instance(patched), f"{patched} is a patched instance"
122
125
  assert not is_patched_instance(unpatched), f"{unpatched} is an unpatched instance"
126
+
127
+
128
+ @pytest.mark.parametrize("member", ["_onedal_cpu_supported", "_onedal_gpu_supported"])
129
+ @pytest.mark.parametrize(
130
+ "name",
131
+ [i for i in PATCHED_MODELS.keys() if "sklearnex" in PATCHED_MODELS[i].__module__],
132
+ )
133
+ def test_onedal_supported_member(name, member):
134
+ patched = PATCHED_MODELS[name]
135
+ sig = str(inspect.signature(getattr(patched, member)))
136
+ assert "(self, method_name, *data)" == sig
@@ -1,436 +0,0 @@
1
- # ===============================================================================
2
- # Copyright 2023 Intel Corporation
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
- # ===============================================================================
16
-
17
- import warnings
18
-
19
- import numpy as np
20
- from sklearn.neighbors._lof import LocalOutlierFactor as sklearn_LocalOutlierFactor
21
-
22
- from .knn_unsupervised import NearestNeighbors
23
-
24
- try:
25
- from sklearn.utils.metaestimators import available_if
26
- except ImportError:
27
- pass
28
-
29
- from sklearn.utils import check_array
30
- from sklearn.utils.validation import check_is_fitted
31
-
32
- from daal4py.sklearn._utils import sklearn_check_version
33
-
34
- from .._config import config_context
35
- from .._device_offload import dispatch, wrap_output_data
36
- from .._utils import PatchingConditionsChain
37
-
38
- if sklearn_check_version("1.0"):
39
-
40
- class LocalOutlierFactor(sklearn_LocalOutlierFactor):
41
- if sklearn_check_version("1.2"):
42
- _parameter_constraints: dict = {
43
- **sklearn_LocalOutlierFactor._parameter_constraints
44
- }
45
-
46
- def __init__(
47
- self,
48
- n_neighbors=20,
49
- *,
50
- algorithm="auto",
51
- leaf_size=30,
52
- metric="minkowski",
53
- p=2,
54
- metric_params=None,
55
- contamination="auto",
56
- novelty=False,
57
- n_jobs=None,
58
- ):
59
- super().__init__(
60
- n_neighbors=n_neighbors,
61
- algorithm=algorithm,
62
- leaf_size=leaf_size,
63
- metric=metric,
64
- p=p,
65
- metric_params=metric_params,
66
- n_jobs=n_jobs,
67
- contamination=contamination,
68
- novelty=novelty,
69
- )
70
-
71
- def _fit(self, X, y, queue=None):
72
- with config_context(target_offload=queue):
73
- if sklearn_check_version("1.2"):
74
- self._validate_params()
75
- self._knn = NearestNeighbors(
76
- n_neighbors=self.n_neighbors,
77
- algorithm=self.algorithm,
78
- leaf_size=self.leaf_size,
79
- metric=self.metric,
80
- p=self.p,
81
- metric_params=self.metric_params,
82
- n_jobs=self.n_jobs,
83
- )
84
- self._knn.fit(X)
85
-
86
- if self.contamination != "auto":
87
- if not (0.0 < self.contamination <= 0.5):
88
- raise ValueError(
89
- "contamination must be in (0, 0.5], "
90
- "got: %f" % self.contamination
91
- )
92
-
93
- n_samples = self._knn.n_samples_fit_
94
-
95
- if self.n_neighbors > n_samples:
96
- warnings.warn(
97
- "n_neighbors (%s) is greater than the "
98
- "total number of samples (%s). n_neighbors "
99
- "will be set to (n_samples - 1) for estimation."
100
- % (self.n_neighbors, n_samples)
101
- )
102
- self.n_neighbors_ = max(1, min(self.n_neighbors, n_samples - 1))
103
-
104
- self._distances_fit_X_, _neighbors_indices_fit_X_ = self._knn.kneighbors(
105
- n_neighbors=self.n_neighbors_
106
- )
107
-
108
- self._lrd = self._local_reachability_density(
109
- self._distances_fit_X_, _neighbors_indices_fit_X_
110
- )
111
-
112
- # Compute lof score over training samples to define offset_:
113
- lrd_ratios_array = (
114
- self._lrd[_neighbors_indices_fit_X_] / self._lrd[:, np.newaxis]
115
- )
116
-
117
- self.negative_outlier_factor_ = -np.mean(lrd_ratios_array, axis=1)
118
-
119
- if self.contamination == "auto":
120
- # inliers score around -1 (the higher, the less abnormal).
121
- self.offset_ = -1.5
122
- else:
123
- self.offset_ = np.percentile(
124
- self.negative_outlier_factor_, 100.0 * self.contamination
125
- )
126
-
127
- for knn_prop_name in self._knn.__dict__.keys():
128
- if knn_prop_name not in self.__dict__.keys():
129
- setattr(self, knn_prop_name, self._knn.__dict__[knn_prop_name])
130
-
131
- return self
132
-
133
- def fit(self, X, y=None):
134
- return dispatch(
135
- self,
136
- "neighbors.LocalOutlierFactor.fit",
137
- {
138
- "onedal": self.__class__._fit,
139
- "sklearn": None,
140
- },
141
- X,
142
- y,
143
- )
144
-
145
- def _onedal_predict(self, X, queue=None):
146
- with config_context(target_offload=queue):
147
- check_is_fitted(self)
148
-
149
- if X is not None:
150
- X = check_array(X, accept_sparse="csr")
151
- is_inlier = np.ones(X.shape[0], dtype=int)
152
- is_inlier[self.decision_function(X) < 0] = -1
153
- else:
154
- is_inlier = np.ones(self._knn.n_samples_fit_, dtype=int)
155
- is_inlier[self.negative_outlier_factor_ < self.offset_] = -1
156
-
157
- return is_inlier
158
-
159
- @wrap_output_data
160
- def _predict(self, X=None):
161
- return dispatch(
162
- self,
163
- "neighbors.LocalOutlierFactor.predict",
164
- {
165
- "onedal": self.__class__._onedal_predict,
166
- "sklearn": None,
167
- },
168
- X,
169
- )
170
-
171
- def _score_samples(self, X, queue=None):
172
- with config_context(target_offload=queue):
173
- check_is_fitted(self)
174
- X = check_array(X, accept_sparse="csr")
175
-
176
- distances_X, neighbors_indices_X = self._knn.kneighbors(
177
- X, n_neighbors=self.n_neighbors_
178
- )
179
- X_lrd = self._local_reachability_density(distances_X, neighbors_indices_X)
180
-
181
- lrd_ratios_array = self._lrd[neighbors_indices_X] / X_lrd[:, np.newaxis]
182
-
183
- # as bigger is better:
184
- return -np.mean(lrd_ratios_array, axis=1)
185
-
186
- def _check_novelty_score_samples(self):
187
- if not self.novelty:
188
- msg = (
189
- "score_samples is not available when novelty=False. The "
190
- "scores of the training samples are always available "
191
- "through the negative_outlier_factor_ attribute. Use "
192
- "novelty=True if you want to use LOF for novelty detection "
193
- "and compute score_samples for new unseen data."
194
- )
195
- raise AttributeError(msg)
196
- return True
197
-
198
- @available_if(_check_novelty_score_samples)
199
- @wrap_output_data
200
- def score_samples(self, X):
201
- return dispatch(
202
- self,
203
- "neighbors.LocalOutlierFactor.score_samples",
204
- {
205
- "onedal": self.__class__._score_samples,
206
- "sklearn": None,
207
- },
208
- X,
209
- )
210
-
211
- def _check_novelty_fit_predict(self):
212
- if self.novelty:
213
- msg = (
214
- "fit_predict is not available when novelty=True. Use "
215
- "novelty=False if you want to predict on the training set."
216
- )
217
- raise AttributeError(msg)
218
- return True
219
-
220
- def _fit_predict(self, X, y, queue=None):
221
- with config_context(target_offload=queue):
222
- return self.fit(X)._predict()
223
-
224
- @available_if(_check_novelty_fit_predict)
225
- @wrap_output_data
226
- def fit_predict(self, X, y=None):
227
- return dispatch(
228
- self,
229
- "neighbors.LocalOutlierFactor.fit_predict",
230
- {
231
- "onedal": self.__class__._fit_predict,
232
- "sklearn": None,
233
- },
234
- X,
235
- y,
236
- )
237
-
238
- def _onedal_gpu_supported(self, method_name, *data):
239
- class_name = self.__class__.__name__
240
- patching_status = PatchingConditionsChain(
241
- f"sklearn.neighbors.{class_name}.{method_name}"
242
- )
243
- return patching_status
244
-
245
- def _onedal_cpu_supported(self, method_name, *data):
246
- class_name = self.__class__.__name__
247
- patching_status = PatchingConditionsChain(
248
- f"sklearn.neighbors.{class_name}.{method_name}"
249
- )
250
- return patching_status
251
-
252
- else:
253
-
254
- class LocalOutlierFactor(sklearn_LocalOutlierFactor):
255
- def __init__(
256
- self,
257
- n_neighbors=20,
258
- *,
259
- algorithm="auto",
260
- leaf_size=30,
261
- metric="minkowski",
262
- p=2,
263
- metric_params=None,
264
- contamination="auto",
265
- novelty=False,
266
- n_jobs=None,
267
- ):
268
- super().__init__(
269
- n_neighbors=n_neighbors,
270
- algorithm=algorithm,
271
- leaf_size=leaf_size,
272
- metric=metric,
273
- p=p,
274
- metric_params=metric_params,
275
- n_jobs=n_jobs,
276
- contamination=contamination,
277
- novelty=novelty,
278
- )
279
-
280
- def _fit(self, X, y=None, queue=None):
281
- with config_context(target_offload=queue):
282
- self._knn = NearestNeighbors(
283
- n_neighbors=self.n_neighbors,
284
- algorithm=self.algorithm,
285
- leaf_size=self.leaf_size,
286
- metric=self.metric,
287
- p=self.p,
288
- metric_params=self.metric_params,
289
- n_jobs=self.n_jobs,
290
- )
291
- self._knn.fit(X)
292
-
293
- if self.contamination != "auto":
294
- if not (0.0 < self.contamination <= 0.5):
295
- raise ValueError(
296
- "contamination must be in (0, 0.5], "
297
- "got: %f" % self.contamination
298
- )
299
-
300
- n_samples = self._knn.n_samples_fit_
301
-
302
- if self.n_neighbors > n_samples:
303
- warnings.warn(
304
- "n_neighbors (%s) is greater than the "
305
- "total number of samples (%s). n_neighbors "
306
- "will be set to (n_samples - 1) for estimation."
307
- % (self.n_neighbors, n_samples)
308
- )
309
- self.n_neighbors_ = max(1, min(self.n_neighbors, n_samples - 1))
310
-
311
- self._distances_fit_X_, _neighbors_indices_fit_X_ = self._knn.kneighbors(
312
- n_neighbors=self.n_neighbors_
313
- )
314
-
315
- self._lrd = self._local_reachability_density(
316
- self._distances_fit_X_, _neighbors_indices_fit_X_
317
- )
318
-
319
- # Compute lof score over training samples to define offset_:
320
- lrd_ratios_array = (
321
- self._lrd[_neighbors_indices_fit_X_] / self._lrd[:, np.newaxis]
322
- )
323
-
324
- self.negative_outlier_factor_ = -np.mean(lrd_ratios_array, axis=1)
325
-
326
- if self.contamination == "auto":
327
- # inliers score around -1 (the higher, the less abnormal).
328
- self.offset_ = -1.5
329
- else:
330
- self.offset_ = np.percentile(
331
- self.negative_outlier_factor_, 100.0 * self.contamination
332
- )
333
-
334
- for knn_prop_name in self._knn.__dict__.keys():
335
- if knn_prop_name not in self.__dict__.keys():
336
- setattr(self, knn_prop_name, self._knn.__dict__[knn_prop_name])
337
-
338
- return self
339
-
340
- def fit(self, X, y=None):
341
- return dispatch(
342
- self,
343
- "neighbors.LocalOutlierFactor.fit",
344
- {
345
- "onedal": self.__class__._fit,
346
- "sklearn": None,
347
- },
348
- X,
349
- y,
350
- )
351
-
352
- def _onedal_predict(self, X, queue=None):
353
- with config_context(target_offload=queue):
354
- check_is_fitted(self)
355
-
356
- if X is not None:
357
- X = check_array(X, accept_sparse="csr")
358
- is_inlier = np.ones(X.shape[0], dtype=int)
359
- is_inlier[self.decision_function(X) < 0] = -1
360
- else:
361
- is_inlier = np.ones(self._knn.n_samples_fit_, dtype=int)
362
- is_inlier[self.negative_outlier_factor_ < self.offset_] = -1
363
-
364
- return is_inlier
365
-
366
- @wrap_output_data
367
- def _predict(self, X=None):
368
- return dispatch(
369
- self,
370
- "neighbors.LocalOutlierFactor.predict",
371
- {
372
- "onedal": self.__class__._onedal_predict,
373
- "sklearn": None,
374
- },
375
- X,
376
- )
377
-
378
- def _onedal_score_samples(self, X, queue=None):
379
- with config_context(target_offload=queue):
380
- check_is_fitted(self)
381
- X = check_array(X, accept_sparse="csr")
382
-
383
- distances_X, neighbors_indices_X = self._knn.kneighbors(
384
- X, n_neighbors=self.n_neighbors_
385
- )
386
- X_lrd = self._local_reachability_density(distances_X, neighbors_indices_X)
387
-
388
- lrd_ratios_array = self._lrd[neighbors_indices_X] / X_lrd[:, np.newaxis]
389
-
390
- # as bigger is better:
391
- return -np.mean(lrd_ratios_array, axis=1)
392
-
393
- @wrap_output_data
394
- def _score_samples(self, X):
395
- if not self.novelty:
396
- msg = (
397
- "score_samples is not available when novelty=False. The "
398
- "scores of the training samples are always available "
399
- "through the negative_outlier_factor_ attribute. Use "
400
- "novelty=True if you want to use LOF for novelty detection "
401
- "and compute score_samples for new unseen data."
402
- )
403
- raise AttributeError(msg)
404
-
405
- return dispatch(
406
- self,
407
- "neighbors.LocalOutlierFactor.score_samples",
408
- {
409
- "onedal": self.__class__._onedal_score_samples,
410
- "sklearn": None,
411
- },
412
- X,
413
- )
414
-
415
- def _onedal_fit_predict(self, X, y, queue=None):
416
- with config_context(target_offload=queue):
417
- return self.fit(X)._predict()
418
-
419
- @wrap_output_data
420
- def _fit_predict(self, X, y=None):
421
- return dispatch(
422
- self,
423
- "neighbors.LocalOutlierFactor._onedal_fit_predict",
424
- {
425
- "onedal": self.__class__._onedal_fit_predict,
426
- "sklearn": None,
427
- },
428
- X,
429
- y,
430
- )
431
-
432
- def _onedal_gpu_supported(self, method_name, *data):
433
- return True
434
-
435
- def _onedal_cpu_supported(self, method_name, *data):
436
- return True
@@ -1,59 +0,0 @@
1
- # ===============================================================================
2
- # Copyright 2023 Intel Corporation
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
- # ===============================================================================
16
-
17
- import numpy as np
18
- import pytest
19
- from numpy.testing import assert_allclose
20
- from sklearn.datasets import load_breast_cancer
21
- from sklearn.metrics import accuracy_score
22
- from sklearn.model_selection import train_test_split
23
-
24
- from daal4py.sklearn._utils import daal_check_version
25
- from onedal.tests.utils._dataframes_support import (
26
- _as_numpy,
27
- _convert_to_dataframe,
28
- get_dataframes_and_queues,
29
- )
30
- from sklearnex import config_context
31
-
32
-
33
- @pytest.mark.parametrize(
34
- "dataframe,queue",
35
- get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
36
- )
37
- def test_sklearnex_import(dataframe, queue):
38
- from sklearnex.preview.linear_model import LogisticRegression
39
-
40
- X, y = load_breast_cancer(return_X_y=True)
41
- X_train, X_test, y_train, y_test = train_test_split(
42
- X, y, train_size=0.8, random_state=42
43
- )
44
- X_train = _convert_to_dataframe(X_train, sycl_queue=queue, target_df=dataframe)
45
- y_train = _convert_to_dataframe(y_train, sycl_queue=queue, target_df=dataframe)
46
- X_test = _convert_to_dataframe(X_test, sycl_queue=queue, target_df=dataframe)
47
-
48
- model = LogisticRegression(fit_intercept=True, solver="newton-cg")
49
- model.fit(X_train, y_train)
50
- y_pred = _as_numpy(model.predict(X_test))
51
- if daal_check_version((2024, "P", 1)):
52
- assert "sklearnex" in model.__module__
53
- else:
54
- assert "daal4py" in model.__module__
55
- # in case dataframe='numpy' algorithm should fallback to sklearn
56
- # as cpu method is not implemented in onedal
57
- if dataframe != "numpy" and daal_check_version((2024, "P", 1)):
58
- assert hasattr(model, "_onedal_estimator")
59
- assert accuracy_score(y_test, y_pred) > 0.95