scikit-learn-intelex 2024.4.0__py312-none-manylinux1_x86_64.whl → 2024.6.0__py312-none-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/METADATA +2 -2
  2. {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/RECORD +43 -36
  3. sklearnex/_device_offload.py +8 -1
  4. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +2 -4
  5. sklearnex/cluster/dbscan.py +3 -0
  6. sklearnex/cluster/tests/test_dbscan.py +8 -6
  7. sklearnex/conftest.py +11 -1
  8. sklearnex/covariance/incremental_covariance.py +217 -30
  9. sklearnex/covariance/tests/test_incremental_covariance.py +54 -17
  10. sklearnex/decomposition/pca.py +68 -13
  11. sklearnex/decomposition/tests/test_pca.py +6 -4
  12. sklearnex/dispatcher.py +46 -1
  13. sklearnex/ensemble/_forest.py +114 -22
  14. sklearnex/ensemble/tests/test_forest.py +13 -3
  15. sklearnex/glob/dispatcher.py +16 -2
  16. sklearnex/linear_model/__init__.py +5 -3
  17. sklearnex/linear_model/incremental_linear.py +464 -0
  18. sklearnex/linear_model/linear.py +27 -9
  19. sklearnex/linear_model/logistic_regression.py +13 -15
  20. sklearnex/linear_model/tests/test_incremental_linear.py +200 -0
  21. sklearnex/linear_model/tests/test_linear.py +2 -2
  22. sklearnex/neighbors/knn_regression.py +24 -0
  23. sklearnex/neighbors/tests/test_neighbors.py +2 -2
  24. sklearnex/preview/__init__.py +1 -1
  25. sklearnex/preview/decomposition/__init__.py +19 -0
  26. sklearnex/preview/decomposition/incremental_pca.py +228 -0
  27. sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
  28. sklearnex/svm/_common.py +165 -20
  29. sklearnex/svm/nusvc.py +40 -4
  30. sklearnex/svm/nusvr.py +31 -2
  31. sklearnex/svm/svc.py +40 -4
  32. sklearnex/svm/svr.py +31 -2
  33. sklearnex/tests/_utils.py +70 -29
  34. sklearnex/tests/test_common.py +54 -0
  35. sklearnex/tests/test_memory_usage.py +195 -132
  36. sklearnex/tests/test_n_jobs_support.py +4 -0
  37. sklearnex/tests/test_patching.py +22 -10
  38. sklearnex/tests/test_run_to_run_stability.py +283 -0
  39. sklearnex/utils/_namespace.py +1 -1
  40. sklearnex/utils/tests/test_finite.py +89 -0
  41. sklearnex/tests/test_run_to_run_stability_tests.py +0 -428
  42. {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/LICENSE.txt +0 -0
  43. {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/WHEEL +0 -0
  44. {scikit_learn_intelex-2024.4.0.dist-info → scikit_learn_intelex-2024.6.0.dist-info}/top_level.txt +0 -0
sklearnex/svm/svc.py CHANGED
@@ -85,6 +85,17 @@ class SVC(sklearn_SVC, BaseSVC):
85
85
  def fit(self, X, y, sample_weight=None):
86
86
  if sklearn_check_version("1.2"):
87
87
  self._validate_params()
88
+ elif self.C <= 0:
89
+ # else if added to correct issues with
90
+ # sklearn tests:
91
+ # svm/tests/test_sparse.py::test_error
92
+ # svm/tests/test_svm.py::test_bad_input
93
+ # for sklearn versions < 1.2 (i.e. without
94
+ # validate_params parameter checking)
95
+ # Without this, a segmentation fault with
96
+ # Windows fatal exception: access violation
97
+ # occurs
98
+ raise ValueError("C <= 0")
88
99
  if sklearn_check_version("1.0"):
89
100
  self._check_feature_names(X, reset=True)
90
101
  dispatch(
@@ -96,8 +107,9 @@ class SVC(sklearn_SVC, BaseSVC):
96
107
  },
97
108
  X,
98
109
  y,
99
- sample_weight,
110
+ sample_weight=sample_weight,
100
111
  )
112
+
101
113
  return self
102
114
 
103
115
  @wrap_output_data
@@ -270,12 +282,30 @@ class SVC(sklearn_SVC, BaseSVC):
270
282
  return patching_status
271
283
  raise RuntimeError(f"Unknown method {method_name} in {class_name}")
272
284
 
285
+ def _get_sample_weight(self, X, y, sample_weight=None):
286
+ sample_weight = super()._get_sample_weight(X, y, sample_weight)
287
+ if sample_weight is None:
288
+ return sample_weight
289
+
290
+ if np.any(sample_weight <= 0) and len(np.unique(y[sample_weight > 0])) != len(
291
+ self.classes_
292
+ ):
293
+ raise ValueError(
294
+ "Invalid input - all samples with positive weights "
295
+ "belong to the same class"
296
+ if sklearn_check_version("1.2")
297
+ else "Invalid input - all samples with positive weights "
298
+ "have the same label."
299
+ )
300
+ return sample_weight
301
+
273
302
  def _onedal_fit(self, X, y, sample_weight=None, queue=None):
303
+ X, _, weights = self._onedal_fit_checks(X, y, sample_weight)
274
304
  onedal_params = {
275
305
  "C": self.C,
276
306
  "kernel": self.kernel,
277
307
  "degree": self.degree,
278
- "gamma": self.gamma,
308
+ "gamma": self._compute_gamma_sigma(X),
279
309
  "coef0": self.coef0,
280
310
  "tol": self.tol,
281
311
  "shrinking": self.shrinking,
@@ -287,10 +317,16 @@ class SVC(sklearn_SVC, BaseSVC):
287
317
  }
288
318
 
289
319
  self._onedal_estimator = onedal_SVC(**onedal_params)
290
- self._onedal_estimator.fit(X, y, sample_weight, queue=queue)
320
+ self._onedal_estimator.fit(X, y, weights, queue=queue)
291
321
 
292
322
  if self.probability:
293
- self._fit_proba(X, y, sample_weight, queue=queue)
323
+ self._fit_proba(
324
+ X,
325
+ y,
326
+ sample_weight=sample_weight,
327
+ queue=queue,
328
+ )
329
+
294
330
  self._save_attributes()
295
331
 
296
332
  def _onedal_predict(self, X, queue=None):
sklearnex/svm/svr.py CHANGED
@@ -65,6 +65,17 @@ class SVR(sklearn_SVR, BaseSVR):
65
65
  def fit(self, X, y, sample_weight=None):
66
66
  if sklearn_check_version("1.2"):
67
67
  self._validate_params()
68
+ elif self.C <= 0:
69
+ # else if added to correct issues with
70
+ # sklearn tests:
71
+ # svm/tests/test_sparse.py::test_error
72
+ # svm/tests/test_svm.py::test_bad_input
73
+ # for sklearn versions < 1.2 (i.e. without
74
+ # validate_params parameter checking)
75
+ # Without this, a segmentation fault with
76
+ # Windows fatal exception: access violation
77
+ # occurs
78
+ raise ValueError("C <= 0")
68
79
  if sklearn_check_version("1.0"):
69
80
  self._check_feature_names(X, reset=True)
70
81
  dispatch(
@@ -76,7 +87,7 @@ class SVR(sklearn_SVR, BaseSVR):
76
87
  },
77
88
  X,
78
89
  y,
79
- sample_weight,
90
+ sample_weight=sample_weight,
80
91
  )
81
92
 
82
93
  return self
@@ -95,13 +106,30 @@ class SVR(sklearn_SVR, BaseSVR):
95
106
  X,
96
107
  )
97
108
 
109
+ @wrap_output_data
110
+ def score(self, X, y, sample_weight=None):
111
+ if sklearn_check_version("1.0"):
112
+ self._check_feature_names(X, reset=False)
113
+ return dispatch(
114
+ self,
115
+ "score",
116
+ {
117
+ "onedal": self.__class__._onedal_score,
118
+ "sklearn": sklearn_SVR.score,
119
+ },
120
+ X,
121
+ y,
122
+ sample_weight=sample_weight,
123
+ )
124
+
98
125
  def _onedal_fit(self, X, y, sample_weight=None, queue=None):
126
+ X, _, sample_weight = self._onedal_fit_checks(X, y, sample_weight)
99
127
  onedal_params = {
100
128
  "C": self.C,
101
129
  "epsilon": self.epsilon,
102
130
  "kernel": self.kernel,
103
131
  "degree": self.degree,
104
- "gamma": self.gamma,
132
+ "gamma": self._compute_gamma_sigma(X),
105
133
  "coef0": self.coef0,
106
134
  "tol": self.tol,
107
135
  "shrinking": self.shrinking,
@@ -118,3 +146,4 @@ class SVR(sklearn_SVR, BaseSVR):
118
146
 
119
147
  fit.__doc__ = sklearn_SVR.fit.__doc__
120
148
  predict.__doc__ = sklearn_SVR.predict.__doc__
149
+ score.__doc__ = sklearn_SVR.score.__doc__
sklearnex/tests/_utils.py CHANGED
@@ -14,9 +14,12 @@
14
14
  # limitations under the License.
15
15
  # ==============================================================================
16
16
 
17
+ from functools import partial
17
18
  from inspect import isclass
18
19
 
19
20
  import numpy as np
21
+ from scipy import sparse as sp
22
+ from sklearn import clone
20
23
  from sklearn.base import (
21
24
  BaseEstimator,
22
25
  ClassifierMixin,
@@ -87,18 +90,26 @@ mixin_map = [
87
90
  ]
88
91
 
89
92
 
90
- SPECIAL_INSTANCES = {
91
- str(i): i
92
- for i in [
93
- LocalOutlierFactor(novelty=True),
94
- SVC(probability=True),
95
- NuSVC(probability=True),
96
- KNeighborsClassifier(algorithm="brute"),
97
- KNeighborsRegressor(algorithm="brute"),
98
- NearestNeighbors(algorithm="brute"),
99
- LogisticRegression(solver="newton-cg"),
100
- ]
101
- }
93
+ class _sklearn_clone_dict(dict):
94
+
95
+ def __getitem__(self, key):
96
+ return clone(super().__getitem__(key))
97
+
98
+
99
+ SPECIAL_INSTANCES = _sklearn_clone_dict(
100
+ {
101
+ str(i): i
102
+ for i in [
103
+ LocalOutlierFactor(novelty=True),
104
+ SVC(probability=True),
105
+ NuSVC(probability=True),
106
+ KNeighborsClassifier(algorithm="brute"),
107
+ KNeighborsRegressor(algorithm="brute"),
108
+ NearestNeighbors(algorithm="brute"),
109
+ LogisticRegression(solver="newton-cg"),
110
+ ]
111
+ }
112
+ )
102
113
 
103
114
 
104
115
  def gen_models_info(algorithms):
@@ -107,8 +118,8 @@ def gen_models_info(algorithms):
107
118
 
108
119
  if i in PATCHED_MODELS:
109
120
  est = PATCHED_MODELS[i]
110
- elif i in SPECIAL_INSTANCES:
111
- est = SPECIAL_INSTANCES[i].__class__
121
+ elif isinstance(algorithms[i], BaseEstimator):
122
+ est = algorithms[i].__class__
112
123
  else:
113
124
  raise KeyError(f"Unrecognized sklearnex estimator: {i}")
114
125
 
@@ -129,24 +140,54 @@ def gen_models_info(algorithms):
129
140
  return output
130
141
 
131
142
 
132
- def gen_dataset(estimator, queue=None, target_df=None, dtype=np.float64):
133
- dataset = None
134
- name = estimator.__class__.__name__
135
- est = PATCHED_MODELS[name]
143
+ def gen_dataset_type(est):
144
+ # est should be an estimator or estimator class
145
+ # dataset initialized to classification, but will be swapped
146
+ # for other types as necessary
147
+ dataset = "classification"
148
+ estimator = est.__class__ if isinstance(est, BaseEstimator) else est
149
+
136
150
  for mixin, _, data in mixin_map:
137
- if issubclass(est, mixin) and data is not None:
151
+ if issubclass(estimator, mixin) and data is not None:
138
152
  dataset = data
153
+ return dataset
154
+
155
+
156
+ _dataset_dict = {
157
+ "classification": [partial(load_iris, return_X_y=True)],
158
+ "regression": [partial(load_diabetes, return_X_y=True)],
159
+ }
160
+
161
+
162
+ def gen_dataset(
163
+ est,
164
+ datasets=_dataset_dict,
165
+ sparse=False,
166
+ queue=None,
167
+ target_df=None,
168
+ dtype=None,
169
+ ):
170
+ dataset_type = gen_dataset_type(est)
171
+ output = []
139
172
  # load data
140
- if dataset == "classification" or dataset is None:
141
- X, y = load_iris(return_X_y=True)
142
- elif dataset == "regression":
143
- X, y = load_diabetes(return_X_y=True)
144
- else:
145
- raise ValueError("Unknown dataset type")
146
-
147
- X = _convert_to_dataframe(X, sycl_queue=queue, target_df=target_df, dtype=dtype)
148
- y = _convert_to_dataframe(y, sycl_queue=queue, target_df=target_df, dtype=dtype)
149
- return X, y
173
+ flag = dtype is None
174
+
175
+ for func in datasets[dataset_type]:
176
+ X, y = func()
177
+ if flag:
178
+ dtype = X.dtype if hasattr(X, "dtype") else np.float64
179
+
180
+ if sparse:
181
+ X = sp.csr_matrix(X)
182
+ else:
183
+ X = _convert_to_dataframe(
184
+ X, sycl_queue=queue, target_df=target_df, dtype=dtype
185
+ )
186
+ y = _convert_to_dataframe(
187
+ y, sycl_queue=queue, target_df=target_df, dtype=dtype
188
+ )
189
+ output += [[X, y]]
190
+ return output
150
191
 
151
192
 
152
193
  DTYPES = [
@@ -0,0 +1,54 @@
1
+ # ==============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import os
18
+ from glob import glob
19
+
20
+ import pytest
21
+
22
+ ALLOWED_LOCATIONS = [
23
+ "_config.py",
24
+ "_device_offload.py",
25
+ "test",
26
+ "svc.py",
27
+ "svm" + os.sep + "_common.py",
28
+ ]
29
+
30
+
31
+ def test_target_offload_ban():
32
+ """This test blocks the use of target_offload in
33
+ in sklearnex files. Offloading computation to devices
34
+ via target_offload should only occur externally, and not
35
+ within the architecture of the sklearnex classes. This
36
+ is for clarity, traceability and maintainability.
37
+ """
38
+ from sklearnex import __file__ as loc
39
+
40
+ path = loc.replace("__init__.py", "")
41
+ files = [y for x in os.walk(path) for y in glob(os.path.join(x[0], "*.py"))]
42
+
43
+ output = []
44
+
45
+ for f in files:
46
+ if open(f, "r").read().find("target_offload") != -1:
47
+ output += [f.replace(path, "sklearnex" + os.sep)]
48
+
49
+ # remove this file from the list
50
+ for allowed in ALLOWED_LOCATIONS:
51
+ output = [i for i in output if allowed not in i]
52
+
53
+ output = "\n".join(output)
54
+ assert output == "", f"sklearn versioning is occuring in: \n{output}"