scikit-learn-intelex 2024.5.0__py312-none-manylinux1_x86_64.whl → 2024.7.0__py312-none-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scikit-learn-intelex might be problematic. Click here for more details.

Files changed (73)
  1. {scikit_learn_intelex-2024.5.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/METADATA +2 -2
  2. scikit_learn_intelex-2024.7.0.dist-info/RECORD +122 -0
  3. sklearnex/_config.py +3 -15
  4. sklearnex/_device_offload.py +9 -168
  5. sklearnex/basic_statistics/basic_statistics.py +127 -1
  6. sklearnex/basic_statistics/tests/test_basic_statistics.py +251 -0
  7. sklearnex/basic_statistics/tests/test_incremental_basic_statistics.py +1 -1
  8. sklearnex/cluster/dbscan.py +3 -1
  9. sklearnex/cluster/k_means.py +8 -0
  10. sklearnex/cluster/tests/test_dbscan.py +8 -6
  11. sklearnex/cluster/tests/test_kmeans.py +15 -3
  12. sklearnex/conftest.py +11 -1
  13. sklearnex/covariance/incremental_covariance.py +64 -13
  14. sklearnex/covariance/tests/test_incremental_covariance.py +35 -0
  15. sklearnex/decomposition/pca.py +25 -1
  16. sklearnex/decomposition/tests/test_pca.py +4 -2
  17. sklearnex/dispatcher.py +109 -1
  18. sklearnex/ensemble/_forest.py +121 -57
  19. sklearnex/ensemble/tests/test_forest.py +7 -0
  20. sklearnex/glob/dispatcher.py +16 -2
  21. sklearnex/linear_model/coordinate_descent.py +13 -0
  22. sklearnex/linear_model/incremental_linear.py +102 -25
  23. sklearnex/linear_model/linear.py +25 -39
  24. sklearnex/linear_model/logistic_regression.py +92 -74
  25. sklearnex/linear_model/ridge.py +7 -0
  26. sklearnex/linear_model/tests/test_incremental_linear.py +10 -10
  27. sklearnex/linear_model/tests/test_linear.py +30 -5
  28. sklearnex/linear_model/tests/test_logreg.py +45 -3
  29. sklearnex/manifold/t_sne.py +4 -0
  30. sklearnex/metrics/pairwise.py +5 -0
  31. sklearnex/metrics/ranking.py +3 -0
  32. sklearnex/model_selection/split.py +3 -0
  33. sklearnex/neighbors/_lof.py +9 -0
  34. sklearnex/neighbors/common.py +45 -1
  35. sklearnex/neighbors/knn_classification.py +1 -20
  36. sklearnex/neighbors/knn_regression.py +25 -20
  37. sklearnex/neighbors/knn_unsupervised.py +31 -7
  38. sklearnex/preview/__init__.py +1 -1
  39. sklearnex/preview/decomposition/__init__.py +19 -0
  40. sklearnex/preview/decomposition/incremental_pca.py +228 -0
  41. sklearnex/preview/decomposition/tests/test_incremental_pca.py +266 -0
  42. sklearnex/preview/linear_model/__init__.py +19 -0
  43. sklearnex/preview/linear_model/ridge.py +419 -0
  44. sklearnex/preview/linear_model/tests/test_ridge.py +102 -0
  45. sklearnex/spmd/basic_statistics/tests/test_basic_statistics_spmd.py +107 -0
  46. sklearnex/spmd/cluster/tests/test_dbscan_spmd.py +97 -0
  47. sklearnex/spmd/cluster/tests/test_kmeans_spmd.py +172 -0
  48. sklearnex/spmd/covariance/tests/test_covariance_spmd.py +107 -0
  49. sklearnex/spmd/decomposition/tests/test_pca_spmd.py +128 -0
  50. sklearnex/spmd/ensemble/tests/test_forest_spmd.py +265 -0
  51. sklearnex/spmd/linear_model/tests/test_linear_regression_spmd.py +145 -0
  52. sklearnex/spmd/linear_model/tests/test_logistic_regression_spmd.py +163 -0
  53. sklearnex/spmd/neighbors/tests/test_neighbors_spmd.py +288 -0
  54. sklearnex/svm/_common.py +163 -20
  55. sklearnex/svm/nusvc.py +40 -4
  56. sklearnex/svm/nusvr.py +31 -2
  57. sklearnex/svm/svc.py +40 -4
  58. sklearnex/svm/svr.py +31 -2
  59. sklearnex/svm/tests/test_svm.py +12 -20
  60. sklearnex/tests/_utils.py +185 -30
  61. sklearnex/tests/_utils_spmd.py +185 -0
  62. sklearnex/tests/test_common.py +54 -0
  63. sklearnex/tests/test_config.py +4 -0
  64. sklearnex/tests/test_memory_usage.py +185 -126
  65. sklearnex/tests/test_monkeypatch.py +12 -4
  66. sklearnex/tests/test_patching.py +21 -25
  67. sklearnex/tests/test_run_to_run_stability.py +295 -0
  68. sklearnex/utils/_namespace.py +1 -1
  69. scikit_learn_intelex-2024.5.0.dist-info/RECORD +0 -104
  70. sklearnex/tests/test_run_to_run_stability_tests.py +0 -428
  71. {scikit_learn_intelex-2024.5.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/LICENSE.txt +0 -0
  72. {scikit_learn_intelex-2024.5.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/WHEEL +0 -0
  73. {scikit_learn_intelex-2024.5.0.dist-info → scikit_learn_intelex-2024.7.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,288 @@
1
+ # ==============================================================================
2
+ # Copyright 2024 Intel Corporation
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ # ==============================================================================
16
+
17
+ import numpy as np
18
+ import pytest
19
+ from numpy.testing import assert_allclose
20
+
21
+ from onedal.tests.utils._dataframes_support import (
22
+ _convert_to_dataframe,
23
+ get_dataframes_and_queues,
24
+ )
25
+ from sklearnex.tests._utils_spmd import (
26
+ _assert_unordered_allclose,
27
+ _generate_classification_data,
28
+ _generate_regression_data,
29
+ _get_local_tensor,
30
+ _mpi_libs_and_gpu_available,
31
+ _spmd_assert_allclose,
32
+ )
33
+
34
+
35
+ @pytest.mark.skipif(
36
+ not _mpi_libs_and_gpu_available,
37
+ reason="GPU device and MPI libs required for test",
38
+ )
39
+ @pytest.mark.parametrize(
40
+ "dataframe,queue",
41
+ get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
42
+ )
43
+ @pytest.mark.mpi
44
+ def test_knncls_spmd_gold(dataframe, queue):
45
+ # Import spmd and batch algo
46
+ from sklearnex.neighbors import KNeighborsClassifier as KNeighborsClassifier_Batch
47
+ from sklearnex.spmd.neighbors import KNeighborsClassifier as KNeighborsClassifier_SPMD
48
+
49
+ # Create gold data and convert to dataframe
50
+ X_train = np.array(
51
+ [
52
+ [0.0, 0.0],
53
+ [0.0, 1.0],
54
+ [1.0, 0.0],
55
+ [0.0, 2.0],
56
+ [2.0, 0.0],
57
+ [0.9, 1.0],
58
+ [0.0, -1.0],
59
+ [-1.0, 0.0],
60
+ [-1.0, -1.0],
61
+ ]
62
+ )
63
+ # TODO: handle situations where not all classes are present on all ranks?
64
+ y_train = np.array([0, 1, 0, 1, 0, 1, 0, 1, 0])
65
+ X_test = np.array(
66
+ [
67
+ [1.0, -0.5],
68
+ [-5.0, 1.0],
69
+ [0.0, 1.0],
70
+ [10.0, -10.0],
71
+ ]
72
+ )
73
+
74
+ local_dpt_X_train = _convert_to_dataframe(
75
+ _get_local_tensor(X_train), sycl_queue=queue, target_df=dataframe
76
+ )
77
+ local_dpt_y_train = _convert_to_dataframe(
78
+ _get_local_tensor(y_train), sycl_queue=queue, target_df=dataframe
79
+ )
80
+ local_dpt_X_test = _convert_to_dataframe(
81
+ _get_local_tensor(X_test), sycl_queue=queue, target_df=dataframe
82
+ )
83
+
84
+ # Ensure predictions of batch algo match spmd
85
+ spmd_model = KNeighborsClassifier_SPMD(n_neighbors=1, algorithm="brute").fit(
86
+ local_dpt_X_train, local_dpt_y_train
87
+ )
88
+ batch_model = KNeighborsClassifier_Batch(n_neighbors=1, algorithm="brute").fit(
89
+ X_train, y_train
90
+ )
91
+ spmd_dists, spmd_indcs = spmd_model.kneighbors(local_dpt_X_test)
92
+ batch_dists, batch_indcs = batch_model.kneighbors(X_test)
93
+ spmd_result = spmd_model.predict(local_dpt_X_test)
94
+ batch_result = batch_model.predict(X_test)
95
+
96
+ _assert_unordered_allclose(spmd_indcs, batch_indcs, localize=True)
97
+ _assert_unordered_allclose(spmd_dists, batch_dists, localize=True)
98
+ _spmd_assert_allclose(spmd_result, batch_result)
99
+
100
+
101
+ @pytest.mark.skipif(
102
+ not _mpi_libs_and_gpu_available,
103
+ reason="GPU device and MPI libs required for test",
104
+ )
105
+ @pytest.mark.parametrize("n_samples", [200, 10000])
106
+ @pytest.mark.parametrize("n_features_and_classes", [(5, 2), (25, 2), (25, 10)])
107
+ @pytest.mark.parametrize("n_neighbors", [1, 5, 20])
108
+ @pytest.mark.parametrize("weights", ["uniform", "distance"])
109
+ @pytest.mark.parametrize(
110
+ "dataframe,queue",
111
+ get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
112
+ )
113
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
114
+ @pytest.mark.mpi
115
+ def test_knncls_spmd_synthetic(
116
+ n_samples,
117
+ n_features_and_classes,
118
+ n_neighbors,
119
+ weights,
120
+ dataframe,
121
+ queue,
122
+ dtype,
123
+ metric="euclidean",
124
+ ):
125
+ n_features, n_classes = n_features_and_classes
126
+ # Import spmd and batch algo
127
+ from sklearnex.neighbors import KNeighborsClassifier as KNeighborsClassifier_Batch
128
+ from sklearnex.spmd.neighbors import KNeighborsClassifier as KNeighborsClassifier_SPMD
129
+
130
+ # Generate data and convert to dataframe
131
+ X_train, X_test, y_train, _ = _generate_classification_data(
132
+ n_samples, n_features, n_classes, dtype=dtype
133
+ )
134
+
135
+ local_dpt_X_train = _convert_to_dataframe(
136
+ _get_local_tensor(X_train), sycl_queue=queue, target_df=dataframe
137
+ )
138
+ local_dpt_y_train = _convert_to_dataframe(
139
+ _get_local_tensor(y_train), sycl_queue=queue, target_df=dataframe
140
+ )
141
+ local_dpt_X_test = _convert_to_dataframe(
142
+ _get_local_tensor(X_test), sycl_queue=queue, target_df=dataframe
143
+ )
144
+
145
+ # Ensure predictions of batch algo match spmd
146
+ spmd_model = KNeighborsClassifier_SPMD(
147
+ n_neighbors=n_neighbors, weights=weights, metric=metric, algorithm="brute"
148
+ ).fit(local_dpt_X_train, local_dpt_y_train)
149
+ batch_model = KNeighborsClassifier_Batch(
150
+ n_neighbors=n_neighbors, weights=weights, metric=metric, algorithm="brute"
151
+ ).fit(X_train, y_train)
152
+ spmd_dists, spmd_indcs = spmd_model.kneighbors(local_dpt_X_test)
153
+ batch_dists, batch_indcs = batch_model.kneighbors(X_test)
154
+ spmd_result = spmd_model.predict(local_dpt_X_test)
155
+ batch_result = batch_model.predict(X_test)
156
+
157
+ tol = 1e-4
158
+ if dtype == np.float64:
159
+ _assert_unordered_allclose(spmd_indcs, batch_indcs, localize=True)
160
+ _assert_unordered_allclose(
161
+ spmd_dists, batch_dists, localize=True, rtol=tol, atol=tol
162
+ )
163
+ _spmd_assert_allclose(spmd_result, batch_result)
164
+
165
+
166
+ @pytest.mark.skipif(
167
+ not _mpi_libs_and_gpu_available,
168
+ reason="GPU device and MPI libs required for test",
169
+ )
170
+ @pytest.mark.parametrize(
171
+ "dataframe,queue",
172
+ get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
173
+ )
174
+ @pytest.mark.mpi
175
+ def test_knnreg_spmd_gold(dataframe, queue):
176
+ # Import spmd and batch algo
177
+ from sklearnex.neighbors import KNeighborsRegressor as KNeighborsRegressor_Batch
178
+ from sklearnex.spmd.neighbors import KNeighborsRegressor as KNeighborsRegressor_SPMD
179
+
180
+ # Create gold data and convert to dataframe
181
+ X_train = np.array(
182
+ [
183
+ [0.0, 0.0],
184
+ [0.0, 1.0],
185
+ [1.0, 0.0],
186
+ [0.0, 2.0],
187
+ [2.0, 0.0],
188
+ [1.0, 1.0],
189
+ [0.0, -1.0],
190
+ [-1.0, 0.0],
191
+ [-1.0, -1.0],
192
+ ]
193
+ )
194
+ y_train = np.array([3.0, 5.0, 4.0, 7.0, 5.0, 6.0, 1.0, 2.0, 0.0])
195
+ X_test = np.array(
196
+ [
197
+ [1.0, -0.5],
198
+ [-5.0, 1.0],
199
+ [0.0, 1.0],
200
+ [10.0, -10.0],
201
+ ]
202
+ )
203
+
204
+ local_dpt_X_train = _convert_to_dataframe(
205
+ _get_local_tensor(X_train), sycl_queue=queue, target_df=dataframe
206
+ )
207
+ local_dpt_y_train = _convert_to_dataframe(
208
+ _get_local_tensor(y_train), sycl_queue=queue, target_df=dataframe
209
+ )
210
+ local_dpt_X_test = _convert_to_dataframe(
211
+ _get_local_tensor(X_test), sycl_queue=queue, target_df=dataframe
212
+ )
213
+
214
+ # Ensure predictions of batch algo match spmd
215
+ spmd_model = KNeighborsRegressor_SPMD(n_neighbors=1, algorithm="brute").fit(
216
+ local_dpt_X_train, local_dpt_y_train
217
+ )
218
+ batch_model = KNeighborsRegressor_Batch(n_neighbors=1, algorithm="brute").fit(
219
+ X_train, y_train
220
+ )
221
+ spmd_dists, spmd_indcs = spmd_model.kneighbors(local_dpt_X_test)
222
+ batch_dists, batch_indcs = batch_model.kneighbors(X_test)
223
+ spmd_result = spmd_model.predict(local_dpt_X_test)
224
+ batch_result = batch_model.predict(X_test)
225
+
226
+ _assert_unordered_allclose(spmd_indcs, batch_indcs, localize=True)
227
+ _assert_unordered_allclose(spmd_dists, batch_dists, localize=True)
228
+ _spmd_assert_allclose(spmd_result, batch_result)
229
+
230
+
231
+ @pytest.mark.skipif(
232
+ not _mpi_libs_and_gpu_available,
233
+ reason="GPU device and MPI libs required for test",
234
+ )
235
+ @pytest.mark.parametrize("n_samples", [200, 10000])
236
+ @pytest.mark.parametrize("n_features", [5, 25])
237
+ @pytest.mark.parametrize("n_neighbors", [1, 5, 20])
238
+ @pytest.mark.parametrize("weights", ["uniform", "distance"])
239
+ @pytest.mark.parametrize(
240
+ "metric", ["euclidean", "manhattan", "minkowski", "chebyshev", "cosine"]
241
+ )
242
+ @pytest.mark.parametrize(
243
+ "dataframe,queue",
244
+ get_dataframes_and_queues(dataframe_filter_="dpnp,dpctl", device_filter_="gpu"),
245
+ )
246
+ @pytest.mark.parametrize("dtype", [np.float32, np.float64])
247
+ @pytest.mark.mpi
248
+ def test_knnreg_spmd_synthetic(
249
+ n_samples, n_features, n_neighbors, weights, metric, dataframe, queue, dtype
250
+ ):
251
+ # Import spmd and batch algo
252
+ from sklearnex.neighbors import KNeighborsRegressor as KNeighborsRegressor_Batch
253
+ from sklearnex.spmd.neighbors import KNeighborsRegressor as KNeighborsRegressor_SPMD
254
+
255
+ # Generate data and convert to dataframe
256
+ X_train, X_test, y_train, _ = _generate_regression_data(
257
+ n_samples, n_features, dtype=dtype
258
+ )
259
+
260
+ local_dpt_X_train = _convert_to_dataframe(
261
+ _get_local_tensor(X_train), sycl_queue=queue, target_df=dataframe
262
+ )
263
+ local_dpt_y_train = _convert_to_dataframe(
264
+ _get_local_tensor(y_train), sycl_queue=queue, target_df=dataframe
265
+ )
266
+ local_dpt_X_test = _convert_to_dataframe(
267
+ _get_local_tensor(X_test), sycl_queue=queue, target_df=dataframe
268
+ )
269
+
270
+ # Ensure predictions of batch algo match spmd
271
+ spmd_model = KNeighborsRegressor_SPMD(
272
+ n_neighbors=n_neighbors, weights=weights, metric=metric, algorithm="brute"
273
+ ).fit(local_dpt_X_train, local_dpt_y_train)
274
+ batch_model = KNeighborsRegressor_Batch(
275
+ n_neighbors=n_neighbors, weights=weights, metric=metric, algorithm="brute"
276
+ ).fit(X_train, y_train)
277
+ spmd_dists, spmd_indcs = spmd_model.kneighbors(local_dpt_X_test)
278
+ batch_dists, batch_indcs = batch_model.kneighbors(X_test)
279
+ spmd_result = spmd_model.predict(local_dpt_X_test)
280
+ batch_result = batch_model.predict(X_test)
281
+
282
+ tol = 0.005 if dtype == np.float32 else 1e-4
283
+ if dtype == np.float64:
284
+ _assert_unordered_allclose(spmd_indcs, batch_indcs, localize=True)
285
+ _assert_unordered_allclose(
286
+ spmd_dists, batch_dists, localize=True, rtol=tol, atol=tol
287
+ )
288
+ _spmd_assert_allclose(spmd_result, batch_result, rtol=tol, atol=tol)
sklearnex/svm/_common.py CHANGED
@@ -14,16 +14,21 @@
14
14
  # limitations under the License.
15
15
  # ==============================================================================
16
16
 
17
+ import warnings
17
18
  from abc import ABC
19
+ from numbers import Number, Real
18
20
 
19
21
  import numpy as np
22
+ from scipy import sparse as sp
23
+ from sklearn.base import BaseEstimator, ClassifierMixin
20
24
  from sklearn.calibration import CalibratedClassifierCV
21
- from sklearn.model_selection import StratifiedKFold
25
+ from sklearn.metrics import r2_score
22
26
  from sklearn.preprocessing import LabelEncoder
23
27
 
24
28
  from daal4py.sklearn._utils import sklearn_check_version
25
- from onedal.utils import _column_or_1d
29
+ from onedal.utils import _check_array, _check_X_y, _column_or_1d
26
30
 
31
+ from .._config import config_context, get_config
27
32
  from .._utils import PatchingConditionsChain
28
33
 
29
34
 
@@ -51,7 +56,8 @@ def set_intercept(self, value):
51
56
  del self._onedal_estimator._onedal_model
52
57
 
53
58
 
54
- class BaseSVM(ABC):
59
+ class BaseSVM(BaseEstimator, ABC):
60
+
55
61
  def _onedal_gpu_supported(self, method_name, *data):
56
62
  patching_status = PatchingConditionsChain(f"sklearn.{method_name}")
57
63
  patching_status.and_conditions([(False, "GPU offloading is not supported.")])
@@ -74,7 +80,7 @@ class BaseSVM(ABC):
74
80
  )
75
81
  return patching_status
76
82
  inference_methods = (
77
- ["predict"]
83
+ ["predict", "score"]
78
84
  if class_name.endswith("R")
79
85
  else ["predict", "predict_proba", "decision_function", "score"]
80
86
  )
@@ -85,6 +91,130 @@ class BaseSVM(ABC):
85
91
  return patching_status
86
92
  raise RuntimeError(f"Unknown method {method_name} in {class_name}")
87
93
 
94
+ def _compute_gamma_sigma(self, X):
95
+ # only run extended conversion if kernel is not linear
96
+ # set to a value = 1.0, so gamma will always be passed to
97
+ # the onedal estimator as a float type
98
+ if self.kernel == "linear":
99
+ return 1.0
100
+
101
+ if isinstance(self.gamma, str):
102
+ if self.gamma == "scale":
103
+ if sp.issparse(X):
104
+ # var = E[X^2] - E[X]^2
105
+ X_sc = (X.multiply(X)).mean() - (X.mean()) ** 2
106
+ else:
107
+ X_sc = X.var()
108
+ _gamma = 1.0 / (X.shape[1] * X_sc) if X_sc != 0 else 1.0
109
+ elif self.gamma == "auto":
110
+ _gamma = 1.0 / X.shape[1]
111
+ else:
112
+ raise ValueError(
113
+ "When 'gamma' is a string, it should be either 'scale' or "
114
+ "'auto'. Got '{}' instead.".format(self.gamma)
115
+ )
116
+ else:
117
+ if sklearn_check_version("1.1") and not sklearn_check_version("1.2"):
118
+ if isinstance(self.gamma, Real):
119
+ if self.gamma <= 0:
120
+ msg = (
121
+ f"gamma value must be > 0; {self.gamma!r} is invalid. Use"
122
+ " a positive number or use 'auto' to set gamma to a"
123
+ " value of 1 / n_features."
124
+ )
125
+ raise ValueError(msg)
126
+ _gamma = self.gamma
127
+ else:
128
+ msg = (
129
+ "The gamma value should be set to 'scale', 'auto' or a"
130
+ f" positive float value. {self.gamma!r} is not a valid option"
131
+ )
132
+ raise ValueError(msg)
133
+ else:
134
+ _gamma = self.gamma
135
+ return _gamma
136
+
137
+ def _onedal_fit_checks(self, X, y, sample_weight=None):
138
+ if hasattr(self, "decision_function_shape"):
139
+ if self.decision_function_shape not in ("ovr", "ovo", None):
140
+ raise ValueError(
141
+ f"decision_function_shape must be either 'ovr' or 'ovo', "
142
+ f"got {self.decision_function_shape}."
143
+ )
144
+
145
+ if y is None:
146
+ if self._get_tags()["requires_y"]:
147
+ raise ValueError(
148
+ f"This {self.__class__.__name__} estimator "
149
+ f"requires y to be passed, but the target y is None."
150
+ )
151
+ # using onedal _check_X_y to insure X and y are contiguous
152
+ # finite check occurs in onedal estimator
153
+ X, y = _check_X_y(
154
+ X,
155
+ y,
156
+ dtype=[np.float64, np.float32],
157
+ force_all_finite=False,
158
+ accept_sparse="csr",
159
+ )
160
+ y = self._validate_targets(y)
161
+ sample_weight = self._get_sample_weight(X, y, sample_weight)
162
+ return X, y, sample_weight
163
+
164
+ def _get_sample_weight(self, X, y, sample_weight):
165
+ n_samples = X.shape[0]
166
+ dtype = X.dtype
167
+ if n_samples == 1:
168
+ raise ValueError("n_samples=1")
169
+
170
+ sample_weight = np.ascontiguousarray(
171
+ [] if sample_weight is None else sample_weight, dtype=np.float64
172
+ )
173
+
174
+ sample_weight_count = sample_weight.shape[0]
175
+ if sample_weight_count != 0 and sample_weight_count != n_samples:
176
+ raise ValueError(
177
+ "sample_weight and X have incompatible shapes: "
178
+ "%r vs %r\n"
179
+ "Note: Sparse matrices cannot be indexed w/"
180
+ "boolean masks (use `indices=True` in CV)."
181
+ % (len(sample_weight), X.shape)
182
+ )
183
+
184
+ if sample_weight_count == 0:
185
+ if not isinstance(self, ClassifierMixin) or self.class_weight_ is None:
186
+ return None
187
+ sample_weight = np.ones(n_samples, dtype=dtype)
188
+ elif isinstance(sample_weight, Number):
189
+ sample_weight = np.full(n_samples, sample_weight, dtype=dtype)
190
+ else:
191
+ sample_weight = _check_array(
192
+ sample_weight,
193
+ accept_sparse=False,
194
+ ensure_2d=False,
195
+ dtype=dtype,
196
+ order="C",
197
+ )
198
+ if sample_weight.ndim != 1:
199
+ raise ValueError("Sample weights must be 1D array or scalar")
200
+
201
+ if sample_weight.shape != (n_samples,):
202
+ raise ValueError(
203
+ "sample_weight.shape == {}, expected {}!".format(
204
+ sample_weight.shape, (n_samples,)
205
+ )
206
+ )
207
+
208
+ if np.all(sample_weight <= 0):
209
+ if "nusvc" in self.__module__:
210
+ raise ValueError("negative dimensions are not allowed")
211
+ else:
212
+ raise ValueError(
213
+ "Invalid input - all samples have zero or negative weights."
214
+ )
215
+
216
+ return sample_weight
217
+
88
218
 
89
219
  class BaseSVC(BaseSVM):
90
220
  def _compute_balanced_class_weight(self, y):
@@ -100,27 +230,34 @@ class BaseSVC(BaseSVM):
100
230
  return recip_freq[le.transform(classes)]
101
231
 
102
232
  def _fit_proba(self, X, y, sample_weight=None, queue=None):
233
+ # TODO: rewrite this method when probabilities output is implemented in oneDAL
234
+
235
+ # LibSVM uses the random seed to control cross-validation for probability generation
236
+ # CalibratedClassifierCV with "prefit" does not use an RNG nor a seed. This may
237
+ # impact users without their knowledge, so display a warning.
238
+ if self.random_state is not None:
239
+ warnings.warn(
240
+ "random_state does not influence oneDAL SVM results",
241
+ RuntimeWarning,
242
+ )
243
+
103
244
  params = self.get_params()
104
245
  params["probability"] = False
105
246
  params["decision_function_shape"] = "ovr"
106
247
  clf_base = self.__class__(**params)
107
248
 
108
- try:
109
- n_splits = 5
110
- n_jobs = n_splits if queue is None or queue.sycl_device.is_cpu else 1
111
- cv = StratifiedKFold(
112
- n_splits=n_splits, shuffle=True, random_state=self.random_state
113
- )
249
+ # We use stock metaestimators below, so the only way
250
+ # to pass a queue is using config_context.
251
+ cfg = get_config()
252
+ cfg["target_offload"] = queue
253
+ with config_context(**cfg):
254
+ clf_base.fit(X, y)
114
255
  self.clf_prob = CalibratedClassifierCV(
115
- clf_base, ensemble=False, cv=cv, method="sigmoid", n_jobs=n_jobs
116
- )
117
- self.clf_prob.fit(X, y, sample_weight)
118
- except ValueError:
119
- clf_base = clf_base.fit(X, y, sample_weight)
120
- self.clf_prob = CalibratedClassifierCV(
121
- clf_base, cv="prefit", method="sigmoid"
122
- )
123
- self.clf_prob.fit(X, y, sample_weight)
256
+ clf_base,
257
+ ensemble=False,
258
+ cv="prefit",
259
+ method="sigmoid",
260
+ ).fit(X, y)
124
261
 
125
262
  def _save_attributes(self):
126
263
  self.support_vectors_ = self._onedal_estimator.support_vectors_
@@ -129,7 +266,8 @@ class BaseSVC(BaseSVM):
129
266
  self.dual_coef_ = self._onedal_estimator.dual_coef_
130
267
  self.shape_fit_ = self._onedal_estimator.class_weight_
131
268
  self.classes_ = self._onedal_estimator.classes_
132
- self.class_weight_ = self._onedal_estimator.class_weight_
269
+ if isinstance(self, ClassifierMixin) or not sklearn_check_version("1.2"):
270
+ self.class_weight_ = self._onedal_estimator.class_weight_
133
271
  self.support_ = self._onedal_estimator.support_
134
272
 
135
273
  self._intercept_ = self._onedal_estimator.intercept_
@@ -183,3 +321,8 @@ class BaseSVR(BaseSVM):
183
321
 
184
322
  if sklearn_check_version("1.1"):
185
323
  self.n_iter_ = self._onedal_estimator.n_iter_
324
+
325
+ def _onedal_score(self, X, y, sample_weight=None, queue=None):
326
+ return r2_score(
327
+ y, self._onedal_predict(X, queue=queue), sample_weight=sample_weight
328
+ )
sklearnex/svm/nusvc.py CHANGED
@@ -83,6 +83,17 @@ class NuSVC(sklearn_NuSVC, BaseSVC):
83
83
  def fit(self, X, y, sample_weight=None):
84
84
  if sklearn_check_version("1.2"):
85
85
  self._validate_params()
86
+ elif self.nu <= 0 or self.nu > 1:
87
+ # else if added to correct issues with
88
+ # sklearn tests:
89
+ # svm/tests/test_sparse.py::test_error
90
+ # svm/tests/test_svm.py::test_bad_input
91
+ # for sklearn versions < 1.2 (i.e. without
92
+ # validate_params parameter checking)
93
+ # Without this, a segmentation fault with
94
+ # Windows fatal exception: access violation
95
+ # occurs
96
+ raise ValueError("nu <= 0 or nu > 1")
86
97
  if sklearn_check_version("1.0"):
87
98
  self._check_feature_names(X, reset=True)
88
99
  dispatch(
@@ -94,7 +105,7 @@ class NuSVC(sklearn_NuSVC, BaseSVC):
94
105
  },
95
106
  X,
96
107
  y,
97
- sample_weight,
108
+ sample_weight=sample_weight,
98
109
  )
99
110
 
100
111
  return self
@@ -242,12 +253,31 @@ class NuSVC(sklearn_NuSVC, BaseSVC):
242
253
 
243
254
  decision_function.__doc__ = sklearn_NuSVC.decision_function.__doc__
244
255
 
256
+ def _get_sample_weight(self, X, y, sample_weight=None):
257
+ sample_weight = super()._get_sample_weight(X, y, sample_weight)
258
+ if sample_weight is None:
259
+ return sample_weight
260
+
261
+ weight_per_class = [
262
+ np.sum(sample_weight[y == class_label]) for class_label in np.unique(y)
263
+ ]
264
+
265
+ for i in range(len(weight_per_class)):
266
+ for j in range(i + 1, len(weight_per_class)):
267
+ if self.nu * (weight_per_class[i] + weight_per_class[j]) / 2 > min(
268
+ weight_per_class[i], weight_per_class[j]
269
+ ):
270
+ raise ValueError("specified nu is infeasible")
271
+
272
+ return sample_weight
273
+
245
274
  def _onedal_fit(self, X, y, sample_weight=None, queue=None):
275
+ X, _, weights = self._onedal_fit_checks(X, y, sample_weight)
246
276
  onedal_params = {
247
277
  "nu": self.nu,
248
278
  "kernel": self.kernel,
249
279
  "degree": self.degree,
250
- "gamma": self.gamma,
280
+ "gamma": self._compute_gamma_sigma(X),
251
281
  "coef0": self.coef0,
252
282
  "tol": self.tol,
253
283
  "shrinking": self.shrinking,
@@ -259,10 +289,16 @@ class NuSVC(sklearn_NuSVC, BaseSVC):
259
289
  }
260
290
 
261
291
  self._onedal_estimator = onedal_NuSVC(**onedal_params)
262
- self._onedal_estimator.fit(X, y, sample_weight, queue=queue)
292
+ self._onedal_estimator.fit(X, y, weights, queue=queue)
263
293
 
264
294
  if self.probability:
265
- self._fit_proba(X, y, sample_weight, queue=queue)
295
+ self._fit_proba(
296
+ X,
297
+ y,
298
+ sample_weight=sample_weight,
299
+ queue=queue,
300
+ )
301
+
266
302
  self._save_attributes()
267
303
 
268
304
  def _onedal_predict(self, X, queue=None):
sklearnex/svm/nusvr.py CHANGED
@@ -65,6 +65,17 @@ class NuSVR(sklearn_NuSVR, BaseSVR):
65
65
  def fit(self, X, y, sample_weight=None):
66
66
  if sklearn_check_version("1.2"):
67
67
  self._validate_params()
68
+ elif self.nu <= 0 or self.nu > 1:
69
+ # else if added to correct issues with
70
+ # sklearn tests:
71
+ # svm/tests/test_sparse.py::test_error
72
+ # svm/tests/test_svm.py::test_bad_input
73
+ # for sklearn versions < 1.2 (i.e. without
74
+ # validate_params parameter checking)
75
+ # Without this, a segmentation fault with
76
+ # Windows fatal exception: access violation
77
+ # occurs
78
+ raise ValueError("nu <= 0 or nu > 1")
68
79
  if sklearn_check_version("1.0"):
69
80
  self._check_feature_names(X, reset=True)
70
81
  dispatch(
@@ -76,7 +87,7 @@ class NuSVR(sklearn_NuSVR, BaseSVR):
76
87
  },
77
88
  X,
78
89
  y,
79
- sample_weight,
90
+ sample_weight=sample_weight,
80
91
  )
81
92
  return self
82
93
 
@@ -94,13 +105,30 @@ class NuSVR(sklearn_NuSVR, BaseSVR):
94
105
  X,
95
106
  )
96
107
 
108
+ @wrap_output_data
109
+ def score(self, X, y, sample_weight=None):
110
+ if sklearn_check_version("1.0"):
111
+ self._check_feature_names(X, reset=False)
112
+ return dispatch(
113
+ self,
114
+ "score",
115
+ {
116
+ "onedal": self.__class__._onedal_score,
117
+ "sklearn": sklearn_NuSVR.score,
118
+ },
119
+ X,
120
+ y,
121
+ sample_weight=sample_weight,
122
+ )
123
+
97
124
  def _onedal_fit(self, X, y, sample_weight=None, queue=None):
125
+ X, _, sample_weight = self._onedal_fit_checks(X, y, sample_weight)
98
126
  onedal_params = {
99
127
  "C": self.C,
100
128
  "nu": self.nu,
101
129
  "kernel": self.kernel,
102
130
  "degree": self.degree,
103
- "gamma": self.gamma,
131
+ "gamma": self._compute_gamma_sigma(X),
104
132
  "coef0": self.coef0,
105
133
  "tol": self.tol,
106
134
  "shrinking": self.shrinking,
@@ -117,3 +145,4 @@ class NuSVR(sklearn_NuSVR, BaseSVR):
117
145
 
118
146
  fit.__doc__ = sklearn_NuSVR.fit.__doc__
119
147
  predict.__doc__ = sklearn_NuSVR.predict.__doc__
148
+ score.__doc__ = sklearn_NuSVR.score.__doc__