optiml 1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. optiml/__init__.py +0 -0
  2. optiml/ml/__init__.py +0 -0
  3. optiml/ml/neural_network/__init__.py +3 -0
  4. optiml/ml/neural_network/_base.py +475 -0
  5. optiml/ml/neural_network/activations.py +79 -0
  6. optiml/ml/neural_network/initializers.py +66 -0
  7. optiml/ml/neural_network/layers.py +183 -0
  8. optiml/ml/neural_network/losses.py +178 -0
  9. optiml/ml/neural_network/regularizers.py +87 -0
  10. optiml/ml/svm/__init__.py +3 -0
  11. optiml/ml/svm/_base.py +1442 -0
  12. optiml/ml/svm/kernels.py +208 -0
  13. optiml/ml/svm/losses.py +284 -0
  14. optiml/ml/svm/smo.py +797 -0
  15. optiml/ml/tests/__init__.py +0 -0
  16. optiml/ml/tests/_datasets.py +49 -0
  17. optiml/ml/tests/_utils.py +28 -0
  18. optiml/ml/tests/test_initializers.py +33 -0
  19. optiml/ml/tests/test_neural_network.py +86 -0
  20. optiml/ml/tests/test_svc.py +245 -0
  21. optiml/ml/tests/test_svr.py +256 -0
  22. optiml/ml/utils.py +252 -0
  23. optiml/opti/__init__.py +4 -0
  24. optiml/opti/_base.py +309 -0
  25. optiml/opti/constrained/__init__.py +9 -0
  26. optiml/opti/constrained/_base.py +404 -0
  27. optiml/opti/constrained/active_set.py +228 -0
  28. optiml/opti/constrained/frank_wolfe.py +158 -0
  29. optiml/opti/constrained/interior_point.py +282 -0
  30. optiml/opti/constrained/projected_gradient.py +138 -0
  31. optiml/opti/constrained/tests/__init__.py +0 -0
  32. optiml/opti/constrained/tests/test_active_set.py +16 -0
  33. optiml/opti/constrained/tests/test_frank_wolfe.py +16 -0
  34. optiml/opti/constrained/tests/test_interior_point.py +16 -0
  35. optiml/opti/constrained/tests/test_lagrangian_quadratic.py +26 -0
  36. optiml/opti/constrained/tests/test_lower_bound.py +29 -0
  37. optiml/opti/constrained/tests/test_projected_gradient.py +16 -0
  38. optiml/opti/unconstrained/__init__.py +6 -0
  39. optiml/opti/unconstrained/_base.py +63 -0
  40. optiml/opti/unconstrained/line_search/__init__.py +10 -0
  41. optiml/opti/unconstrained/line_search/_base.py +106 -0
  42. optiml/opti/unconstrained/line_search/conjugate_gradient.py +255 -0
  43. optiml/opti/unconstrained/line_search/gradient_descent.py +212 -0
  44. optiml/opti/unconstrained/line_search/line_search.py +248 -0
  45. optiml/opti/unconstrained/line_search/newton.py +198 -0
  46. optiml/opti/unconstrained/line_search/quasi_newton.py +496 -0
  47. optiml/opti/unconstrained/proximal_bundle.py +219 -0
  48. optiml/opti/unconstrained/stochastic/__init__.py +12 -0
  49. optiml/opti/unconstrained/stochastic/_base.py +246 -0
  50. optiml/opti/unconstrained/stochastic/adadelta.py +133 -0
  51. optiml/opti/unconstrained/stochastic/adagrad.py +123 -0
  52. optiml/opti/unconstrained/stochastic/adam.py +179 -0
  53. optiml/opti/unconstrained/stochastic/adamax.py +178 -0
  54. optiml/opti/unconstrained/stochastic/amsgrad.py +177 -0
  55. optiml/opti/unconstrained/stochastic/gradient_descent.py +135 -0
  56. optiml/opti/unconstrained/stochastic/rmsprop.py +156 -0
  57. optiml/opti/unconstrained/stochastic/schedules.py +89 -0
  58. optiml/opti/unconstrained/tests/__init__.py +0 -0
  59. optiml/opti/unconstrained/tests/test_adadelta.py +20 -0
  60. optiml/opti/unconstrained/tests/test_adagrad.py +20 -0
  61. optiml/opti/unconstrained/tests/test_adam.py +42 -0
  62. optiml/opti/unconstrained/tests/test_adamax.py +41 -0
  63. optiml/opti/unconstrained/tests/test_amsgrad.py +40 -0
  64. optiml/opti/unconstrained/tests/test_conjugate_gradient.py +35 -0
  65. optiml/opti/unconstrained/tests/test_functions.py +34 -0
  66. optiml/opti/unconstrained/tests/test_gradient_descent.py +51 -0
  67. optiml/opti/unconstrained/tests/test_newton.py +20 -0
  68. optiml/opti/unconstrained/tests/test_quasi_newton.py +30 -0
  69. optiml/opti/unconstrained/tests/test_rmsprop.py +40 -0
  70. optiml/opti/unconstrained/tests/test_verbose.py +25 -0
  71. optiml/opti/utils.py +353 -0
  72. optiml-1.7.dist-info/METADATA +203 -0
  73. optiml-1.7.dist-info/RECORD +76 -0
  74. optiml-1.7.dist-info/WHEEL +5 -0
  75. optiml-1.7.dist-info/licenses/LICENSE +21 -0
  76. optiml-1.7.dist-info/top_level.txt +1 -0
optiml/ml/svm/smo.py ADDED
@@ -0,0 +1,797 @@
1
+ import sys
2
+ import warnings
3
+ from abc import ABC
4
+
5
+ import numpy as np
6
+ from sklearn.exceptions import PositiveSpectrumWarning
7
+
8
+ from .kernels import LinearKernel
9
+
10
+
11
class SMO(ABC):
    """
    Common scaffolding for the sequential minimal optimization (SMO)
    solvers of the dual SVM problem.

    The constructor stores the training data, the Gram matrix and the
    solver state (bias, error cache, iteration counter) shared by the
    classification and regression variants.

    Concrete solvers are expected to override ``_take_step``,
    ``_examine_example`` and ``minimize``; the versions defined here
    only raise ``NotImplementedError``.
    """

    def __init__(self, quad, X, y, K, kernel, C, tol=1e-3, verbose=False):
        """
        Parameters
        ----------

        quad : `Quadratic` instance
            Quadratic objective of the dual problem; the solvers evaluate
            it to report the cost while optimizing.

        X : ndarray of shape (n_samples, n_features)
            Training samples.

        y : ndarray of shape (n_samples,)
            Targets paired with the rows of ``X``.

        K : ndarray of shape (n_samples, n_samples)
            Precomputed kernel (Gram) matrix of the training samples.

        kernel : `Kernel` instance
            Kernel function that produced ``K``. Only when it is a
            `LinearKernel` is the primal weight vector ``w`` created.

        C : float
            Regularization parameter, i.e., the upper bound on the
            Lagrange multipliers.

        tol : float, default=1e-3
            Tolerance of the KKT stopping criterion.

        verbose : bool or int, default=False
            Verbosity of the progress messages written to stdout.
        """
        # problem data
        self.X = X
        self.y = y
        self.K = K
        self.kernel = kernel
        self.quad = quad

        # hyper-parameters
        self.C = C
        self.tol = tol
        self.verbose = verbose

        # solver state
        self.b = 0.
        if isinstance(kernel, LinearKernel):
            # the explicit primal weights only exist for the linear kernel
            self.w = 0.
        self.errors = np.zeros(len(X))  # error cache, one slot per sample
        self.iter = 0

    def _take_step(self, i1, i2):
        # jointly optimize the pair (i1, i2); provided by subclasses
        raise NotImplementedError()

    def _examine_example(self, i2):
        # check sample i2 for optimality; provided by subclasses
        raise NotImplementedError()

    def minimize(self):
        # run the full optimization; provided by subclasses
        raise NotImplementedError()
75
+
76
+
77
class SMOClassifier(SMO):
    """
    Implements John Platt's sequential minimal optimization
    algorithm for training a support vector classifier.

    The SMO algorithm is an algorithm for solving large quadratic programming (QP)
    optimization problems, widely used for the training of support vector machines.
    First developed by John C. Platt in 1998, SMO breaks up large QP problems into a
    series of smallest possible QP problems, which are then solved analytically.

    This class follows the original algorithm by Platt with additional modifications
    by Keerthi et al.: the training indices are partitioned in the sets ``I0`` ... ``I4``
    and two thresholds, ``b_up`` and ``b_low``, are tracked instead of a single bias.

    References
    ----------

    John C. Platt. Sequential Minimal Optimization: A Fast Algorithm for Training Support Vector Machines.

    S.S. Keerthi, S.K. Shevade, C. Bhattacharyya, K.R.K. Murthy. Improvements to Platt's SMO
    Algorithm for SVM Classifier Design. Technical Report CD-99-14.
    """

    def __init__(self, quad, X, y, K, kernel, C, tol=1e-3, verbose=False):
        """
        Parameters
        ----------

        quad : `Quadratic` instance
            The quadratic objective of the dual problem, used to monitor
            the cost during the optimization.

        X : ndarray of shape (n_samples, n_features)
            Training data.

        y : ndarray of shape (n_samples,)
            Class labels associated with ``X``; the index sets are built
            by comparing each label with +1 and -1.

        K : ndarray of shape (n_samples, n_samples)
            Precomputed kernel (Gram) matrix of the training data.

        kernel : `Kernel` instance
            The kernel function used to build ``K``. If it is a `LinearKernel`
            the primal weight vector ``w`` is maintained explicitly.

        C : float
            Regularization parameter, i.e., the upper bound on the
            Lagrange multipliers.

        tol : float, default=1e-3
            Tolerance for the KKT stopping criterion.

        verbose : bool or int, default=False
            Controls the verbosity of progress messages to stdout.

        Raises
        ------

        ValueError
            If ``y`` does not contain at least one +1 and one -1 label.
        """
        self.alphas = np.zeros(len(X))
        super(SMOClassifier, self).__init__(quad, X, y, K, kernel, C, tol, verbose)

        # initialize variables and structures to implement improvements
        # on the original Platt's SMO algorithm described in Keerthi et
        # al. for better performance and efficiency

        n_samples = len(X)

        # set of indices
        # {i : 0 < alphas[i] < C}
        self.I0 = set()
        # {i : y[i] = +1, alphas[i] = 0}
        self.I1 = {i for i in range(n_samples) if y[i] == 1}
        # {i : y[i] = -1, alphas[i] = C}
        self.I2 = set()
        # {i : y[i] = +1, alphas[i] = C}
        self.I3 = set()
        # {i : y[i] = -1, alphas[i] = 0}
        self.I4 = {i for i in range(n_samples) if y[i] == -1}

        # both thresholds need a representative index, so fail with a
        # clear message instead of leaking a StopIteration when one of
        # the two classes is missing
        if not self.I1 or not self.I4:
            raise ValueError('y must contain at least one sample of class +1 '
                             'and at least one sample of class -1')

        # multiple thresholds
        self.b_up = -1
        self.b_low = 1
        # initialize b_up_idx to the first index of class +1
        self.b_up_idx = min(self.I1)
        # initialize b_low_idx to the first index of class -1
        self.b_low_idx = min(self.I4)

        self.errors[self.b_up_idx] = -1
        self.errors[self.b_low_idx] = 1

    def _take_step(self, i1, i2):
        """
        Jointly optimize the multipliers of the samples ``i1`` and ``i2``.

        On success the multipliers, the error cache, the index sets, the
        thresholds and (for a linear kernel) the weight vector are updated.

        Returns
        -------

        bool
            True if the pair was updated, False if no progress was possible.

        Raises
        ------

        RuntimeError
            If no index could be assigned to ``b_low`` or ``b_up`` after the step.
        """
        # skip if chosen alphas are the same
        if i1 == i2:
            return False

        alpha1, y1, E1 = self.alphas[i1], self.y[i1], self.errors[i1]
        alpha2, y2, E2 = self.alphas[i2], self.y[i2], self.errors[i2]

        s = y1 * y2

        # compute L and H, the bounds on new possible alpha values
        # based on equations 13 and 14 in Platt's paper
        if y1 != y2:
            L = max(0, alpha2 - alpha1)
            H = min(self.C, self.C + alpha2 - alpha1)
        else:
            L = max(0, alpha2 + alpha1 - self.C)
            H = min(self.C, alpha2 + alpha1)

        if L == H:
            return False

        k11, k12, k22 = self.K[i1, i1], self.K[i1, i2], self.K[i2, i2]

        # compute the 2nd derivative of the objective function along
        # the diagonal line based on equation 15 in Platt's paper
        eta = k11 + k22 - 2 * k12

        # under normal circumstances, the objective function will be positive
        # definite, there will be a minimum along the direction of the linear
        # equality constraint, and eta will be greater than zero
        if eta > 0:
            # compute the new alpha2 based on equation 16 and clip it to
            # the bounds L and H based on equation 17 in Platt's paper
            a2 = max(L, min(alpha2 + y2 * (E1 - E2) / eta, H))
        else:
            # no interior minimum: compare the two ends of the segment
            # and move to the better one, if any
            Lobj = y2 * (E1 - E2) * L
            Hobj = y2 * (E1 - E2) * H

            if Lobj > Hobj + 1e-12:
                a2 = L
            elif Lobj < Hobj - 1e-12:
                a2 = H
            else:
                a2 = alpha2

            warnings.warn('kernel matrix is not positive definite', PositiveSpectrumWarning)

        # if examples can't be optimized within tol, skip this pair
        if abs(a2 - alpha2) < 1e-12 * (a2 + alpha2 + 1e-12):
            return False

        # calculate new alpha1 based on equation 18 in Platt's paper
        a1 = alpha1 + s * (alpha2 - a2)

        # signed changes of the two multipliers, shared by all the updates below
        delta1 = y1 * (a1 - alpha1)
        delta2 = y2 * (a2 - alpha2)

        # update weight vector to reflect change in a1 and a2, if
        # kernel is linear, based on equation 22 in Platt's paper
        if isinstance(self.kernel, LinearKernel):
            self.w += delta1 * self.X[i1] + delta2 * self.X[i2]

        # update error cache using new alphas
        for i in self.I0:
            if i != i1 and i != i2:
                self.errors[i] += delta1 * self.K[i1, i] + delta2 * self.K[i2, i]
        # update error cache using new alphas for i1 and i2
        self.errors[i1] += delta1 * k11 + delta2 * k12
        self.errors[i2] += delta1 * k12 + delta2 * k22

        # to prevent precision problems
        if a2 > self.C - 1e-8 * self.C:
            a2 = self.C
        elif a2 <= 1e-8 * self.C:
            a2 = 0.

        if a1 > self.C - 1e-8 * self.C:
            a1 = self.C
        elif a1 <= 1e-8 * self.C:
            a1 = 0.

        # update model object with new alphas
        self.alphas[i1] = a1
        self.alphas[i2] = a2

        # update the sets of indices for i1 and i2
        for i in (i1, i2):
            alpha, label = self.alphas[i], self.y[i]
            memberships = (
                (self.I0, 0 < alpha < self.C),
                (self.I1, label == 1 and alpha == 0),
                (self.I2, label == -1 and alpha == self.C),
                (self.I3, label == 1 and alpha == self.C),
                (self.I4, label == -1 and alpha == 0),
            )
            for index_set, belongs in memberships:
                if belongs:
                    index_set.add(i)
                else:
                    index_set.discard(i)

        # update thresholds (b_up, b_up_idx) and (b_low, b_low_idx)
        # by applying equations 11a and 11b, using only i1, i2 and
        # indices in I0 as suggested in item 3 of section 5 in
        # Keerthi et al.
        self.b_up_idx = -1
        self.b_low_idx = -1
        self.b_up = sys.float_info.max
        self.b_low = -sys.float_info.max

        for i in self.I0:
            if self.errors[i] > self.b_low:
                self.b_low = self.errors[i]
                self.b_low_idx = i
            if self.errors[i] < self.b_up:
                self.b_up = self.errors[i]
                self.b_up_idx = i

        for i in (i1, i2):
            if i not in self.I0:
                if i in self.I3 or i in self.I4:
                    # I3 and I4 can only raise b_low
                    if self.errors[i] > self.b_low:
                        self.b_low = self.errors[i]
                        self.b_low_idx = i
                elif self.errors[i] < self.b_up:
                    # the remaining sets (I1 and I2) can only lower b_up
                    self.b_up = self.errors[i]
                    self.b_up_idx = i

        if self.b_low_idx == -1 or self.b_up_idx == -1:
            raise RuntimeError('unexpected status')

        return True

    def _examine_example(self, i2):
        """
        Check the sample ``i2`` against the current thresholds and, if it
        violates optimality by more than ``2 * tol``, optimize it together
        with the index attached to the violated threshold.

        Returns
        -------

        bool
            False if ``i2`` is optimal, otherwise the outcome of ``_take_step``.

        Raises
        ------

        RuntimeError
            If a violation was detected but no partner index is available.
        """
        if i2 in self.I0:
            # the error cache is kept up to date for the indices in I0
            E2 = self.errors[i2]
        else:
            E2 = (self.alphas * self.y).dot(self.K[i2]) - self.y[i2]
            self.errors[i2] = E2

            # update (b_up, b_up_idx) or (b_low, b_low_idx) using E2 and i2
            if (i2 in self.I1 or i2 in self.I2) and E2 < self.b_up:
                self.b_up = E2
                self.b_up_idx = i2
            elif (i2 in self.I3 or i2 in self.I4) and E2 > self.b_low:
                self.b_low = E2
                self.b_low_idx = i2

        # check optimality using current b_up and b_low and, if violated,
        # find another index i1 to do joint optimization with i2
        i1 = -1
        optimal = True
        if i2 in self.I0 or i2 in self.I1 or i2 in self.I2:
            if self.b_low - E2 > 2 * self.tol:
                optimal = False
                i1 = self.b_low_idx
        if i2 in self.I0 or i2 in self.I3 or i2 in self.I4:
            if E2 - self.b_up > 2 * self.tol:
                optimal = False
                i1 = self.b_up_idx

        if optimal:
            return False

        # for i2 in I0 choose the better i1
        if i2 in self.I0:
            if self.b_low - E2 > E2 - self.b_up:
                i1 = self.b_low_idx
            else:
                i1 = self.b_up_idx

        if i1 == -1:
            raise RuntimeError('the index could not be found')

        return self._take_step(i1, i2)

    def minimize(self):
        """
        Run SMO until a pass over all the samples changes no multiplier or
        the thresholds satisfy ``b_up > b_low - 2 * tol`` on the non-bound set.

        Passes over all the samples alternate with passes over the samples
        whose multiplier is strictly between 0 and ``C``. When ``verbose`` is
        set, the cost is printed every ``verbose`` passes. On exit the bias
        ``b`` is set to minus the midpoint of the two thresholds.

        Returns
        -------

        self : SMOClassifier
        """
        if self.verbose:
            print('iter\t cost')

        num_changed = 0
        examine_all = True
        while num_changed > 0 or examine_all:
            num_changed = 0
            # loop over all training examples
            if examine_all:
                for i in range(len(self.X)):
                    num_changed += self._examine_example(i)
            else:
                # loop over examples where alphas are not already at their limits
                for i in range(len(self.X)):
                    if 0 < self.alphas[i] < self.C:
                        num_changed += self._examine_example(i)
                        # check if optimality on I0 is attained
                        if self.b_up > self.b_low - 2 * self.tol:
                            num_changed = 0
                            break
            if examine_all:
                examine_all = False
            elif num_changed == 0:
                examine_all = True

            if self.verbose and not self.iter % self.verbose:
                print('{:4d}\t{: 1.4e}'.format(self.iter, self.quad.function(self.alphas)))

            self.iter += 1

        self.b = -(self.b_low + self.b_up) / 2

        if self.verbose:
            print()

        return self
358
+
359
+
360
class SMORegression(SMO):
    """
    Implements Smola and Scholkopf sequential minimal optimization
    algorithm for training a support vector regression.

    The SMO algorithm is an algorithm for solving large quadratic programming (QP)
    optimization problems, widely used for the training of support vector machines.
    First developed by John C. Platt in 1998, SMO breaks up large QP problems into a
    series of smallest possible QP problems, which are then solved analytically.

    This class incorporates modifications in the original SMO algorithm to solve
    regression problems as suggested by Alex J. Smola and Bernhard Scholkopf and
    further modifications for better performance by Shevade et al.

    References
    ----------

    G.W. Flake, S. Lawrence. Efficient SVM Regression Training with SMO.

    Alex J. Smola, Bernhard Scholkopf. A Tutorial on Support Vector Regression.
    NeuroCOLT2 Technical Report Series NC2-TR-1998-030.

    S.K. Shevade, S.S. Keerthi, C. Bhattacharyya, K.R.K. Murthy. Improvements to SMO
    Algorithm for SVM Regression. Technical Report CD-99-16.
    """

    def __init__(self, quad, X, y, K, kernel, C, epsilon, tol=1e-3, verbose=False):
        """
        Parameters
        ----------

        quad : `Quadratic` instance
            The quadratic objective of the dual problem, used to monitor
            the cost during the optimization.

        X : ndarray of shape (n_samples, n_features)
            Training data.

        y : ndarray of shape (n_samples,)
            Target values associated with ``X``.

        K : ndarray of shape (n_samples, n_samples)
            Precomputed kernel (Gram) matrix of the training data.

        kernel : `Kernel` instance
            The kernel function used to build ``K``. If it is a `LinearKernel`
            the primal weight vector ``w`` is maintained explicitly.

        C : float
            Regularization parameter, i.e., the upper bound on the
            Lagrange multipliers.

        epsilon : float
            Width of the epsilon-tube of the epsilon-insensitive loss within
            which no penalty is associated in the regression problem.

        tol : float, default=1e-3
            Tolerance for the KKT stopping criterion.

        verbose : bool or int, default=False
            Controls the verbosity of progress messages to stdout.
        """
        self.alphas_p = np.zeros(len(X))
        self.alphas_n = np.zeros(len(X))
        super(SMORegression, self).__init__(quad, X, y, K, kernel, C, tol, verbose)
        self.epsilon = epsilon

        # initialize variables and structures to implement improvements
        # on the original Smola and Scholkopf SMO algorithm described in
        # Shevade et al. for better performance and efficiency

        # set of indices
        # {i : 0 < alphas_p[i] < C or 0 < alphas_n[i] < C}
        self.I0 = set()
        # {i : alphas_p[i] = 0, alphas_n[i] = 0}
        self.I1 = set(range(len(X)))
        # {i : alphas_p[i] = 0, alphas_n[i] = C}
        self.I2 = set()
        # {i : alphas_p[i] = C, alphas_n[i] = 0}
        self.I3 = set()

        # multiple thresholds
        self.b_up_idx = 0
        self.b_low_idx = 0
        self.b_up = y[self.b_up_idx] + self.epsilon
        self.b_low = y[self.b_low_idx] - self.epsilon

    def _take_step(self, i1, i2):
        """
        Jointly optimize the four multipliers attached to the samples
        ``i1`` and ``i2``.

        Up to four cases, one per pairing of a multiplier of ``i1`` with a
        multiplier of ``i2``, are tried in turn; each case keeps
        ``gamma = (alpha1_p - alpha1_n) + (alpha2_p - alpha2_n)`` constant.
        On success the multipliers, the error cache, the index sets, the
        thresholds and (for a linear kernel) the weight vector are updated.

        Returns
        -------

        bool
            True if any multiplier changed, False otherwise.

        Raises
        ------

        RuntimeError
            If no index could be assigned to ``b_low`` or ``b_up`` after the step.
        """
        # skip if chosen alphas are the same
        if i1 == i2:
            return False

        alpha1_p, alpha1_n = self.alphas_p[i1], self.alphas_n[i1]
        E1 = self.errors[i1]

        alpha2_p, alpha2_n = self.alphas_p[i2], self.alphas_n[i2]
        E2 = self.errors[i2]

        # net multipliers before the step, used as the reference for all the updates
        net1_old = self.alphas_p[i1] - self.alphas_n[i1]
        net2_old = self.alphas_p[i2] - self.alphas_n[i2]

        k11, k12, k22 = self.K[i1, i1], self.K[i1, i2], self.K[i2, i2]

        # compute the 2nd derivative eta
        # based on equation 15 in Platt's paper
        eta = k11 + k22 - 2 * k12

        if eta < 0:
            eta = 0

        gamma = alpha1_p - alpha1_n + alpha2_p - alpha2_n

        case1 = case2 = case3 = case4 = False
        changed = finished = False

        delta_E = E1 - E2

        while not finished:  # occurs at most three times
            if (not case1 and
                    (alpha1_p > 0 or (alpha1_n == 0 and delta_E > 0)) and
                    (alpha2_p > 0 or (alpha2_n == 0 and delta_E < 0))):
                # compute L and H wrt alpha1_p, alpha2_p
                L = max(0, gamma - self.C)
                H = min(self.C, gamma)
                if L < H:
                    if eta > 0:
                        a2 = max(L, min(alpha2_p - delta_E / eta, H))
                    else:
                        Lobj = -L * delta_E
                        Hobj = -H * delta_E
                        a2 = L if Lobj > Hobj else H
                        warnings.warn('kernel matrix is not positive definite', PositiveSpectrumWarning)
                    a1 = alpha1_p - (a2 - alpha2_p)
                    # update alpha1_p, alpha2_p if change is larger than some eps
                    if abs(a1 - alpha1_p) > 1e-12 or abs(a2 - alpha2_p) > 1e-12:
                        alpha1_p = a1
                        alpha2_p = a2
                        changed = True
                else:
                    finished = True
                case1 = True
            elif (not case2 and
                  (alpha1_p > 0 or (alpha1_n == 0 and delta_E > 2 * self.epsilon)) and
                  (alpha2_n > 0 or (alpha2_p == 0 and delta_E > 2 * self.epsilon))):
                # compute L and H wrt alpha1_p, alpha2_n
                L = max(0, -gamma)
                H = min(self.C, -gamma + self.C)
                if L < H:
                    if eta > 0:
                        a2 = max(L, min(alpha2_n + (delta_E - 2 * self.epsilon) / eta, H))
                    else:
                        Lobj = L * (-2 * self.epsilon + delta_E)
                        Hobj = H * (-2 * self.epsilon + delta_E)
                        a2 = L if Lobj > Hobj else H
                        warnings.warn('kernel matrix is not positive definite', PositiveSpectrumWarning)
                    a1 = alpha1_p + (a2 - alpha2_n)
                    # update alpha1_p, alpha2_n if change is larger than some eps
                    if abs(a1 - alpha1_p) > 1e-12 or abs(a2 - alpha2_n) > 1e-12:
                        alpha1_p = a1
                        alpha2_n = a2
                        changed = True
                else:
                    finished = True
                case2 = True
            elif (not case3 and
                  (alpha1_n > 0 or (alpha1_p == 0 and delta_E < -2 * self.epsilon)) and
                  (alpha2_p > 0 or (alpha2_n == 0 and delta_E < -2 * self.epsilon))):
                # compute L and H wrt alpha1_n, alpha2_p
                L = max(0, gamma)
                H = min(self.C, self.C + gamma)
                if L < H:
                    if eta > 0:
                        a2 = max(L, min(alpha2_p - (delta_E + 2 * self.epsilon) / eta, H))
                    else:
                        Lobj = -L * (2 * self.epsilon + delta_E)
                        Hobj = -H * (2 * self.epsilon + delta_E)
                        a2 = L if Lobj > Hobj else H
                        warnings.warn('kernel matrix is not positive definite', PositiveSpectrumWarning)
                    a1 = alpha1_n + (a2 - alpha2_p)
                    # update alpha1_n, alpha2_p if change is larger than some eps
                    if abs(a1 - alpha1_n) > 1e-12 or abs(a2 - alpha2_p) > 1e-12:
                        alpha1_n = a1
                        alpha2_p = a2
                        changed = True
                else:
                    finished = True
                case3 = True
            elif (not case4 and
                  (alpha1_n > 0 or (alpha1_p == 0 and delta_E < 0)) and
                  (alpha2_n > 0 or (alpha2_p == 0 and delta_E > 0))):
                # compute L and H wrt alpha1_n, alpha2_n
                L = max(0, -gamma - self.C)
                H = min(self.C, -gamma)
                if L < H:
                    if eta > 0:
                        a2 = max(L, min(alpha2_n + delta_E / eta, H))
                    else:
                        Lobj = L * delta_E
                        Hobj = H * delta_E
                        a2 = L if Lobj > Hobj else H
                        warnings.warn('kernel matrix is not positive definite', PositiveSpectrumWarning)
                    a1 = alpha1_n - (a2 - alpha2_n)
                    # update alpha1_n, alpha2_n if change is larger than some eps
                    if abs(a1 - alpha1_n) > 1e-12 or abs(a2 - alpha2_n) > 1e-12:
                        alpha1_n = a1
                        alpha2_n = a2
                        changed = True
                else:
                    finished = True
                case4 = True
            else:
                finished = True

            # refresh delta_E for the next case: the bracket is the total
            # change of sample i2 since the start of the step, so it has to
            # be applied to the initial E1 - E2 rather than accumulated,
            # otherwise earlier changes are counted again on every pass
            delta_E = E1 - E2 + eta * ((alpha2_p - alpha2_n) - net2_old)

        if not changed:
            return False

        # changes (old minus new) of the net multipliers of i1 and i2
        delta1 = net1_old - (alpha1_p - alpha1_n)
        delta2 = net2_old - (alpha2_p - alpha2_n)

        # if kernel is linear update weight vector
        # to reflect change in a1 and a2
        if isinstance(self.kernel, LinearKernel):
            self.w -= delta1 * self.X[i1] + delta2 * self.X[i2]

        # update error cache using new alphas
        for i in self.I0:
            if i != i1 and i != i2:
                self.errors[i] += delta1 * self.K[i1, i] + delta2 * self.K[i2, i]
        # update error cache using new alphas for i1 and i2
        self.errors[i1] += delta1 * k11 + delta2 * k12
        self.errors[i2] += delta1 * k12 + delta2 * k22

        def snap(alpha):
            # to prevent precision problems move the values that are
            # numerically at a bound exactly onto it
            if alpha > self.C - 1e-10 * self.C:
                return self.C
            if alpha <= 1e-10 * self.C:
                return 0
            return alpha

        alpha1_p, alpha1_n = snap(alpha1_p), snap(alpha1_n)
        alpha2_p, alpha2_n = snap(alpha2_p), snap(alpha2_n)

        # update model object with new alphas
        self.alphas_p[i1], self.alphas_p[i2] = alpha1_p, alpha2_p
        self.alphas_n[i1], self.alphas_n[i2] = alpha1_n, alpha2_n

        # update the sets of indices for i1 and i2
        for i in (i1, i2):
            a_p, a_n = self.alphas_p[i], self.alphas_n[i]
            memberships = (
                (self.I0, 0 < a_p < self.C or 0 < a_n < self.C),
                (self.I1, a_p == 0 and a_n == 0),
                (self.I2, a_p == 0 and a_n == self.C),
                (self.I3, a_p == self.C and a_n == 0),
            )
            for index_set, belongs in memberships:
                if belongs:
                    index_set.add(i)
                else:
                    index_set.discard(i)

        # update thresholds
        self.b_up_idx = -1
        self.b_low_idx = -1
        self.b_up = sys.float_info.max
        self.b_low = -sys.float_info.max

        for i in self.I0:
            if 0 < self.alphas_p[i] < self.C and self.errors[i] - self.epsilon > self.b_low:
                self.b_low = self.errors[i] - self.epsilon
                self.b_low_idx = i
            elif 0 < self.alphas_n[i] < self.C and self.errors[i] + self.epsilon > self.b_low:
                self.b_low = self.errors[i] + self.epsilon
                self.b_low_idx = i

            if 0 < self.alphas_p[i] < self.C and self.errors[i] - self.epsilon < self.b_up:
                self.b_up = self.errors[i] - self.epsilon
                self.b_up_idx = i
            elif 0 < self.alphas_n[i] < self.C and self.errors[i] + self.epsilon < self.b_up:
                self.b_up = self.errors[i] + self.epsilon
                self.b_up_idx = i

        for i in (i1, i2):
            if i not in self.I0:
                if i in self.I2 and self.errors[i] + self.epsilon > self.b_low:
                    self.b_low = self.errors[i] + self.epsilon
                    self.b_low_idx = i
                elif i in self.I1 and self.errors[i] - self.epsilon > self.b_low:
                    self.b_low = self.errors[i] - self.epsilon
                    self.b_low_idx = i

                if i in self.I3 and self.errors[i] - self.epsilon < self.b_up:
                    self.b_up = self.errors[i] - self.epsilon
                    self.b_up_idx = i
                elif i in self.I1 and self.errors[i] + self.epsilon < self.b_up:
                    self.b_up = self.errors[i] + self.epsilon
                    self.b_up_idx = i

        if self.b_low_idx == -1 or self.b_up_idx == -1:
            raise RuntimeError('unexpected status')

        return True

    def _examine_example(self, i2):
        """
        Check the sample ``i2`` against the current thresholds and, if it
        violates optimality by more than ``2 * tol``, optimize it together
        with the index attached to the most violated threshold.

        Returns
        -------

        bool
            False if ``i2`` is optimal, otherwise the outcome of ``_take_step``.

        Raises
        ------

        RuntimeError
            If ``i2`` does not belong to any of the index sets.
        """
        alpha2_p, alpha2_n = self.alphas_p[i2], self.alphas_n[i2]

        if i2 in self.I0:
            # the error cache is kept up to date for the indices in I0
            E2 = self.errors[i2]
        else:
            E2 = self.y[i2] - (self.alphas_p - self.alphas_n).dot(self.K[i2])
            self.errors[i2] = E2
            # update (b_low, b_low_idx) or (b_up, b_up_idx) using (E2, i2)
            if i2 in self.I1:
                if E2 + self.epsilon < self.b_up:
                    self.b_up = E2 + self.epsilon
                    self.b_up_idx = i2
                elif E2 - self.epsilon > self.b_low:
                    self.b_low = E2 - self.epsilon
                    self.b_low_idx = i2
            elif i2 in self.I2 and E2 + self.epsilon > self.b_low:
                self.b_low = E2 + self.epsilon
                self.b_low_idx = i2
            elif i2 in self.I3 and E2 - self.epsilon < self.b_up:
                self.b_up = E2 - self.epsilon
                self.b_up_idx = i2

        # check optimality using current b_up and b_low and, if violated,
        # find another index i1 to do joint optimization with i2
        i1 = -1
        optimal = True
        if i2 in self.I0:
            if 0 < alpha2_p < self.C:
                if self.b_low - (E2 - self.epsilon) > 2 * self.tol:
                    optimal = False
                    i1 = self.b_low_idx
                    # for i2 in I0 choose the better i1
                    if (E2 - self.epsilon) - self.b_up > self.b_low - (E2 - self.epsilon):
                        i1 = self.b_up_idx
                elif (E2 - self.epsilon) - self.b_up > 2 * self.tol:
                    optimal = False
                    i1 = self.b_up_idx
                    # for i2 in I0 choose the better i1
                    if self.b_low - (E2 - self.epsilon) > (E2 - self.epsilon) - self.b_up:
                        i1 = self.b_low_idx
            elif 0 < alpha2_n < self.C:
                if self.b_low - (E2 + self.epsilon) > 2 * self.tol:
                    optimal = False
                    i1 = self.b_low_idx
                    # for i2 in I0 choose the better i1
                    if (E2 + self.epsilon) - self.b_up > self.b_low - (E2 + self.epsilon):
                        i1 = self.b_up_idx
                elif (E2 + self.epsilon) - self.b_up > 2 * self.tol:
                    optimal = False
                    i1 = self.b_up_idx
                    # for i2 in I0 choose the better i1
                    if self.b_low - (E2 + self.epsilon) > (E2 + self.epsilon) - self.b_up:
                        i1 = self.b_low_idx
        elif i2 in self.I1:
            # for i2 in I1 the violation against b_low is measured with
            # E2 + epsilon and the one against b_up with E2 - epsilon; the
            # same two quantities are compared when choosing the better i1,
            # so that the selected partner always forms a violating pair
            low_violation = self.b_low - (E2 + self.epsilon)
            up_violation = (E2 - self.epsilon) - self.b_up
            if low_violation > 2 * self.tol:
                optimal = False
                i1 = self.b_low_idx
                # for i2 in I1 choose the better i1
                if up_violation > low_violation:
                    i1 = self.b_up_idx
            elif up_violation > 2 * self.tol:
                optimal = False
                i1 = self.b_up_idx
                # for i2 in I1 choose the better i1
                if low_violation > up_violation:
                    i1 = self.b_low_idx
        elif i2 in self.I2:
            if (E2 + self.epsilon) - self.b_up > 2 * self.tol:
                optimal = False
                i1 = self.b_up_idx
        elif i2 in self.I3:
            if self.b_low - (E2 - self.epsilon) > 2 * self.tol:
                optimal = False
                i1 = self.b_low_idx
        else:
            raise RuntimeError('the index could not be found')

        if optimal:
            return False

        return self._take_step(i1, i2)

    def minimize(self):
        """
        Run SMO until a pass over all the samples changes no multiplier or
        the thresholds satisfy ``b_up > b_low - 2 * tol`` on the non-bound set.

        Passes over all the samples alternate with passes over the samples
        having a multiplier strictly between 0 and ``C``. When ``verbose`` is
        set, the cost is printed every ``verbose`` passes. On exit the bias
        ``b`` is set to the midpoint of the two thresholds.

        Returns
        -------

        self : SMORegression
        """
        if self.verbose:
            print('iter\t cost')

        num_changed = 0
        examine_all = True
        while num_changed > 0 or examine_all:
            num_changed = 0
            # loop over all training examples
            if examine_all:
                for i in range(len(self.X)):
                    num_changed += self._examine_example(i)
            else:
                # loop over examples where alphas are not already at their limits
                for i in range(len(self.X)):
                    if 0 < self.alphas_p[i] < self.C or 0 < self.alphas_n[i] < self.C:
                        num_changed += self._examine_example(i)
                        # check if optimality on I0 is attained
                        if self.b_up > self.b_low - 2 * self.tol:
                            num_changed = 0
                            break
            if examine_all:
                examine_all = False
            elif num_changed == 0:
                examine_all = True

            if self.verbose and not self.iter % self.verbose:
                print('{:4d}\t{: 1.4e}'.format(
                    self.iter, self.quad.function(np.concatenate((self.alphas_p, self.alphas_n)))))

            self.iter += 1

        self.b = (self.b_low + self.b_up) / 2

        if self.verbose:
            print()

        return self