optiml 1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. optiml/__init__.py +0 -0
  2. optiml/ml/__init__.py +0 -0
  3. optiml/ml/neural_network/__init__.py +3 -0
  4. optiml/ml/neural_network/_base.py +475 -0
  5. optiml/ml/neural_network/activations.py +79 -0
  6. optiml/ml/neural_network/initializers.py +66 -0
  7. optiml/ml/neural_network/layers.py +183 -0
  8. optiml/ml/neural_network/losses.py +178 -0
  9. optiml/ml/neural_network/regularizers.py +87 -0
  10. optiml/ml/svm/__init__.py +3 -0
  11. optiml/ml/svm/_base.py +1442 -0
  12. optiml/ml/svm/kernels.py +208 -0
  13. optiml/ml/svm/losses.py +284 -0
  14. optiml/ml/svm/smo.py +797 -0
  15. optiml/ml/tests/__init__.py +0 -0
  16. optiml/ml/tests/_datasets.py +49 -0
  17. optiml/ml/tests/_utils.py +28 -0
  18. optiml/ml/tests/test_initializers.py +33 -0
  19. optiml/ml/tests/test_neural_network.py +86 -0
  20. optiml/ml/tests/test_svc.py +245 -0
  21. optiml/ml/tests/test_svr.py +256 -0
  22. optiml/ml/utils.py +252 -0
  23. optiml/opti/__init__.py +4 -0
  24. optiml/opti/_base.py +309 -0
  25. optiml/opti/constrained/__init__.py +9 -0
  26. optiml/opti/constrained/_base.py +404 -0
  27. optiml/opti/constrained/active_set.py +228 -0
  28. optiml/opti/constrained/frank_wolfe.py +158 -0
  29. optiml/opti/constrained/interior_point.py +282 -0
  30. optiml/opti/constrained/projected_gradient.py +138 -0
  31. optiml/opti/constrained/tests/__init__.py +0 -0
  32. optiml/opti/constrained/tests/test_active_set.py +16 -0
  33. optiml/opti/constrained/tests/test_frank_wolfe.py +16 -0
  34. optiml/opti/constrained/tests/test_interior_point.py +16 -0
  35. optiml/opti/constrained/tests/test_lagrangian_quadratic.py +26 -0
  36. optiml/opti/constrained/tests/test_lower_bound.py +29 -0
  37. optiml/opti/constrained/tests/test_projected_gradient.py +16 -0
  38. optiml/opti/unconstrained/__init__.py +6 -0
  39. optiml/opti/unconstrained/_base.py +63 -0
  40. optiml/opti/unconstrained/line_search/__init__.py +10 -0
  41. optiml/opti/unconstrained/line_search/_base.py +106 -0
  42. optiml/opti/unconstrained/line_search/conjugate_gradient.py +255 -0
  43. optiml/opti/unconstrained/line_search/gradient_descent.py +212 -0
  44. optiml/opti/unconstrained/line_search/line_search.py +248 -0
  45. optiml/opti/unconstrained/line_search/newton.py +198 -0
  46. optiml/opti/unconstrained/line_search/quasi_newton.py +496 -0
  47. optiml/opti/unconstrained/proximal_bundle.py +219 -0
  48. optiml/opti/unconstrained/stochastic/__init__.py +12 -0
  49. optiml/opti/unconstrained/stochastic/_base.py +246 -0
  50. optiml/opti/unconstrained/stochastic/adadelta.py +133 -0
  51. optiml/opti/unconstrained/stochastic/adagrad.py +123 -0
  52. optiml/opti/unconstrained/stochastic/adam.py +179 -0
  53. optiml/opti/unconstrained/stochastic/adamax.py +178 -0
  54. optiml/opti/unconstrained/stochastic/amsgrad.py +177 -0
  55. optiml/opti/unconstrained/stochastic/gradient_descent.py +135 -0
  56. optiml/opti/unconstrained/stochastic/rmsprop.py +156 -0
  57. optiml/opti/unconstrained/stochastic/schedules.py +89 -0
  58. optiml/opti/unconstrained/tests/__init__.py +0 -0
  59. optiml/opti/unconstrained/tests/test_adadelta.py +20 -0
  60. optiml/opti/unconstrained/tests/test_adagrad.py +20 -0
  61. optiml/opti/unconstrained/tests/test_adam.py +42 -0
  62. optiml/opti/unconstrained/tests/test_adamax.py +41 -0
  63. optiml/opti/unconstrained/tests/test_amsgrad.py +40 -0
  64. optiml/opti/unconstrained/tests/test_conjugate_gradient.py +35 -0
  65. optiml/opti/unconstrained/tests/test_functions.py +34 -0
  66. optiml/opti/unconstrained/tests/test_gradient_descent.py +51 -0
  67. optiml/opti/unconstrained/tests/test_newton.py +20 -0
  68. optiml/opti/unconstrained/tests/test_quasi_newton.py +30 -0
  69. optiml/opti/unconstrained/tests/test_rmsprop.py +40 -0
  70. optiml/opti/unconstrained/tests/test_verbose.py +25 -0
  71. optiml/opti/utils.py +353 -0
  72. optiml-1.7.dist-info/METADATA +203 -0
  73. optiml-1.7.dist-info/RECORD +76 -0
  74. optiml-1.7.dist-info/WHEEL +5 -0
  75. optiml-1.7.dist-info/licenses/LICENSE +21 -0
  76. optiml-1.7.dist-info/top_level.txt +1 -0
optiml/__init__.py ADDED
File without changes
optiml/ml/__init__.py ADDED
File without changes
@@ -0,0 +1,3 @@
1
+ __all__ = ['NeuralNetworkClassifier', 'NeuralNetworkRegressor']
2
+
3
+ from ._base import NeuralNetworkClassifier, NeuralNetworkRegressor
@@ -0,0 +1,475 @@
1
+ import warnings
2
+ from abc import ABC
3
+
4
+ import autograd.numpy as np
5
+ from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin
6
+ from sklearn.exceptions import ConvergenceWarning
7
+ from sklearn.metrics import accuracy_score
8
+ from sklearn.model_selection import train_test_split
9
+
10
+ from .activations import sigmoid, linear, softmax
11
+ from .layers import Layer, ParamLayer
12
+ from .losses import (CategoricalCrossEntropy, SparseCategoricalCrossEntropy,
13
+ MeanSquaredError, BinaryCrossEntropy, mean_squared_error, NeuralNetworkLoss)
14
+ from ...opti import Optimizer
15
+ from ...opti.unconstrained import ProximalBundle
16
+ from ...opti.unconstrained.line_search import LineSearchOptimizer
17
+ from ...opti.unconstrained.stochastic import StochasticOptimizer, StochasticGradientDescent, StochasticMomentumOptimizer
18
+
19
+
20
class NeuralNetwork(BaseEstimator, Layer, ABC):
    """
    Base abstract class for all feed-forward neural network estimators.
    It chains a sequence of layers, performs forward/backward propagation
    and trains the network parameters by minimizing the given loss with
    the chosen optimizer.

    Parameters
    ----------

    layers : tuple of `Layer` instances, default=()
        The ordered sequence of layers composing the network.

    loss : `NeuralNetworkLoss` subclass, default=mean_squared_error
        Specifies the loss function to minimize.

    optimizer : `Optimizer` subclass, default=StochasticGradientDescent
        The solver for optimization. It can be a subclass of the
        `LineSearchOptimizer`, the `ProximalBundle` method or a subclass
        of the `StochasticOptimizer`.

    learning_rate : float, default=0.01
        The initial learning rate used for weight update. It controls the
        step-size in updating the weights. Only used when ``optimizer`` is a
        subclass of `StochasticOptimizer`.

    max_iter : int, default=1000
        Maximum number of iterations. The solver iterates until convergence
        (determined by ``tol``) or this number of iterations. If the optimizer
        is a subclass of `StochasticOptimizer`, this value determines the number
        of epochs, not the number of gradient steps.

    momentum_type : {'none', 'polyak', 'nesterov'}, default='none'
        Momentum type used for weight update. Only used when ``optimizer`` is
        a subclass of `StochasticMomentumOptimizer`.

    momentum : float, default=0.9
        Momentum for weight update. Should be between 0 and 1. Only used when
        ``optimizer`` is a subclass of `StochasticMomentumOptimizer`.

    tol : float, default=1e-4
        Tolerance for stopping criterion.

    validation_split : float, default=0.
        The proportion of training data to set aside as validation set for
        early stopping. Must be between 0 and 1. Only used when ``optimizer``
        is a subclass of `StochasticOptimizer`.

    batch_size : int, default=None
        Size of mini batches for stochastic optimizers.
        Only used when ``optimizer`` is a subclass of `StochasticOptimizer`.

    max_f_eval : int, default=15000
        Maximum number of loss function calls. Only used when ``optimizer``
        is a subclass of `LineSearchOptimizer`.

    early_stopping : bool, default=False
        Whether to use early stopping to terminate training when the
        monitored score/loss does not improve by at least ``tol`` for
        ``patience`` consecutive epochs.
        Only used when ``optimizer`` is a subclass of `StochasticOptimizer`.

    patience : int, default=5
        Maximum number of epochs to not meet ``tol`` improvement.
        Only used when ``optimizer`` is a subclass of `StochasticOptimizer`.

    shuffle : bool, default=True
        Whether to shuffle samples for batch sampling in each iteration. Only
        used when the ``optimizer`` is a subclass of `StochasticOptimizer`.

    random_state : int, RandomState instance or None, default=None
        Controls the pseudo random number generation for the train-validation
        split and for shuffling the data in batch sampling.
        Pass an int for reproducible output across multiple function calls.

    mu : float, default=1
        Mu parameter for the proximal bundle method.
        Only used when ``optimizer`` is `ProximalBundle`. Must be strictly positive.

    master_solver : string, default='clarabel'
        Master solver for the proximal bundle method for the CVXPY interface.
        Only used when ``optimizer`` is `ProximalBundle`.

    master_verbose : bool or int, default=False
        Controls the verbosity of the CVXPY interface.
        Only used when ``optimizer`` is `ProximalBundle`.

    verbose : bool or int, default=False
        Controls the verbosity of progress messages to stdout. Use a boolean value
        to switch on/off or an int value to show progress each ``verbose`` time
        optimization steps.
    """

    def __init__(self,
                 layers=(),
                 loss=mean_squared_error,
                 optimizer=StochasticGradientDescent,
                 learning_rate=0.01,
                 max_iter=1000,
                 momentum_type='none',
                 momentum=0.9,
                 tol=1e-4,
                 validation_split=0.,
                 batch_size=None,
                 max_f_eval=15000,
                 early_stopping=False,
                 patience=5,
                 shuffle=True,
                 random_state=None,
                 mu=1,
                 master_solver='clarabel',
                 master_verbose=False,
                 verbose=False):
        """
        Store the hyper-parameters and set up the training bookkeeping.

        Raises
        ------
        TypeError
            If ``loss`` is not a `NeuralNetworkLoss` subclass or ``optimizer``
            is not an `Optimizer` subclass.
        """
        self.layers = layers
        if not issubclass(loss, NeuralNetworkLoss):
            raise TypeError(f'{loss} is not an allowed neural network loss function')
        self.loss = loss
        if not issubclass(optimizer, Optimizer):
            raise TypeError(f'{optimizer} is not an allowed optimization method')
        self.optimizer = optimizer
        self.learning_rate = learning_rate
        self.momentum_type = momentum_type
        self.momentum = momentum
        self.tol = tol
        self.max_iter = max_iter
        self.batch_size = batch_size
        self.validation_split = validation_split
        self.max_f_eval = max_f_eval
        self.early_stopping = early_stopping
        self.patience = patience
        self.shuffle = shuffle
        self.random_state = random_state
        self.mu = mu
        self.master_solver = master_solver
        self.master_verbose = master_verbose
        self.verbose = verbose
        # per-epoch histories and early-stopping state only exist for
        # stochastic optimizers, the only ones driving the training callback
        if issubclass(self.optimizer, StochasticOptimizer):
            self.train_loss_history = []
            self.train_score_history = []
            self._no_improvement_count = 0
            self._avg_epoch_loss = 0
            if self.validation_split:
                self.val_loss_history = []
                self.val_score_history = []
                self.best_val_score = -np.inf
            else:
                self.best_loss = np.inf

    def forward(self, X):
        """Feed ``X`` through every layer in order and return the last output."""
        for layer in self.layers:
            X = layer.forward(X)
        return X

    def backward(self, delta):
        """
        Backpropagate ``delta`` through the layers, last to first.

        Returns
        -------
        coef_grads, inter_grads : tuple of lists
            Gradients of the weights and of the intercepts, both in forward
            layer order. Each gradient includes the jacobian of the layer's
            regularizer divided by the number of rows of the layer's cached
            input ``_X``.
        """
        coef_grads = []
        inter_grads = []
        # backpropagate
        for layer in self.layers[::-1]:
            if isinstance(layer, ParamLayer):
                delta, grads = layer.backward(delta)
                coef_grads.append(grads['dW'] + layer.coef_reg.jacobian(layer.coef_) / layer._X.shape[0])
                if layer.fit_intercept:
                    inter_grads.append(grads['db'] + layer.inter_reg.jacobian(layer.inter_) / layer._X.shape[0])
            else:
                delta = layer.backward(delta)
        # gradients were collected from the last layer backwards
        return coef_grads[::-1], inter_grads[::-1]

    @property
    def coefs_(self):
        """Weights of every `ParamLayer`, in layer order."""
        return [layer.coef_ for layer in self.layers if isinstance(layer, ParamLayer)]

    @property
    def intercepts_(self):
        """Intercepts of every `ParamLayer` that fits one, in layer order."""
        return [layer.inter_ for layer in self.layers if isinstance(layer, ParamLayer) and layer.fit_intercept]

    def _pack(self, coefs, intercepts):
        """Flatten all weights followed by all intercepts into one 1-D vector."""
        return np.hstack([w.ravel() for w in coefs + intercepts])

    def _unpack(self, packed_coef_inter):
        """
        Write a packed parameter vector back into the layers, using the
        slices recorded by ``_store_meta_info``.
        """
        coef_idx = 0
        inter_idx = 0
        for layer in self.layers:
            if isinstance(layer, ParamLayer):
                start, end, shape = self.coef_idx[coef_idx]
                layer.coef_ = np.reshape(packed_coef_inter[start:end], shape)
                if layer.fit_intercept:
                    start, end = self.inter_idx[inter_idx]
                    # the intercept is stored back as a flat 1-D slice
                    layer.inter_ = packed_coef_inter[start:end]
                    inter_idx += 1
                coef_idx += 1

    def _store_meta_info(self):
        """
        Record where each layer's parameters live inside the packed vector.

        ``coef_idx`` holds ``(start, end, shape)`` for every weight matrix and
        ``inter_idx`` holds ``(start, end)`` for every intercept. The layout
        matches ``_pack``: all weights first, then all intercepts.
        """
        # store meta information for the parameters
        self.coef_idx = []
        self.inter_idx = []
        start = 0
        # save sizes and indices of coefs for faster unpacking
        for layer in self.layers:
            if isinstance(layer, ParamLayer):
                end = start + (np.prod(layer.coef_.shape))
                self.coef_idx.append((start, end, layer.coef_.shape))
                start = end
        # save sizes and indices of intercepts for faster unpacking
        for layer in self.layers:
            if isinstance(layer, ParamLayer) and layer.fit_intercept:
                # size by element count rather than ``shape[1]``: ``_unpack``
                # stores the intercept as a 1-D slice, for which ``shape[1]``
                # does not exist; the count is the same for a (1, n) array
                end = start + np.prod(layer.inter_.shape)
                self.inter_idx.append((start, end))
                start = end

    def _store_train_val_info(self, opt, X_batch, y_batch, X_val, y_val):
        """
        Optimizer callback that accumulates the batch-size-weighted loss and,
        when ``opt.is_batch_end()`` is true, records the averaged training
        loss plus the validation loss if a validation split is in use.
        """
        self._avg_epoch_loss += opt.f_x * X_batch.shape[0]
        if opt.is_batch_end():
            self._avg_epoch_loss /= opt.f.X.shape[0]  # n_samples
            self.train_loss_history.append(self._avg_epoch_loss)
            if opt.is_verbose() and opt.epoch != opt.iter:
                print('\tavg_loss: {: 1.4e}'.format(self._avg_epoch_loss), end='')
            self._avg_epoch_loss = 0.
            if self.validation_split:
                val_loss = self.loss(opt.x, X_val, y_val)
                self.val_loss_history.append(val_loss)
                if opt.is_verbose():
                    print('\tval_loss: {: 1.4e}'.format(val_loss), end='')

    def _update_no_improvement_count(self, opt):
        """
        Update the early-stopping counter and stop training when it reaches
        ``patience``.

        With a validation split the validation score is monitored and the
        best parameters seen so far are restored into ``opt.x`` before
        stopping; otherwise the training loss is monitored.

        Raises
        ------
        StopIteration
            When no improvement of at least ``tol`` has been seen for
            ``patience`` consecutive calls.
        """
        if self.early_stopping:

            if self.validation_split:  # monitor val_score

                if self.val_score_history[-1] < self.best_val_score + self.tol:
                    self._no_improvement_count += 1
                else:
                    self._no_improvement_count = 0
                if self.val_score_history[-1] > self.best_val_score:
                    self.best_val_score = self.val_score_history[-1]
                    self._best_coefs = [coef.copy() for coef in self.coefs_]
                    self._best_intercepts = [inter.copy() for inter in self.intercepts_]

            else:  # monitor train_loss

                if self.train_loss_history[-1] > self.best_loss - self.tol:
                    self._no_improvement_count += 1
                else:
                    self._no_improvement_count = 0
                if self.train_loss_history[-1] < self.best_loss:
                    self.best_loss = self.train_loss_history[-1]

            if self._no_improvement_count >= self.patience:

                if self.validation_split:
                    opt.x = self._pack(self._best_coefs, self._best_intercepts)

                if self.verbose:
                    if self.validation_split:
                        print(f'\ntraining stopped since validation score did not improve more than '
                              f'tol={self.tol} for {self.patience} consecutive epochs')
                    else:
                        print('\ntraining stopped since training loss did not improve more than '
                              f'tol={self.tol} for {self.patience} consecutive epochs')

                raise StopIteration

    def fit(self, X, y):
        """
        Train the network on ``X``/``y`` with the configured optimizer.

        Returns
        -------
        self
            The fitted estimator.

        Raises
        ------
        TypeError
            If ``optimizer`` is none of `LineSearchOptimizer`, `ProximalBundle`
            or `StochasticOptimizer` subclasses.

        Notes
        -----
        ``self.loss`` and ``self.optimizer`` are replaced by the objects built
        from them, so the ``issubclass`` dispatch below cannot run a second
        time on the same estimator.
        """

        self._store_meta_info()

        packed_coef_inter = self._pack(self.coefs_, self.intercepts_)

        if issubclass(self.optimizer, LineSearchOptimizer):

            self.loss = self.loss(self, X, y)
            self.optimizer = self.optimizer(f=self.loss,
                                            x=packed_coef_inter,
                                            max_iter=self.max_iter,
                                            max_f_eval=self.max_f_eval,
                                            verbose=self.verbose).minimize()

            if self.optimizer.status == 'stopped':
                if self.optimizer.iter >= self.max_iter:
                    warnings.warn('max_iter reached but the optimization has not converged yet', ConvergenceWarning)
                elif self.optimizer.f_eval >= self.max_f_eval:
                    warnings.warn('max_f_eval reached but the optimization has not converged yet', ConvergenceWarning)

        elif issubclass(self.optimizer, ProximalBundle):

            self.loss = self.loss(self, X, y)
            self.optimizer = self.optimizer(f=self.loss,
                                            x=packed_coef_inter,
                                            mu=self.mu,
                                            max_iter=self.max_iter,
                                            master_solver=self.master_solver,
                                            master_verbose=self.master_verbose,
                                            verbose=self.verbose).minimize()

            if self.optimizer.status == 'error':
                warnings.warn('failure while computing direction for the master problem', ConvergenceWarning)

        elif issubclass(self.optimizer, StochasticOptimizer):

            if self.validation_split:
                # don't stratify in multi-label classification
                should_stratify = isinstance(self, NeuralNetworkClassifier) and self.layers[-1].fan_out == 1
                stratify = y if should_stratify else None
                X, X_val, y, y_val = train_test_split(X, y,
                                                      stratify=stratify,
                                                      test_size=self.validation_split,
                                                      random_state=self.random_state)
            else:
                X_val = None
                y_val = None

            # the loss is built after the split so it only sees training data
            self.loss = self.loss(self, X, y)

            if issubclass(self.optimizer, StochasticMomentumOptimizer):

                self.optimizer = self.optimizer(f=self.loss,
                                                x=packed_coef_inter,
                                                step_size=self.learning_rate,
                                                epochs=self.max_iter,
                                                batch_size=self.batch_size,
                                                momentum_type=self.momentum_type,
                                                momentum=self.momentum,
                                                callback=self._store_train_val_info,
                                                callback_args=(X_val, y_val),
                                                shuffle=self.shuffle,
                                                random_state=self.random_state,
                                                verbose=self.verbose).minimize()

            else:

                self.optimizer = self.optimizer(f=self.loss,
                                                x=packed_coef_inter,
                                                step_size=self.learning_rate,
                                                epochs=self.max_iter,
                                                batch_size=self.batch_size,
                                                callback=self._store_train_val_info,
                                                callback_args=(X_val, y_val),
                                                shuffle=self.shuffle,
                                                random_state=self.random_state,
                                                verbose=self.verbose).minimize()

        else:

            raise TypeError(f'{self.optimizer} is not an allowed optimizer')

        # load the parameters found by the optimizer back into the layers
        self._unpack(self.optimizer.x)

        return self
368
+
369
+
370
class NeuralNetworkClassifier(ClassifierMixin, NeuralNetwork):
    """
    Feed-forward neural network for classification. The output layer must be
    sigmoid (binary/multi-label) or softmax (multi-class), consistently with
    the chosen loss function.
    """

    def _store_train_val_info(self, opt, X_batch, y_batch, X_val, y_val):
        """
        Optimizer callback: on top of the loss bookkeeping done by the base
        class, record the accuracy on the current batch and, with a
        validation split, on the validation set, then update early stopping.
        """
        super(NeuralNetworkClassifier, self)._store_train_val_info(opt, X_batch, y_batch, X_val, y_val)
        if opt.is_batch_end():
            acc = self.score(X_batch, y_batch)
            self.train_score_history.append(acc)
            if opt.is_verbose():
                print('\tacc: {:1.4f}'.format(acc), end='')
            if self.validation_split:
                val_acc = self.score(X_val, y_val)
                self.val_score_history.append(val_acc)
                if opt.is_verbose():
                    print('\tval_acc: {:1.4f}'.format(val_acc), end='')
            self._update_no_improvement_count(opt)

    def fit(self, X, y):
        """
        Check that the output layer matches the loss and the targets, then
        train the network.

        Raises
        ------
        ValueError
            If the output activation, the number of output neurons or the
            number of classes is not compatible with the chosen loss.
        """
        if y.ndim == 1:
            y = y.reshape(-1, 1)

        # ``self.loss`` is still the loss *class* here, so it is named via
        # ``self.loss.__name__``; ``type(self.loss).__name__`` would report
        # the metaclass instead of the loss
        loss_name = self.loss.__name__

        n_classes = y.shape[1] if self.loss == CategoricalCrossEntropy else np.unique(y).size
        if self.loss in (SparseCategoricalCrossEntropy, CategoricalCrossEntropy):
            if self.layers[-1].activation != softmax:
                raise ValueError(f'NeuralNetworkClassifier with {loss_name} loss '
                                 'function only works with softmax output layer')
            if self.layers[-1].fan_out != n_classes:
                raise ValueError('the number of neurons in the output layer must '
                                 f'be equal to the number of classes, i.e., {n_classes}')
        elif self.loss in (MeanSquaredError, BinaryCrossEntropy):
            if n_classes > 2:
                raise ValueError(f'NeuralNetworkClassifier with {loss_name} '
                                 'loss function only works for binary classification')
            if self.layers[-1].activation != sigmoid:
                raise ValueError(f'NeuralNetworkClassifier with {loss_name} '
                                 'loss function only works with sigmoid output layer')
            if self.layers[-1].fan_out != 1:
                raise ValueError(f'NeuralNetworkClassifier with {loss_name} loss '
                                 'function only works with one neuron in the output layer')

        return super(NeuralNetworkClassifier, self).fit(X, y)

    def predict(self, X):
        """
        Predict labels for ``X``: the network output thresholded at 0.5 for a
        sigmoid output layer, the arg-max over axis 1 for a softmax one, and
        the raw output otherwise.
        """
        if self.layers[-1].activation == sigmoid:
            return self.forward(X) >= 0.5
        elif self.layers[-1].activation == softmax:
            return np.argmax(self.forward(X), axis=1)
        else:
            return self.forward(X)

    def score(self, X, y, sample_weight=None):
        """
        Return the accuracy of ``predict(X)`` against ``y``. With a
        `CategoricalCrossEntropy` loss instance ``y`` is reduced to class
        indices with an arg-max over axis 1 first.
        """
        y = np.argmax(y, axis=1) if isinstance(self.loss, CategoricalCrossEntropy) else y
        return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
427
+
428
+
429
class NeuralNetworkRegressor(RegressorMixin, NeuralNetwork):
    """
    Feed-forward neural network for regression. The output layer must be
    linear or, for regression between 0 and 1, sigmoid. The number of output
    neurons must equal the number of targets.
    """

    def _store_train_val_info(self, opt, X_batch, y_batch, X_val, y_val):
        """
        Optimizer callback: on top of the loss bookkeeping done by the base
        class, record the score on the current batch and, with a validation
        split, on the validation set, then update early stopping.
        """
        super(NeuralNetworkRegressor, self)._store_train_val_info(opt, X_batch, y_batch, X_val, y_val)
        if opt.is_batch_end():
            r2 = self.score(X_batch, y_batch)
            self.train_score_history.append(r2)
            if opt.is_verbose():
                print('\tr2: {: 1.4f}'.format(r2), end='')
            # validation data and ``val_score_history`` only exist when a
            # validation split was requested, whether or not early stopping
            # is enabled, so the split is the condition to test
            if self.validation_split:
                val_r2 = self.score(X_val, y_val)
                self.val_score_history.append(val_r2)
                if opt.is_verbose():
                    print('\tval_r2: {: 1.4f}'.format(val_r2), end='')
            self._update_no_improvement_count(opt)

    def fit(self, X, y):
        """
        Check that the output layer matches the loss and the targets, then
        train the network.

        Raises
        ------
        ValueError
            If the output activation is neither linear nor sigmoid, if the
            binary cross-entropy loss is used without a sigmoid output or
            with targets outside [0, 1], or if the number of output neurons
            differs from the number of targets.
        """
        if y.ndim == 1:
            y = y.reshape(-1, 1)

        if self.layers[-1].activation not in (linear, sigmoid):
            raise ValueError('NeuralNetworkRegressor only works with linear or '
                             'sigmoid (for regression between 0 and 1) output layer')
        if self.loss == BinaryCrossEntropy:
            if self.layers[-1].activation != sigmoid:
                raise ValueError('NeuralNetworkRegressor with binary_cross_entropy loss function only '
                                 'works with sigmoid output layer for regression between 0 and 1')
            # element-wise range check: a chained ``0 <= y <= 1`` would try to
            # take the truth value of a whole array and fail
            if not ((y >= 0) & (y <= 1)).all():
                raise ValueError('NeuralNetworkRegressor with binary_cross_entropy loss '
                                 'function only works for regression between 0 and 1')
        n_targets = y.shape[1]
        if self.layers[-1].fan_out != n_targets:
            raise ValueError(f'the number of neurons in the output layer must be '
                             f'equal to the number of targets, i.e., {n_targets}')

        return super(NeuralNetworkRegressor, self).fit(X, y)

    def predict(self, X):
        """
        Return the network output for ``X``, flattened to 1-D when the output
        layer has a single neuron.
        """
        if self.layers[-1].fan_out == 1:  # one target
            return self.forward(X).ravel()
        else:  # multi target
            return self.forward(X)
@@ -0,0 +1,79 @@
1
+ from abc import ABC
2
+
3
+ import numpy as np
4
+ from autograd.scipy.special import expit
5
+
6
+
7
class Activation(ABC):
    """
    Common interface of every activation function.

    A concrete activation provides ``function`` (the activation itself) and
    ``jacobian`` (its element-wise derivative). Calling the object is a
    shorthand for calling ``function``.
    """

    def __call__(self, x):
        # calling the instance evaluates the activation
        return self.function(x)

    def function(self, x):
        """Evaluate the activation at ``x``; to be provided by subclasses."""
        raise NotImplementedError

    def jacobian(self, x):
        """Evaluate the element-wise derivative at ``x``; to be provided by subclasses."""
        raise NotImplementedError
21
+
22
+
23
class Linear(Activation):
    r"""Identity activation, :math:`f(x) = x`: the input is returned unchanged."""

    def jacobian(self, x):
        # the identity has slope one at every point
        return np.ones_like(x)

    def function(self, x):
        return x
31
+
32
+
33
class ReLU(Activation):
    r"""Rectified linear unit, :math:`f(x) = \max(0, x)`."""

    def jacobian(self, x):
        # slope is one where the input is strictly positive, zero elsewhere
        is_positive = x > 0
        return np.where(is_positive, 1., 0.)

    def function(self, x):
        return np.maximum(0., x)
41
+
42
+
43
class Tanh(Activation):
    r"""Hyperbolic tangent, :math:`f(x) = \tanh(x)`."""

    def jacobian(self, x):
        # d/dx tanh(x) = 1 - tanh(x)^2
        activated = self.function(x)
        return 1. - np.square(activated)

    def function(self, x):
        return np.tanh(x)
51
+
52
+
53
class Sigmoid(Activation):
    r"""Logistic sigmoid, :math:`f(x) = \frac{1}{1 + e^{-x}}`."""

    def jacobian(self, x):
        # d/dx s(x) = s(x) * (1 - s(x))
        activated = self.function(x)
        return activated * (1. - activated)

    def function(self, x):
        return expit(x)
62
+
63
+
64
class SoftMax(Activation):
    r"""Softmax, :math:`f(x)_i = \frac{e^{x_i}}{\sum_j e^{x_j}}`, taken along ``axis``."""

    def function(self, x, axis=-1):
        # subtract the maximum along the axis before exponentiating so the
        # largest exponent is zero
        shifted = x - np.max(x, axis=axis, keepdims=True)
        exps = np.exp(shifted)
        normalizer = np.sum(exps, axis=axis, keepdims=True)
        return exps / normalizer

    def jacobian(self, x):
        # NOTE(review): returns ones rather than the softmax derivative,
        # presumably because that derivative is accounted for in the loss
        # gradient; confirm against the loss implementations
        return np.ones_like(x)
73
+
74
+
75
# Shared module-level instances of each activation; the estimators compare a
# layer's ``activation`` against these objects (e.g. ``!= softmax``).
softmax = SoftMax()
sigmoid = Sigmoid()
tanh = Tanh()
relu = ReLU()
linear = Linear()
@@ -0,0 +1,66 @@
1
+ import numpy as np
2
+
3
+
4
def truncated_normal(shape, mean=0., std=1., random_state=None):
    """
    Draw normal samples and clip them to two standard deviations around the mean.

    Parameters
    ----------
    shape : int or tuple of ints
        Shape of the returned array.
    mean : float, default=0.
        Mean of the normal distribution.
    std : float, default=1.
        Standard deviation of the normal distribution.
    random_state : int, array-like or None, default=None
        Seed passed to ``np.random.RandomState``.

    Returns
    -------
    ndarray
        Samples lying within ``[mean - 2 * std, mean + 2 * std]``. Values
        outside the interval are clipped to its bounds, not re-drawn.
    """
    samples = np.random.RandomState(random_state).normal(size=shape, loc=mean, scale=std)
    # the bounds must be centred on the mean: clipping to +/-(2 * std + mean)
    # is only correct for mean == 0
    return np.clip(samples, mean - 2 * std, mean + 2 * std)
7
+
8
+
9
def glorot_normal(shape, random_state=None):
    r"""Glorot (Xavier) normal initializer.

    Samples come from ``truncated_normal`` with mean 0 and

    .. math::

        \text{std} = \sqrt{\frac{2}{\text{fan\_in} + \text{fan\_out}}}

    where ``fan_in = shape[0]`` is the number of input units and
    ``fan_out = shape[1]`` the number of output units of the weight tensor."""
    n_in, n_out = shape[0], shape[1]
    scale = np.sqrt(2. / (n_in + n_out))
    return truncated_normal(shape=shape, mean=0., std=scale, random_state=random_state)
23
+
24
+
25
def glorot_uniform(shape, random_state=None):
    r"""Glorot (Xavier) uniform initializer.

    Samples are uniform within :math:`[-\text{limit}, \text{limit}]` with

    .. math::

        \text{limit} = \sqrt{\frac{6}{\text{fan\_in} + \text{fan\_out}}}

    where ``fan_in = shape[0]`` is the number of input units and
    ``fan_out = shape[1]`` the number of output units of the weight tensor."""
    n_in, n_out = shape[0], shape[1]
    bound = np.sqrt(6. / (n_in + n_out))
    rng = np.random.RandomState(random_state)
    return rng.uniform(low=-bound, high=bound, size=shape)
39
+
40
+
41
def he_normal(shape, random_state=None):
    r"""He normal initializer.

    Samples come from ``truncated_normal`` with mean 0 and

    .. math::

        \text{std} = \sqrt{\frac{2}{\text{fan\_in}}}

    where ``fan_in = shape[0]`` is the number of input units of the weight
    tensor."""
    # both entries are read, so ``shape`` must have at least two dimensions
    n_in, _ = shape[0], shape[1]
    scale = np.sqrt(2. / n_in)
    return truncated_normal(shape=shape, mean=0., std=scale, random_state=random_state)
53
+
54
+
55
def he_uniform(shape, random_state=None):
    r"""He uniform variance scaling initializer.

    Samples are uniform within :math:`[-\text{limit}, \text{limit}]` with

    .. math::

        \text{limit} = \sqrt{\frac{6}{\text{fan\_in}}}

    where ``fan_in = shape[0]`` is the number of input units of the weight
    tensor."""
    # both entries are read, so ``shape`` must have at least two dimensions
    n_in, _ = shape[0], shape[1]
    bound = np.sqrt(6. / n_in)
    rng = np.random.RandomState(random_state)
    return rng.uniform(low=-bound, high=bound, size=shape)