optiml 1.5__tar.gz → 1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optiml-1.7/LICENSE +21 -0
- optiml-1.7/PKG-INFO +203 -0
- {optiml-1.5 → optiml-1.7}/README.md +47 -31
- {optiml-1.5 → optiml-1.7}/optiml/ml/neural_network/_base.py +102 -1
- {optiml-1.5 → optiml-1.7}/optiml/ml/neural_network/activations.py +9 -0
- optiml-1.7/optiml/ml/neural_network/initializers.py +66 -0
- {optiml-1.5 → optiml-1.7}/optiml/ml/neural_network/layers.py +80 -0
- {optiml-1.5 → optiml-1.7}/optiml/ml/neural_network/losses.py +55 -8
- optiml-1.7/optiml/ml/neural_network/regularizers.py +87 -0
- {optiml-1.5 → optiml-1.7}/optiml/ml/svm/_base.py +4 -4
- {optiml-1.5 → optiml-1.7}/optiml/ml/svm/kernels.py +45 -10
- {optiml-1.5 → optiml-1.7}/optiml/ml/svm/losses.py +105 -36
- {optiml-1.5 → optiml-1.7}/optiml/ml/svm/smo.py +75 -1
- optiml-1.7/optiml/ml/tests/_datasets.py +49 -0
- optiml-1.7/optiml/ml/tests/_utils.py +28 -0
- optiml-1.7/optiml/ml/tests/test_initializers.py +33 -0
- {optiml-1.5 → optiml-1.7}/optiml/ml/tests/test_neural_network.py +17 -2
- {optiml-1.5 → optiml-1.7}/optiml/ml/tests/test_svc.py +29 -77
- {optiml-1.5 → optiml-1.7}/optiml/ml/tests/test_svr.py +15 -51
- {optiml-1.5 → optiml-1.7}/optiml/ml/utils.py +1 -2
- {optiml-1.5 → optiml-1.7}/optiml/opti/_base.py +14 -12
- {optiml-1.5 → optiml-1.7}/optiml/opti/constrained/_base.py +83 -28
- {optiml-1.5 → optiml-1.7}/optiml/opti/constrained/active_set.py +55 -33
- optiml-1.7/optiml/opti/constrained/frank_wolfe.py +158 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/constrained/interior_point.py +75 -38
- optiml-1.7/optiml/opti/constrained/projected_gradient.py +138 -0
- optiml-1.7/optiml/opti/constrained/tests/test_lower_bound.py +29 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/line_search/__init__.py +2 -2
- {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/line_search/conjugate_gradient.py +73 -110
- {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/line_search/gradient_descent.py +8 -61
- optiml-1.7/optiml/opti/unconstrained/line_search/newton.py +198 -0
- optiml-1.7/optiml/opti/unconstrained/line_search/quasi_newton.py +496 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/proximal_bundle.py +58 -68
- optiml-1.7/optiml/opti/unconstrained/stochastic/_base.py +246 -0
- optiml-1.7/optiml/opti/unconstrained/stochastic/adadelta.py +133 -0
- optiml-1.7/optiml/opti/unconstrained/stochastic/adagrad.py +123 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/stochastic/adam.py +50 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/stochastic/adamax.py +50 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/stochastic/amsgrad.py +49 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/stochastic/gradient_descent.py +40 -0
- optiml-1.7/optiml/opti/unconstrained/stochastic/rmsprop.py +156 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/stochastic/schedules.py +1 -1
- optiml-1.7/optiml/opti/unconstrained/tests/test_functions.py +34 -0
- optiml-1.7/optiml/opti/unconstrained/tests/test_quasi_newton.py +30 -0
- optiml-1.7/optiml/opti/unconstrained/tests/test_verbose.py +25 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/utils.py +54 -23
- optiml-1.7/optiml.egg-info/PKG-INFO +203 -0
- {optiml-1.5 → optiml-1.7}/optiml.egg-info/SOURCES.txt +8 -1
- {optiml-1.5 → optiml-1.7}/optiml.egg-info/requires.txt +1 -0
- {optiml-1.5 → optiml-1.7}/setup.py +4 -3
- optiml-1.5/PKG-INFO +0 -165
- optiml-1.5/optiml/ml/neural_network/initializers.py +0 -47
- optiml-1.5/optiml/ml/neural_network/regularizers.py +0 -46
- optiml-1.5/optiml/opti/constrained/frank_wolfe.py +0 -147
- optiml-1.5/optiml/opti/constrained/projected_gradient.py +0 -125
- optiml-1.5/optiml/opti/unconstrained/line_search/newton.py +0 -229
- optiml-1.5/optiml/opti/unconstrained/line_search/quasi_newton.py +0 -297
- optiml-1.5/optiml/opti/unconstrained/stochastic/_base.py +0 -167
- optiml-1.5/optiml/opti/unconstrained/stochastic/adadelta.py +0 -89
- optiml-1.5/optiml/opti/unconstrained/stochastic/adagrad.py +0 -81
- optiml-1.5/optiml/opti/unconstrained/stochastic/rmsprop.py +0 -108
- optiml-1.5/optiml/opti/unconstrained/tests/test_quasi_newton.py +0 -20
- optiml-1.5/optiml.egg-info/PKG-INFO +0 -165
- {optiml-1.5 → optiml-1.7}/optiml/__init__.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/ml/__init__.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/ml/neural_network/__init__.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/ml/svm/__init__.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/ml/tests/__init__.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/__init__.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/constrained/__init__.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/constrained/tests/__init__.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/constrained/tests/test_active_set.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/constrained/tests/test_frank_wolfe.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/constrained/tests/test_interior_point.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/constrained/tests/test_lagrangian_quadratic.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/constrained/tests/test_projected_gradient.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/__init__.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/_base.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/line_search/_base.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/line_search/line_search.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/stochastic/__init__.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/tests/__init__.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/tests/test_adadelta.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/tests/test_adagrad.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/tests/test_adam.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/tests/test_adamax.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/tests/test_amsgrad.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/tests/test_conjugate_gradient.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/tests/test_gradient_descent.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/tests/test_newton.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/tests/test_rmsprop.py +0 -0
- {optiml-1.5 → optiml-1.7}/optiml.egg-info/dependency_links.txt +0 -0
- {optiml-1.5 → optiml-1.7}/optiml.egg-info/top_level.txt +0 -0
- {optiml-1.5 → optiml-1.7}/setup.cfg +0 -0
optiml-1.7/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2019 Donato Meoli
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
optiml-1.7/PKG-INFO
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: optiml
|
|
3
|
+
Version: 1.7
|
|
4
|
+
Summary: Optimizers for/and sklearn compatible Machine Learning models
|
|
5
|
+
Home-page: https://github.com/dmeoli/optiml
|
|
6
|
+
Author: Donato Meoli
|
|
7
|
+
Author-email: donato.meoli@outlook.com
|
|
8
|
+
License: MIT
|
|
9
|
+
Requires-Python: >=3.9
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Requires-Dist: autograd
|
|
13
|
+
Requires-Dist: casadi
|
|
14
|
+
Requires-Dist: cvxopt
|
|
15
|
+
Requires-Dist: cvxpy
|
|
16
|
+
Requires-Dist: matplotlib
|
|
17
|
+
Requires-Dist: numpy
|
|
18
|
+
Requires-Dist: pytest
|
|
19
|
+
Requires-Dist: qpsolvers
|
|
20
|
+
Requires-Dist: quadprog
|
|
21
|
+
Requires-Dist: scikit-learn
|
|
22
|
+
Requires-Dist: scipy
|
|
23
|
+
Requires-Dist: wurlitzer
|
|
24
|
+
Dynamic: author
|
|
25
|
+
Dynamic: author-email
|
|
26
|
+
Dynamic: description
|
|
27
|
+
Dynamic: description-content-type
|
|
28
|
+
Dynamic: home-page
|
|
29
|
+
Dynamic: license
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
Dynamic: requires-dist
|
|
32
|
+
Dynamic: requires-python
|
|
33
|
+
Dynamic: summary
|
|
34
|
+
|
|
35
|
+
# OptiML
|
|
36
|
+
|
|
37
|
+
[CI status](https://github.com/dmeoli/optiml/actions/workflows/ci.yml)
|
|
38
|
+
[Coverage status](https://coveralls.io/github/dmeoli/optiml?branch=master)
|
|
39
|
+
[Python 3.9 | 3.10 | 3.11 | 3.12](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue)
|
|
40
|
+
[PyPI package](https://pypi.org/project/optiml/)
|
|
41
|
+
[PyPI downloads](https://pypistats.org/packages/optiml)
|
|
42
|
+
[Launch on Binder](https://mybinder.org/v2/gh/dmeoli/optiml/master)
|
|
43
|
+
[Documentation](https://dmeoli.github.io/optiml)
|
|
44
|
+
|
|
45
|
+
OptiML is a sklearn compatible implementation of *Support Vector Machines* and *Deep Neural Networks*, both with some of
|
|
46
|
+
the most successful features according to the state of the art.
|
|
47
|
+
|
|
48
|
+
This work was motivated by the possibility of being able to solve the optimization problem deriving from the
|
|
49
|
+
mathematical formulation of these models through a wide range of optimization algorithms studied and developed
|
|
50
|
+
for the Numerical Methods and Optimization course @ [Department of Computer Science](https://www.di.unipi.it/en/) @
|
|
51
|
+
[University of Pisa](https://www.unipi.it/index.php/english) under the supervision of
|
|
52
|
+
prof. [Antonio Frangioni](http://pages.di.unipi.it/frangio/).
|
|
53
|
+
|
|
54
|
+
## Contents
|
|
55
|
+
|
|
56
|
+
- Numerical Optimization
|
|
57
|
+
- Unconstrained Optimization
|
|
58
|
+
- Line Search Methods
|
|
59
|
+
- 1st Order Methods
|
|
60
|
+
- [x] Steepest Gradient Descent
|
|
61
|
+
- [x] Conjugate Gradient
|
|
62
|
+
- [x] Fletcher–Reeves formula
|
|
63
|
+
- [x] Polak–Ribière formula
|
|
64
|
+
- [x] Hestenes-Stiefel formula
|
|
65
|
+
- [x] Dai-Yuan formula
|
|
66
|
+
- 2nd Order Methods
|
|
67
|
+
- [x] Newton
|
|
68
|
+
- Quasi-Newton
|
|
69
|
+
- [x] BFGS
|
|
70
|
+
- [x] L-BFGS
|
|
71
|
+
- Stochastic Methods
|
|
72
|
+
- [x] Stochastic Gradient Descent
|
|
73
|
+
- [x] Momentum
|
|
74
|
+
- [x] Polyak
|
|
75
|
+
- [x] Nesterov
|
|
76
|
+
- [x] Adam
|
|
77
|
+
- [x] Momentum
|
|
78
|
+
- [x] Polyak
|
|
79
|
+
- [x] Nesterov
|
|
80
|
+
- [x] AMSGrad
|
|
81
|
+
- [x] Momentum
|
|
82
|
+
- [x] Polyak
|
|
83
|
+
- [x] Nesterov
|
|
84
|
+
- [x] AdaMax
|
|
85
|
+
- [x] Momentum
|
|
86
|
+
- [x] Polyak
|
|
87
|
+
- [x] Nesterov
|
|
88
|
+
- [x] AdaGrad
|
|
89
|
+
- [x] AdaDelta
|
|
90
|
+
- [x] RMSProp
|
|
91
|
+
- [x] Momentum
|
|
92
|
+
- [x] Polyak
|
|
93
|
+
- [x] Nesterov
|
|
94
|
+
- [x] Schedules
|
|
95
|
+
- Step size
|
|
96
|
+
- [x] Decaying
|
|
97
|
+
- [x] Linear Annealing
|
|
98
|
+
- [x] Repeater
|
|
99
|
+
- Momentum
|
|
100
|
+
- [x] Sutskever Blend
|
|
101
|
+
- [x] Proximal Bundle with [cvxpy](https://github.com/cvxgrp/cvxpy) interface to
|
|
102
|
+
[ecos](https://github.com/embotech/ecos), [osqp](https://github.com/oxfordcontrol/osqp),
|
|
103
|
+
[scs](https://github.com/cvxgrp/scs),
|
|
104
|
+
[etc](https://www.cvxpy.org/tutorial/advanced/index.html#choosing-a-solver).
|
|
105
|
+
- Constrained Quadratic Optimization
|
|
106
|
+
- Box-Constrained Quadratic Methods
|
|
107
|
+
- [x] Projected Gradient
|
|
108
|
+
- [x] Frank-Wolfe or Conditional Gradient
|
|
109
|
+
- [x] Active Set
|
|
110
|
+
- [x] Interior Point
|
|
111
|
+
- [x] Lagrangian Dual
|
|
112
|
+
- [x] Augmented Lagrangian Dual
|
|
113
|
+
|
|
114
|
+
- Machine Learning
|
|
115
|
+
- [x] Support Vector Machines
|
|
116
|
+
- Formulations
|
|
117
|
+
- Primal
|
|
118
|
+
- Wolfe Dual
|
|
119
|
+
- Lagrangian Dual
|
|
120
|
+
- [x] Support Vector Classifier
|
|
121
|
+
- Losses
|
|
122
|
+
- [x] Hinge (L1 Loss) 
|
|
123
|
+
- [x] Squared Hinge (L2 Loss) 
|
|
124
|
+
- [x] Support Vector Regression
|
|
125
|
+
- Losses
|
|
126
|
+
- [x] Epsilon-insensitive (L1 Loss) 
|
|
127
|
+
- [x] Squared Epsilon-insensitive (L2 Loss) 
|
|
128
|
+
- Kernels
|
|
129
|
+
- [x] Linear
|
|
130
|
+
|
|
131
|
+
| SVC | SVR |
|
|
132
|
+
|:--------------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------:|
|
|
133
|
+
|  |  |
|
|
134
|
+
|
|
135
|
+
- [x] Polynomial
|
|
136
|
+
|
|
137
|
+
| SVC | SVR |
|
|
138
|
+
| :----: | :----: |
|
|
139
|
+
|  |  |
|
|
140
|
+
|
|
141
|
+
- [x] Gaussian
|
|
142
|
+
|
|
143
|
+
| SVC | SVR |
|
|
144
|
+
|:------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------:|
|
|
145
|
+
|  |  |
|
|
146
|
+
|
|
147
|
+
- [x] Laplacian
|
|
148
|
+
|
|
149
|
+
| SVC | SVR |
|
|
150
|
+
|:--------------------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------:|
|
|
151
|
+
|  |  |
|
|
152
|
+
|
|
153
|
+
- [x] Sigmoid
|
|
154
|
+
- Optimizers (ad hoc)
|
|
155
|
+
- [x] Sequential Minimal Optimization (SMO)
|
|
156
|
+
- [x] QP solver with [qpsolvers](https://github.com/stephane-caron/qpsolvers) interface to
|
|
157
|
+
[cvxopt](https://github.com/cvxopt/cvxopt), [quadprog](https://github.com/rmcgibbo/quadprog),
|
|
158
|
+
[qpOASES](https://github.com/coin-or/qpOASES), [etc](https://github.com/stephane-caron/qpsolvers#solvers).
|
|
159
|
+
- [x] Neural Networks
|
|
160
|
+
- [x] Neural Network Classifier
|
|
161
|
+
- [x] Neural Network Regressor
|
|
162
|
+
- Losses
|
|
163
|
+
- [x] Mean Absolute Error (L1 Loss)
|
|
164
|
+
- [x] Mean Squared Error (L2 Loss)
|
|
165
|
+
- [x] Binary Cross Entropy
|
|
166
|
+
- [x] Categorical Cross Entropy
|
|
167
|
+
- [x] Sparse Categorical Cross Entropy
|
|
168
|
+
- Regularizers
|
|
169
|
+
- [x] L1 or Lasso
|
|
170
|
+
- [x] L2 or Ridge or Tikhonov
|
|
171
|
+
- Activations
|
|
172
|
+
- [x] Linear
|
|
173
|
+
- [x] Sigmoid
|
|
174
|
+
- [x] Tanh
|
|
175
|
+
- [x] ReLU
|
|
176
|
+
- [x] SoftMax
|
|
177
|
+
- Layers
|
|
178
|
+
- [x] Fully Connected
|
|
179
|
+
- Initializers
|
|
180
|
+
- [x] Xavier or Glorot (normal and uniform)
|
|
181
|
+
- [x] He (normal and uniform)
|
|
182
|
+
|
|
183
|
+
## Install
|
|
184
|
+
|
|
185
|
+
```
|
|
186
|
+
pip install optiml
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
## Documentation
|
|
190
|
+
|
|
191
|
+
The full API reference is available at [dmeoli.github.io/optiml](https://dmeoli.github.io/optiml),
|
|
192
|
+
automatically built from the source docstrings and published to GitHub Pages on every push to `master`.
|
|
193
|
+
|
|
194
|
+
It can also be built locally with [Sphinx](https://www.sphinx-doc.org):
|
|
195
|
+
|
|
196
|
+
```
|
|
197
|
+
pip install -r docs/requirements.txt
|
|
198
|
+
sphinx-build -b html docs docs/_build/html
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
## License [MIT](https://opensource.org/licenses/MIT)
|
|
202
|
+
|
|
203
|
+
This software is released under the MIT License. See the [LICENSE](LICENSE) file for details.
|
|
@@ -1,18 +1,21 @@
|
|
|
1
1
|
# OptiML
|
|
2
|
-
|
|
3
|
-
[](https://github.com/dmeoli/optiml/actions/workflows/ci.yml)
|
|
4
|
+
[](https://coveralls.io/github/dmeoli/optiml?branch=master)
|
|
5
|
+
[](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue)
|
|
5
6
|
[](https://pypi.org/project/optiml/)
|
|
6
7
|
[](https://pypistats.org/packages/optiml)
|
|
7
8
|
[](https://mybinder.org/v2/gh/dmeoli/optiml/master)
|
|
9
|
+
[](https://dmeoli.github.io/optiml)
|
|
8
10
|
|
|
9
|
-
OptiML is a sklearn compatible implementation of *Support Vector Machines* and *Deep Neural Networks*,
|
|
10
|
-
|
|
11
|
+
OptiML is a sklearn compatible implementation of *Support Vector Machines* and *Deep Neural Networks*, both with some of
|
|
12
|
+
the most successful features according to the state of the art.
|
|
11
13
|
|
|
12
|
-
This work was motivated by the possibility of being able to solve the optimization problem deriving from the
|
|
13
|
-
formulation of these models through a wide range of optimization algorithms object of study and developed
|
|
14
|
-
Numerical Methods and Optimization course
|
|
15
|
-
[University of Pisa](https://www.unipi.it/index.php/english) under the supervision of
|
|
14
|
+
This work was motivated by the possibility of being able to solve the optimization problem deriving from the
|
|
15
|
+
mathematical formulation of these models through a wide range of optimization algorithms studied and developed
|
|
16
|
+
for the Numerical Methods and Optimization course @ [Department of Computer Science](https://www.di.unipi.it/en/) @
|
|
17
|
+
[University of Pisa](https://www.unipi.it/index.php/english) under the supervision of
|
|
18
|
+
prof. [Antonio Frangioni](http://pages.di.unipi.it/frangio/).
|
|
16
19
|
|
|
17
20
|
## Contents
|
|
18
21
|
|
|
@@ -30,7 +33,7 @@ Numerical Methods and Optimization course @ [Department of Computer Science](ht
|
|
|
30
33
|
- [x] Newton
|
|
31
34
|
- Quasi-Newton
|
|
32
35
|
- [x] BFGS
|
|
33
|
-
- [
|
|
36
|
+
- [x] L-BFGS
|
|
34
37
|
- Stochastic Methods
|
|
35
38
|
- [x] Stochastic Gradient Descent
|
|
36
39
|
- [x] Momentum
|
|
@@ -61,9 +64,10 @@ Numerical Methods and Optimization course @ [Department of Computer Science](ht
|
|
|
61
64
|
- [x] Repeater
|
|
62
65
|
- Momentum
|
|
63
66
|
- [x] Sutskever Blend
|
|
64
|
-
- [x] Proximal Bundle with [cvxpy](https://github.com/cvxgrp/cvxpy) interface to
|
|
65
|
-
[ecos](https://github.com/embotech/ecos), [osqp](https://github.com/oxfordcontrol/osqp),
|
|
66
|
-
[scs](https://github.com/cvxgrp/scs),
|
|
67
|
+
- [x] Proximal Bundle with [cvxpy](https://github.com/cvxgrp/cvxpy) interface to
|
|
68
|
+
[ecos](https://github.com/embotech/ecos), [osqp](https://github.com/oxfordcontrol/osqp),
|
|
69
|
+
[scs](https://github.com/cvxgrp/scs),
|
|
70
|
+
[etc](https://www.cvxpy.org/tutorial/advanced/index.html#choosing-a-solver).
|
|
67
71
|
- Constrained Quadratic Optimization
|
|
68
72
|
- Box-Constrained Quadratic Methods
|
|
69
73
|
- [x] Projected Gradient
|
|
@@ -89,35 +93,35 @@ Numerical Methods and Optimization course @ [Department of Computer Science](ht
|
|
|
89
93
|
- [x] Squared Epsilon-insensitive (L2 Loss) 
|
|
90
94
|
- Kernels
|
|
91
95
|
- [x] Linear
|
|
92
|
-
|
|
93
|
-
|
|
|
94
|
-
|
|
96
|
+
|
|
97
|
+
| SVC | SVR |
|
|
98
|
+
|:--------------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------:|
|
|
95
99
|
|  |  |
|
|
96
|
-
|
|
100
|
+
|
|
97
101
|
- [x] Polynomial
|
|
98
|
-
|
|
99
|
-
|
|
|
102
|
+
|
|
103
|
+
| SVC | SVR |
|
|
100
104
|
| :----: | :----: |
|
|
101
105
|
|  |  |
|
|
102
|
-
|
|
106
|
+
|
|
103
107
|
- [x] Gaussian
|
|
104
|
-
|
|
105
|
-
|
|
|
106
|
-
|
|
108
|
+
|
|
109
|
+
| SVC | SVR |
|
|
110
|
+
|:------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------:|
|
|
107
111
|
|  |  |
|
|
108
|
-
|
|
112
|
+
|
|
109
113
|
- [x] Laplacian
|
|
110
|
-
|
|
111
|
-
|
|
|
112
|
-
|
|
114
|
+
|
|
115
|
+
| SVC | SVR |
|
|
116
|
+
|:--------------------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------:|
|
|
113
117
|
|  |  |
|
|
114
|
-
|
|
118
|
+
|
|
115
119
|
- [x] Sigmoid
|
|
116
120
|
- Optimizers (ad hoc)
|
|
117
121
|
- [x] Sequential Minimal Optimization (SMO)
|
|
118
|
-
- [x] QP solver with [qpsolvers](https://github.com/stephane-caron/qpsolvers) interface to
|
|
119
|
-
|
|
120
|
-
|
|
122
|
+
- [x] QP solver with [qpsolvers](https://github.com/stephane-caron/qpsolvers) interface to
|
|
123
|
+
[cvxopt](https://github.com/cvxopt/cvxopt), [quadprog](https://github.com/rmcgibbo/quadprog),
|
|
124
|
+
[qpOASES](https://github.com/coin-or/qpOASES), [etc](https://github.com/stephane-caron/qpsolvers#solvers).
|
|
121
125
|
- [x] Neural Networks
|
|
122
126
|
- [x] Neural Network Classifier
|
|
123
127
|
- [x] Neural Network Regressor
|
|
@@ -148,6 +152,18 @@ Numerical Methods and Optimization course @ [Department of Computer Science](ht
|
|
|
148
152
|
pip install optiml
|
|
149
153
|
```
|
|
150
154
|
|
|
155
|
+
## Documentation
|
|
156
|
+
|
|
157
|
+
The full API reference is available at [dmeoli.github.io/optiml](https://dmeoli.github.io/optiml),
|
|
158
|
+
automatically built from the source docstrings and published to GitHub Pages on every push to `master`.
|
|
159
|
+
|
|
160
|
+
It can also be built locally with [Sphinx](https://www.sphinx-doc.org):
|
|
161
|
+
|
|
162
|
+
```
|
|
163
|
+
pip install -r docs/requirements.txt
|
|
164
|
+
sphinx-build -b html docs docs/_build/html
|
|
165
|
+
```
|
|
166
|
+
|
|
151
167
|
## License [](https://opensource.org/licenses/MIT)
|
|
152
168
|
|
|
153
169
|
This software is released under the MIT License. See the [LICENSE](LICENSE) file for details.
|
|
@@ -18,6 +18,97 @@ from ...opti.unconstrained.stochastic import StochasticOptimizer, StochasticGrad
|
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
class NeuralNetwork(BaseEstimator, Layer, ABC):
|
|
21
|
+
"""
|
|
22
|
+
Base abstract class for all feed-forward neural network estimators.
|
|
23
|
+
It chains a sequence of layers, performs forward/backward propagation
|
|
24
|
+
and trains the network parameters by minimizing the given loss with
|
|
25
|
+
the chosen optimizer.
|
|
26
|
+
|
|
27
|
+
Parameters
|
|
28
|
+
----------
|
|
29
|
+
|
|
30
|
+
layers : tuple of `Layer` instances, default=()
|
|
31
|
+
The ordered sequence of layers composing the network.
|
|
32
|
+
|
|
33
|
+
loss : `NeuralNetworkLoss` subclass, default=mean_squared_error
|
|
34
|
+
Specifies the loss function to minimize.
|
|
35
|
+
|
|
36
|
+
optimizer : `Optimizer` subclass, default=StochasticGradientDescent
|
|
37
|
+
The solver for optimization. It can be a subclass of the
|
|
38
|
+
`LineSearchOptimizer`, the `ProximalBundle` method or a subclass
|
|
39
|
+
of the `StochasticOptimizer`.
|
|
40
|
+
|
|
41
|
+
learning_rate : float, default=0.01
|
|
42
|
+
The initial learning rate used for weight update. It controls the
|
|
43
|
+
step-size in updating the weights. Only used when ``optimizer`` is a
|
|
44
|
+
subclass of `StochasticOptimizer`.
|
|
45
|
+
|
|
46
|
+
max_iter : int, default=1000
|
|
47
|
+
Maximum number of iterations. The solver iterates until convergence
|
|
48
|
+
(determined by ``tol``) or this number of iterations. If the optimizer
|
|
49
|
+
is a subclass of `StochasticOptimizer`, this value determines the number
|
|
50
|
+
of epochs, not the number of gradient steps.
|
|
51
|
+
|
|
52
|
+
momentum_type : {'none', 'polyak', 'nesterov'}, default='none'
|
|
53
|
+
Momentum type used for weight update. Only used when ``optimizer`` is
|
|
54
|
+
a subclass of `StochasticMomentumOptimizer`.
|
|
55
|
+
|
|
56
|
+
momentum : float, default=0.9
|
|
57
|
+
Momentum for weight update. Should be between 0 and 1. Only used when
|
|
58
|
+
``optimizer`` is a subclass of `StochasticMomentumOptimizer`.
|
|
59
|
+
|
|
60
|
+
tol : float, default=1e-4
|
|
61
|
+
Tolerance for stopping criterion.
|
|
62
|
+
|
|
63
|
+
validation_split : float, default=0.
|
|
64
|
+
The proportion of training data to set aside as validation set for
|
|
65
|
+
early stopping. Must be between 0 and 1. Only used when ``optimizer``
|
|
66
|
+
is a subclass of `StochasticOptimizer`.
|
|
67
|
+
|
|
68
|
+
batch_size : int, default=None
|
|
69
|
+
Size of mini batches for stochastic optimizers.
|
|
70
|
+
Only used when ``optimizer`` is a subclass of `StochasticOptimizer`.
|
|
71
|
+
|
|
72
|
+
max_f_eval : int, default=15000
|
|
73
|
+
Maximum number of loss function calls. Only used when ``optimizer``
|
|
74
|
+
is a subclass of `LineSearchOptimizer`.
|
|
75
|
+
|
|
76
|
+
early_stopping : bool, default=False
|
|
77
|
+
Whether to use early stopping to terminate training when the
|
|
78
|
+
monitored score/loss does not improve by at least ``tol`` for
|
|
79
|
+
``patience`` consecutive epochs.
|
|
80
|
+
Only used when ``optimizer`` is a subclass of `StochasticOptimizer`.
|
|
81
|
+
|
|
82
|
+
patience : int, default=5
|
|
83
|
+
Maximum number of consecutive epochs without an improvement of at least ``tol``.
|
|
84
|
+
Only used when ``optimizer`` is a subclass of `StochasticOptimizer`.
|
|
85
|
+
|
|
86
|
+
shuffle : bool, default=True
|
|
87
|
+
Whether to shuffle samples for batch sampling in each iteration. Only
|
|
88
|
+
used when the ``optimizer`` is a subclass of `StochasticOptimizer`.
|
|
89
|
+
|
|
90
|
+
random_state : int, RandomState instance or None, default=None
|
|
91
|
+
Controls the pseudo random number generation for the train-validation
|
|
92
|
+
split and for shuffling the data in batch sampling.
|
|
93
|
+
Pass an int for reproducible output across multiple function calls.
|
|
94
|
+
|
|
95
|
+
mu : float, default=1
|
|
96
|
+
Mu parameter for the proximal bundle method.
|
|
97
|
+
Only used when ``optimizer`` is `ProximalBundle`. Must be strictly positive.
|
|
98
|
+
|
|
99
|
+
master_solver : string, default='clarabel'
|
|
100
|
+
Master solver for the proximal bundle method for the CVXPY interface.
|
|
101
|
+
Only used when ``optimizer`` is `ProximalBundle`.
|
|
102
|
+
|
|
103
|
+
master_verbose : bool or int, default=False
|
|
104
|
+
Controls the verbosity of the CVXPY interface.
|
|
105
|
+
Only used when ``optimizer`` is `ProximalBundle`.
|
|
106
|
+
|
|
107
|
+
verbose : bool or int, default=False
|
|
108
|
+
Controls the verbosity of progress messages to stdout. Use a boolean value
|
|
109
|
+
to switch on/off or an int value to show progress every ``verbose``
|
|
110
|
+
optimization steps.
|
|
111
|
+
"""
|
|
21
112
|
|
|
22
113
|
def __init__(self,
|
|
23
114
|
layers=(),
|
|
@@ -36,7 +127,7 @@ class NeuralNetwork(BaseEstimator, Layer, ABC):
|
|
|
36
127
|
shuffle=True,
|
|
37
128
|
random_state=None,
|
|
38
129
|
mu=1,
|
|
39
|
-
master_solver='
|
|
130
|
+
master_solver='clarabel',
|
|
40
131
|
master_verbose=False,
|
|
41
132
|
verbose=False):
|
|
42
133
|
self.layers = layers
|
|
@@ -277,6 +368,11 @@ class NeuralNetwork(BaseEstimator, Layer, ABC):
|
|
|
277
368
|
|
|
278
369
|
|
|
279
370
|
class NeuralNetworkClassifier(ClassifierMixin, NeuralNetwork):
|
|
371
|
+
"""
|
|
372
|
+
Feed-forward neural network for classification. The output layer must be
|
|
373
|
+
sigmoid (binary/multi-label) or softmax (multi-class), consistently with
|
|
374
|
+
the chosen loss function.
|
|
375
|
+
"""
|
|
280
376
|
|
|
281
377
|
def _store_train_val_info(self, opt, X_batch, y_batch, X_val, y_val):
|
|
282
378
|
super(NeuralNetworkClassifier, self)._store_train_val_info(opt, X_batch, y_batch, X_val, y_val)
|
|
@@ -331,6 +427,11 @@ class NeuralNetworkClassifier(ClassifierMixin, NeuralNetwork):
|
|
|
331
427
|
|
|
332
428
|
|
|
333
429
|
class NeuralNetworkRegressor(RegressorMixin, NeuralNetwork):
|
|
430
|
+
"""
|
|
431
|
+
Feed-forward neural network for regression. The output layer must be
|
|
432
|
+
linear or, for regression between 0 and 1, sigmoid. The number of output
|
|
433
|
+
neurons must equal the number of targets.
|
|
434
|
+
"""
|
|
334
435
|
|
|
335
436
|
def _store_train_val_info(self, opt, X_batch, y_batch, X_val, y_val):
|
|
336
437
|
super(NeuralNetworkRegressor, self)._store_train_val_info(opt, X_batch, y_batch, X_val, y_val)
|
|
@@ -5,6 +5,10 @@ from autograd.scipy.special import expit
|
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
class Activation(ABC):
|
|
8
|
+
"""
|
|
9
|
+
Base abstract class for all activation functions. Subclasses must
|
|
10
|
+
implement ``function`` and its element-wise derivative ``jacobian``.
|
|
11
|
+
"""
|
|
8
12
|
|
|
9
13
|
def function(self, x):
|
|
10
14
|
raise NotImplementedError
|
|
@@ -17,6 +21,7 @@ class Activation(ABC):
|
|
|
17
21
|
|
|
18
22
|
|
|
19
23
|
class Linear(Activation):
|
|
24
|
+
r"""Identity (linear) activation function :math:`f(x) = x`."""
|
|
20
25
|
|
|
21
26
|
def function(self, x):
|
|
22
27
|
return x
|
|
@@ -26,6 +31,7 @@ class Linear(Activation):
|
|
|
26
31
|
|
|
27
32
|
|
|
28
33
|
class ReLU(Activation):
|
|
34
|
+
r"""Rectified linear unit activation function :math:`f(x) = \max(0, x)`."""
|
|
29
35
|
|
|
30
36
|
def function(self, x):
|
|
31
37
|
return np.maximum(0., x)
|
|
@@ -35,6 +41,7 @@ class ReLU(Activation):
|
|
|
35
41
|
|
|
36
42
|
|
|
37
43
|
class Tanh(Activation):
|
|
44
|
+
r"""Hyperbolic tangent activation function :math:`f(x) = \tanh(x)`."""
|
|
38
45
|
|
|
39
46
|
def function(self, x):
|
|
40
47
|
return np.tanh(x)
|
|
@@ -44,6 +51,7 @@ class Tanh(Activation):
|
|
|
44
51
|
|
|
45
52
|
|
|
46
53
|
class Sigmoid(Activation):
|
|
54
|
+
r"""Logistic sigmoid activation function :math:`f(x) = \frac{1}{1 + e^{-x}}`."""
|
|
47
55
|
|
|
48
56
|
def function(self, x):
|
|
49
57
|
return expit(x)
|
|
@@ -54,6 +62,7 @@ class Sigmoid(Activation):
|
|
|
54
62
|
|
|
55
63
|
|
|
56
64
|
class SoftMax(Activation):
|
|
65
|
+
r"""Softmax activation function :math:`f(x)_i = \frac{e^{x_i}}{\sum_j e^{x_j}}`."""
|
|
57
66
|
|
|
58
67
|
def function(self, x, axis=-1):
|
|
59
68
|
exps = np.exp(x - np.max(x, axis=axis, keepdims=True))
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def truncated_normal(shape, mean=0., std=1., random_state=None):
|
|
5
|
+
truncated = 2 * std + mean
|
|
6
|
+
return np.clip(np.random.RandomState(random_state).normal(size=shape, loc=mean, scale=std), -truncated, truncated)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def glorot_normal(shape, random_state=None):
|
|
10
|
+
r"""Glorot normal initializer, also called Xavier normal initializer.
|
|
11
|
+
It draws samples from a truncated normal distribution centered on 0
|
|
12
|
+
with
|
|
13
|
+
|
|
14
|
+
.. math::
|
|
15
|
+
|
|
16
|
+
\text{std} = \sqrt{\frac{2}{\text{fan\_in} + \text{fan\_out}}}
|
|
17
|
+
|
|
18
|
+
where ``fan_in`` is the number of input units in the weight tensor
|
|
19
|
+
and ``fan_out`` is the number of output units in the weight tensor."""
|
|
20
|
+
fan_in, fan_out = shape[0], shape[1]
|
|
21
|
+
std = np.sqrt(2. / (fan_in + fan_out))
|
|
22
|
+
return truncated_normal(shape=shape, mean=0., std=std, random_state=random_state)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def glorot_uniform(shape, random_state=None):
|
|
26
|
+
r"""Glorot uniform initializer, also called Xavier uniform initializer.
|
|
27
|
+
It draws samples from a uniform distribution within
|
|
28
|
+
:math:`[-\text{limit}, \text{limit}]` where
|
|
29
|
+
|
|
30
|
+
.. math::
|
|
31
|
+
|
|
32
|
+
\text{limit} = \sqrt{\frac{6}{\text{fan\_in} + \text{fan\_out}}}
|
|
33
|
+
|
|
34
|
+
where ``fan_in`` is the number of input units in the weight tensor
|
|
35
|
+
and ``fan_out`` is the number of output units in the weight tensor."""
|
|
36
|
+
fan_in, fan_out = shape[0], shape[1]
|
|
37
|
+
limit = np.sqrt(6. / (fan_in + fan_out))
|
|
38
|
+
return np.random.RandomState(random_state).uniform(size=shape, low=-limit, high=limit)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def he_normal(shape, random_state=None):
|
|
42
|
+
r"""He normal initializer. It draws samples from a truncated normal
|
|
43
|
+
distribution centered on 0 with
|
|
44
|
+
|
|
45
|
+
.. math::
|
|
46
|
+
|
|
47
|
+
\text{std} = \sqrt{\frac{2}{\text{fan\_in}}}
|
|
48
|
+
|
|
49
|
+
where ``fan_in`` is the number of input units in the weight tensor."""
|
|
50
|
+
fan_in, fan_out = shape[0], shape[1]
|
|
51
|
+
std = np.sqrt(2. / fan_in)
|
|
52
|
+
return truncated_normal(shape=shape, mean=0., std=std, random_state=random_state)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def he_uniform(shape, random_state=None):
|
|
56
|
+
r"""He uniform variance scaling initializer. It draws samples from
|
|
57
|
+
a uniform distribution within :math:`[-\text{limit}, \text{limit}]` where
|
|
58
|
+
|
|
59
|
+
.. math::
|
|
60
|
+
|
|
61
|
+
\text{limit} = \sqrt{\frac{6}{\text{fan\_in}}}
|
|
62
|
+
|
|
63
|
+
where ``fan_in`` is the number of input units in the weight tensor."""
|
|
64
|
+
fan_in, fan_out = shape[0], shape[1]
|
|
65
|
+
limit = np.sqrt(6. / fan_in)
|
|
66
|
+
return np.random.RandomState(random_state).uniform(size=shape, low=-limit, high=limit)
|