optiml 1.5__tar.gz → 1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. optiml-1.7/LICENSE +21 -0
  2. optiml-1.7/PKG-INFO +203 -0
  3. {optiml-1.5 → optiml-1.7}/README.md +47 -31
  4. {optiml-1.5 → optiml-1.7}/optiml/ml/neural_network/_base.py +102 -1
  5. {optiml-1.5 → optiml-1.7}/optiml/ml/neural_network/activations.py +9 -0
  6. optiml-1.7/optiml/ml/neural_network/initializers.py +66 -0
  7. {optiml-1.5 → optiml-1.7}/optiml/ml/neural_network/layers.py +80 -0
  8. {optiml-1.5 → optiml-1.7}/optiml/ml/neural_network/losses.py +55 -8
  9. optiml-1.7/optiml/ml/neural_network/regularizers.py +87 -0
  10. {optiml-1.5 → optiml-1.7}/optiml/ml/svm/_base.py +4 -4
  11. {optiml-1.5 → optiml-1.7}/optiml/ml/svm/kernels.py +45 -10
  12. {optiml-1.5 → optiml-1.7}/optiml/ml/svm/losses.py +105 -36
  13. {optiml-1.5 → optiml-1.7}/optiml/ml/svm/smo.py +75 -1
  14. optiml-1.7/optiml/ml/tests/_datasets.py +49 -0
  15. optiml-1.7/optiml/ml/tests/_utils.py +28 -0
  16. optiml-1.7/optiml/ml/tests/test_initializers.py +33 -0
  17. {optiml-1.5 → optiml-1.7}/optiml/ml/tests/test_neural_network.py +17 -2
  18. {optiml-1.5 → optiml-1.7}/optiml/ml/tests/test_svc.py +29 -77
  19. {optiml-1.5 → optiml-1.7}/optiml/ml/tests/test_svr.py +15 -51
  20. {optiml-1.5 → optiml-1.7}/optiml/ml/utils.py +1 -2
  21. {optiml-1.5 → optiml-1.7}/optiml/opti/_base.py +14 -12
  22. {optiml-1.5 → optiml-1.7}/optiml/opti/constrained/_base.py +83 -28
  23. {optiml-1.5 → optiml-1.7}/optiml/opti/constrained/active_set.py +55 -33
  24. optiml-1.7/optiml/opti/constrained/frank_wolfe.py +158 -0
  25. {optiml-1.5 → optiml-1.7}/optiml/opti/constrained/interior_point.py +75 -38
  26. optiml-1.7/optiml/opti/constrained/projected_gradient.py +138 -0
  27. optiml-1.7/optiml/opti/constrained/tests/test_lower_bound.py +29 -0
  28. {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/line_search/__init__.py +2 -2
  29. {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/line_search/conjugate_gradient.py +73 -110
  30. {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/line_search/gradient_descent.py +8 -61
  31. optiml-1.7/optiml/opti/unconstrained/line_search/newton.py +198 -0
  32. optiml-1.7/optiml/opti/unconstrained/line_search/quasi_newton.py +496 -0
  33. {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/proximal_bundle.py +58 -68
  34. optiml-1.7/optiml/opti/unconstrained/stochastic/_base.py +246 -0
  35. optiml-1.7/optiml/opti/unconstrained/stochastic/adadelta.py +133 -0
  36. optiml-1.7/optiml/opti/unconstrained/stochastic/adagrad.py +123 -0
  37. {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/stochastic/adam.py +50 -0
  38. {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/stochastic/adamax.py +50 -0
  39. {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/stochastic/amsgrad.py +49 -0
  40. {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/stochastic/gradient_descent.py +40 -0
  41. optiml-1.7/optiml/opti/unconstrained/stochastic/rmsprop.py +156 -0
  42. {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/stochastic/schedules.py +1 -1
  43. optiml-1.7/optiml/opti/unconstrained/tests/test_functions.py +34 -0
  44. optiml-1.7/optiml/opti/unconstrained/tests/test_quasi_newton.py +30 -0
  45. optiml-1.7/optiml/opti/unconstrained/tests/test_verbose.py +25 -0
  46. {optiml-1.5 → optiml-1.7}/optiml/opti/utils.py +54 -23
  47. optiml-1.7/optiml.egg-info/PKG-INFO +203 -0
  48. {optiml-1.5 → optiml-1.7}/optiml.egg-info/SOURCES.txt +8 -1
  49. {optiml-1.5 → optiml-1.7}/optiml.egg-info/requires.txt +1 -0
  50. {optiml-1.5 → optiml-1.7}/setup.py +4 -3
  51. optiml-1.5/PKG-INFO +0 -165
  52. optiml-1.5/optiml/ml/neural_network/initializers.py +0 -47
  53. optiml-1.5/optiml/ml/neural_network/regularizers.py +0 -46
  54. optiml-1.5/optiml/opti/constrained/frank_wolfe.py +0 -147
  55. optiml-1.5/optiml/opti/constrained/projected_gradient.py +0 -125
  56. optiml-1.5/optiml/opti/unconstrained/line_search/newton.py +0 -229
  57. optiml-1.5/optiml/opti/unconstrained/line_search/quasi_newton.py +0 -297
  58. optiml-1.5/optiml/opti/unconstrained/stochastic/_base.py +0 -167
  59. optiml-1.5/optiml/opti/unconstrained/stochastic/adadelta.py +0 -89
  60. optiml-1.5/optiml/opti/unconstrained/stochastic/adagrad.py +0 -81
  61. optiml-1.5/optiml/opti/unconstrained/stochastic/rmsprop.py +0 -108
  62. optiml-1.5/optiml/opti/unconstrained/tests/test_quasi_newton.py +0 -20
  63. optiml-1.5/optiml.egg-info/PKG-INFO +0 -165
  64. {optiml-1.5 → optiml-1.7}/optiml/__init__.py +0 -0
  65. {optiml-1.5 → optiml-1.7}/optiml/ml/__init__.py +0 -0
  66. {optiml-1.5 → optiml-1.7}/optiml/ml/neural_network/__init__.py +0 -0
  67. {optiml-1.5 → optiml-1.7}/optiml/ml/svm/__init__.py +0 -0
  68. {optiml-1.5 → optiml-1.7}/optiml/ml/tests/__init__.py +0 -0
  69. {optiml-1.5 → optiml-1.7}/optiml/opti/__init__.py +0 -0
  70. {optiml-1.5 → optiml-1.7}/optiml/opti/constrained/__init__.py +0 -0
  71. {optiml-1.5 → optiml-1.7}/optiml/opti/constrained/tests/__init__.py +0 -0
  72. {optiml-1.5 → optiml-1.7}/optiml/opti/constrained/tests/test_active_set.py +0 -0
  73. {optiml-1.5 → optiml-1.7}/optiml/opti/constrained/tests/test_frank_wolfe.py +0 -0
  74. {optiml-1.5 → optiml-1.7}/optiml/opti/constrained/tests/test_interior_point.py +0 -0
  75. {optiml-1.5 → optiml-1.7}/optiml/opti/constrained/tests/test_lagrangian_quadratic.py +0 -0
  76. {optiml-1.5 → optiml-1.7}/optiml/opti/constrained/tests/test_projected_gradient.py +0 -0
  77. {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/__init__.py +0 -0
  78. {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/_base.py +0 -0
  79. {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/line_search/_base.py +0 -0
  80. {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/line_search/line_search.py +0 -0
  81. {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/stochastic/__init__.py +0 -0
  82. {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/tests/__init__.py +0 -0
  83. {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/tests/test_adadelta.py +0 -0
  84. {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/tests/test_adagrad.py +0 -0
  85. {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/tests/test_adam.py +0 -0
  86. {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/tests/test_adamax.py +0 -0
  87. {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/tests/test_amsgrad.py +0 -0
  88. {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/tests/test_conjugate_gradient.py +0 -0
  89. {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/tests/test_gradient_descent.py +0 -0
  90. {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/tests/test_newton.py +0 -0
  91. {optiml-1.5 → optiml-1.7}/optiml/opti/unconstrained/tests/test_rmsprop.py +0 -0
  92. {optiml-1.5 → optiml-1.7}/optiml.egg-info/dependency_links.txt +0 -0
  93. {optiml-1.5 → optiml-1.7}/optiml.egg-info/top_level.txt +0 -0
  94. {optiml-1.5 → optiml-1.7}/setup.cfg +0 -0
optiml-1.7/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2019 Donato Meoli
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
optiml-1.7/PKG-INFO ADDED
@@ -0,0 +1,203 @@
1
+ Metadata-Version: 2.4
2
+ Name: optiml
3
+ Version: 1.7
4
+ Summary: Optimizers for/and sklearn compatible Machine Learning models
5
+ Home-page: https://github.com/dmeoli/optiml
6
+ Author: Donato Meoli
7
+ Author-email: donato.meoli@outlook.com
8
+ License: MIT
9
+ Requires-Python: >=3.9
10
+ Description-Content-Type: text/markdown
11
+ License-File: LICENSE
12
+ Requires-Dist: autograd
13
+ Requires-Dist: casadi
14
+ Requires-Dist: cvxopt
15
+ Requires-Dist: cvxpy
16
+ Requires-Dist: matplotlib
17
+ Requires-Dist: numpy
18
+ Requires-Dist: pytest
19
+ Requires-Dist: qpsolvers
20
+ Requires-Dist: quadprog
21
+ Requires-Dist: scikit-learn
22
+ Requires-Dist: scipy
23
+ Requires-Dist: wurlitzer
24
+ Dynamic: author
25
+ Dynamic: author-email
26
+ Dynamic: description
27
+ Dynamic: description-content-type
28
+ Dynamic: home-page
29
+ Dynamic: license
30
+ Dynamic: license-file
31
+ Dynamic: requires-dist
32
+ Dynamic: requires-python
33
+ Dynamic: summary
34
+
35
+ # OptiML
36
+
37
+ [![CI](https://github.com/dmeoli/optiml/actions/workflows/ci.yml/badge.svg?branch=master)](https://github.com/dmeoli/optiml/actions/workflows/ci.yml)
38
+ [![Coverage Status](https://coveralls.io/repos/github/dmeoli/optiml/badge.svg?branch=master)](https://coveralls.io/github/dmeoli/optiml?branch=master)
39
+ [![Python Version](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue)](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue)
40
+ [![PyPI Version](https://img.shields.io/pypi/v/optiml.svg?color=blue)](https://pypi.org/project/optiml/)
41
+ [![PyPI Downloads](https://img.shields.io/pypi/dm/optiml.svg)](https://pypistats.org/packages/optiml)
42
+ [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/dmeoli/optiml/master)
43
+ [![Documentation](https://img.shields.io/badge/docs-online-blue)](https://dmeoli.github.io/optiml)
44
+
45
+ OptiML is a sklearn compatible implementation of *Support Vector Machines* and *Deep Neural Networks*, both with some of
46
+ the most successful features according to the state of the art.
47
+
48
+ This work was motivated by the possibility of being able to solve the optimization problem deriving from the
49
+ mathematical formulation of these models through a wide range of optimization algorithms studied and developed
50
+ for the Numerical Methods and Optimization course @ [Department of Computer Science](https://www.di.unipi.it/en/) @
51
+ [University of Pisa](https://www.unipi.it/index.php/english) under the supervision of
52
+ prof. [Antonio Frangioni](http://pages.di.unipi.it/frangio/).
53
+
54
+ ## Contents
55
+
56
+ - Numerical Optimization
57
+ - Unconstrained Optimization
58
+ - Line Search Methods
59
+ - 1st Order Methods
60
+ - [x] Steepest Gradient Descent
61
+ - [x] Conjugate Gradient
62
+ - [x] Fletcher–Reeves formula
63
+ - [x] Polak–Ribière formula
64
+ - [x] Hestenes-Stiefel formula
65
+ - [x] Dai-Yuan formula
66
+ - 2nd Order Methods
67
+ - [x] Newton
68
+ - Quasi-Newton
69
+ - [x] BFGS
70
+ - [x] L-BFGS
71
+ - Stochastic Methods
72
+ - [x] Stochastic Gradient Descent
73
+ - [x] Momentum
74
+ - [x] Polyak
75
+ - [x] Nesterov
76
+ - [x] Adam
77
+ - [x] Momentum
78
+ - [x] Polyak
79
+ - [x] Nesterov
80
+ - [x] AMSGrad
81
+ - [x] Momentum
82
+ - [x] Polyak
83
+ - [x] Nesterov
84
+ - [x] AdaMax
85
+ - [x] Momentum
86
+ - [x] Polyak
87
+ - [x] Nesterov
88
+ - [x] AdaGrad
89
+ - [x] AdaDelta
90
+ - [x] RMSProp
91
+ - [x] Momentum
92
+ - [x] Polyak
93
+ - [x] Nesterov
94
+ - [x] Schedules
95
+ - Step size
96
+ - [x] Decaying
97
+ - [x] Linear Annealing
98
+ - [x] Repeater
99
+ - Momentum
100
+ - [x] Sutskever Blend
101
+ - [x] Proximal Bundle with [cvxpy](https://github.com/cvxgrp/cvxpy) interface to
102
+ [ecos](https://github.com/embotech/ecos), [osqp](https://github.com/oxfordcontrol/osqp),
103
+ [scs](https://github.com/cvxgrp/scs),
104
+ [etc](https://www.cvxpy.org/tutorial/advanced/index.html#choosing-a-solver).
105
+ - Constrained Quadratic Optimization
106
+ - Box-Constrained Quadratic Methods
107
+ - [x] Projected Gradient
108
+ - [x] Frank-Wolfe or Conditional Gradient
109
+ - [x] Active Set
110
+ - [x] Interior Point
111
+ - [x] Lagrangian Dual
112
+ - [x] Augmented Lagrangian Dual
113
+
114
+ - Machine Learning
115
+ - [x] Support Vector Machines
116
+ - Formulations
117
+ - Primal
118
+ - Wolfe Dual
119
+ - Lagrangian Dual
120
+ - [x] Support Vector Classifier
121
+ - Losses
122
+ - [x] Hinge (L1 Loss) ![l1_svc_loss](notebooks/optimization/tex/img/l1_svc_loss.png)
123
+ - [x] Squared Hinge (L2 Loss) ![l2_svc_loss](notebooks/optimization/tex/img/l2_svc_loss.png)
124
+ - [x] Support Vector Regression
125
+ - Losses
126
+ - [x] Epsilon-insensitive (L1 Loss) ![l1_svr_loss](notebooks/optimization/tex/img/l1_svr_loss.png)
127
+ - [x] Squared Epsilon-insensitive (L2 Loss) ![l2_svr_loss](notebooks/optimization/tex/img/l2_svr_loss.png)
128
+ - Kernels
129
+ - [x] Linear
130
+
131
+ | SVC | SVR |
132
+ |:--------------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------:|
133
+ | ![linear_dual_l1_svc_hyperplane](notebooks/optimization/tex/img/linear_dual_l1_svc_hyperplane.png) | ![linear_dual_l1_svr_hyperplane](notebooks/optimization/tex/img/linear_dual_l1_svr_hyperplane.png) |
134
+
135
+ - [x] Polynomial
136
+
137
+ | SVC | SVR |
138
+ | :----: | :----: |
139
+ | ![poly_dual_l1_svc_hyperplane](notebooks/optimization/tex/img/poly_dual_l1_svc_hyperplane.png) | ![poly_dual_l1_svr_hyperplane](notebooks/optimization/tex/img/poly_dual_l1_svr_hyperplane.png) |
140
+
141
+ - [x] Gaussian
142
+
143
+ | SVC | SVR |
144
+ |:------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------:|
145
+ | ![gaussian_dual_l1_svc_hyperplane](notebooks/optimization/tex/img/gaussian_dual_l1_svc_hyperplane.png) | ![gaussian_dual_l1_svr_hyperplane](notebooks/optimization/tex/img/gaussian_dual_l1_svr_hyperplane.png) |
146
+
147
+ - [x] Laplacian
148
+
149
+ | SVC | SVR |
150
+ |:--------------------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------:|
151
+ | ![laplacian_dual_l1_svc_hyperplane](notebooks/optimization/tex/img/laplacian_dual_l1_svc_hyperplane.png) | ![laplacian_dual_l1_svr_hyperplane](notebooks/optimization/tex/img/laplacian_dual_l1_svr_hyperplane.png) |
152
+
153
+ - [x] Sigmoid
154
+ - Optimizers (ad hoc)
155
+ - [x] Sequential Minimal Optimization (SMO)
156
+ - [x] QP solver with [qpsolvers](https://github.com/stephane-caron/qpsolvers) interface to
157
+ [cvxopt](https://github.com/cvxopt/cvxopt), [quadprog](https://github.com/rmcgibbo/quadprog),
158
+ [qpOASES](https://github.com/coin-or/qpOASES), [etc](https://github.com/stephane-caron/qpsolvers#solvers).
159
+ - [x] Neural Networks
160
+ - [x] Neural Network Classifier
161
+ - [x] Neural Network Regressor
162
+ - Losses
163
+ - [x] Mean Absolute Error (L1 Loss)
164
+ - [x] Mean Squared Error (L2 Loss)
165
+ - [x] Binary Cross Entropy
166
+ - [x] Categorical Cross Entropy
167
+ - [x] Sparse Categorical Cross Entropy
168
+ - Regularizers
169
+ - [x] L1 or Lasso
170
+ - [x] L2 or Ridge or Tikhonov
171
+ - Activations
172
+ - [x] Linear
173
+ - [x] Sigmoid
174
+ - [x] Tanh
175
+ - [x] ReLU
176
+ - [x] SoftMax
177
+ - Layers
178
+ - [x] Fully Connected
179
+ - Initializers
180
+ - [x] Xavier or Glorot (normal and uniform)
181
+ - [x] He (normal and uniform)
182
+
183
+ ## Install
184
+
185
+ ```
186
+ pip install optiml
187
+ ```
188
+
189
+ ## Documentation
190
+
191
+ The full API reference is available at [dmeoli.github.io/optiml](https://dmeoli.github.io/optiml),
192
+ automatically built from the source docstrings and published to GitHub Pages on every push to `master`.
193
+
194
+ It can also be built locally with [Sphinx](https://www.sphinx-doc.org):
195
+
196
+ ```
197
+ pip install -r docs/requirements.txt
198
+ sphinx-build -b html docs docs/_build/html
199
+ ```
200
+
201
+ ## License [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
202
+
203
+ This software is released under the MIT License. See the [LICENSE](LICENSE) file for details.
@@ -1,18 +1,21 @@
1
1
  # OptiML
2
- [![Build Status](https://travis-ci.com/dmeoli/optiml.svg?branch=master)](https://travis-ci.com/dmeoli/optiml)
3
- [![Coverage Status](https://coveralls.io/repos/github/dmeoli/optiml/badge.svg?branch=master)](https://coveralls.io/github/dmeoli/optiml?branch=master)
4
- [![Python Version](https://img.shields.io/badge/python-3.6%20%7C%203.7%20%7C%203.8%20%7C%203.9-blue)](https://img.shields.io/badge/python-3.6%20%7C%203.7%20%7C%203.8%20%7C%203.9-blue)
2
+
3
+ [![CI](https://github.com/dmeoli/optiml/actions/workflows/ci.yml/badge.svg?branch=master)](https://github.com/dmeoli/optiml/actions/workflows/ci.yml)
4
+ [![Coverage Status](https://coveralls.io/repos/github/dmeoli/optiml/badge.svg?branch=master)](https://coveralls.io/github/dmeoli/optiml?branch=master)
5
+ [![Python Version](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue)](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue)
5
6
  [![PyPI Version](https://img.shields.io/pypi/v/optiml.svg?color=blue)](https://pypi.org/project/optiml/)
6
7
  [![PyPI Downloads](https://img.shields.io/pypi/dm/optiml.svg)](https://pypistats.org/packages/optiml)
7
8
  [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/dmeoli/optiml/master)
9
+ [![Documentation](https://img.shields.io/badge/docs-online-blue)](https://dmeoli.github.io/optiml)
8
10
 
9
- OptiML is a sklearn compatible implementation of *Support Vector Machines* and *Deep Neural Networks*,
10
- both with some of the most successful features according to the state of the art.
11
+ OptiML is a sklearn compatible implementation of *Support Vector Machines* and *Deep Neural Networks*, both with some of
12
+ the most successful features according to the state of the art.
11
13
 
12
- This work was motivated by the possibility of being able to solve the optimization problem deriving from the mathematical
13
- formulation of these models through a wide range of optimization algorithms object of study and developed for the
14
- Numerical Methods and Optimization course @ [Department of Computer Science](https://www.di.unipi.it/en/) @
15
- [University of Pisa](https://www.unipi.it/index.php/english) under the supervision of prof. [Antonio Frangioni](http://pages.di.unipi.it/frangio/).
14
+ This work was motivated by the possibility of being able to solve the optimization problem deriving from the
15
+ mathematical formulation of these models through a wide range of optimization algorithms studied and developed
16
+ for the Numerical Methods and Optimization course @ [Department of Computer Science](https://www.di.unipi.it/en/) @
17
+ [University of Pisa](https://www.unipi.it/index.php/english) under the supervision of
18
+ prof. [Antonio Frangioni](http://pages.di.unipi.it/frangio/).
16
19
 
17
20
  ## Contents
18
21
 
@@ -30,7 +33,7 @@ Numerical Methods and Optimization course @ [Department of Computer Science](ht
30
33
  - [x] Newton
31
34
  - Quasi-Newton
32
35
  - [x] BFGS
33
- - [ ] L-BFGS
36
+ - [x] L-BFGS
34
37
  - Stochastic Methods
35
38
  - [x] Stochastic Gradient Descent
36
39
  - [x] Momentum
@@ -61,9 +64,10 @@ Numerical Methods and Optimization course @ [Department of Computer Science](ht
61
64
  - [x] Repeater
62
65
  - Momentum
63
66
  - [x] Sutskever Blend
64
- - [x] Proximal Bundle with [cvxpy](https://github.com/cvxgrp/cvxpy) interface to
65
- [ecos](https://github.com/embotech/ecos), [osqp](https://github.com/oxfordcontrol/osqp),
66
- [scs](https://github.com/cvxgrp/scs), [etc](https://www.cvxpy.org/tutorial/advanced/index.html#choosing-a-solver).
67
+ - [x] Proximal Bundle with [cvxpy](https://github.com/cvxgrp/cvxpy) interface to
68
+ [ecos](https://github.com/embotech/ecos), [osqp](https://github.com/oxfordcontrol/osqp),
69
+ [scs](https://github.com/cvxgrp/scs),
70
+ [etc](https://www.cvxpy.org/tutorial/advanced/index.html#choosing-a-solver).
67
71
  - Constrained Quadratic Optimization
68
72
  - Box-Constrained Quadratic Methods
69
73
  - [x] Projected Gradient
@@ -89,35 +93,35 @@ Numerical Methods and Optimization course @ [Department of Computer Science](ht
89
93
  - [x] Squared Epsilon-insensitive (L2 Loss) ![l2_svr_loss](notebooks/optimization/tex/img/l2_svr_loss.png)
90
94
  - Kernels
91
95
  - [x] Linear
92
-
93
- | SVC | SVR |
94
- | :----: | :----: |
96
+
97
+ | SVC | SVR |
98
+ |:--------------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------:|
95
99
  | ![linear_dual_l1_svc_hyperplane](notebooks/optimization/tex/img/linear_dual_l1_svc_hyperplane.png) | ![linear_dual_l1_svc_hyperplane](notebooks/optimization/tex/img/linear_dual_l1_svr_hyperplane.png) |
96
-
100
+
97
101
  - [x] Polynomial
98
-
99
- | SVC | SVR |
102
+
103
+ | SVC | SVR |
100
104
  | :----: | :----: |
101
105
  | ![poly_dual_l1_svc_hyperplane](notebooks/optimization/tex/img/poly_dual_l1_svc_hyperplane.png) | ![poly_dual_l1_svc_hyperplane](notebooks/optimization/tex/img/poly_dual_l1_svr_hyperplane.png) |
102
-
106
+
103
107
  - [x] Gaussian
104
-
105
- | SVC | SVR |
106
- | :----: | :----: |
108
+
109
+ | SVC | SVR |
110
+ |:------------------------------------------------------------------------------------------------------:|:------------------------------------------------------------------------------------------------------:|
107
111
  | ![gaussian_dual_l1_svc_hyperplane](notebooks/optimization/tex/img/gaussian_dual_l1_svc_hyperplane.png) | ![gaussian_dual_l1_svc_hyperplane](notebooks/optimization/tex/img/gaussian_dual_l1_svr_hyperplane.png) |
108
-
112
+
109
113
  - [x] Laplacian
110
-
111
- | SVC | SVR |
112
- | :----: | :----: |
114
+
115
+ | SVC | SVR |
116
+ |:--------------------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------:|
113
117
  | ![laplacian_dual_l1_svc_hyperplane](notebooks/optimization/tex/img/laplacian_dual_l1_svc_hyperplane.png) | ![laplacian_dual_l1_svc_hyperplane](notebooks/optimization/tex/img/laplacian_dual_l1_svr_hyperplane.png) |
114
-
118
+
115
119
  - [x] Sigmoid
116
120
  - Optimizers (ad hoc)
117
121
  - [x] Sequential Minimal Optimization (SMO)
118
- - [x] QP solver with [qpsolvers](https://github.com/stephane-caron/qpsolvers) interface to
119
- [cvxopt](https://github.com/cvxopt/cvxopt), [quadprog](https://github.com/rmcgibbo/quadprog),
120
- [qpOASES](https://github.com/coin-or/qpOASES), [etc](https://github.com/stephane-caron/qpsolvers#solvers).
122
+ - [x] QP solver with [qpsolvers](https://github.com/stephane-caron/qpsolvers) interface to
123
+ [cvxopt](https://github.com/cvxopt/cvxopt), [quadprog](https://github.com/rmcgibbo/quadprog),
124
+ [qpOASES](https://github.com/coin-or/qpOASES), [etc](https://github.com/stephane-caron/qpsolvers#solvers).
121
125
  - [x] Neural Networks
122
126
  - [x] Neural Network Classifier
123
127
  - [x] Neural Network Regressor
@@ -148,6 +152,18 @@ Numerical Methods and Optimization course @ [Department of Computer Science](ht
148
152
  pip install optiml
149
153
  ```
150
154
 
155
+ ## Documentation
156
+
157
+ The full API reference is available at [dmeoli.github.io/optiml](https://dmeoli.github.io/optiml),
158
+ automatically built from the source docstrings and published to GitHub Pages on every push to `master`.
159
+
160
+ It can also be built locally with [Sphinx](https://www.sphinx-doc.org):
161
+
162
+ ```
163
+ pip install -r docs/requirements.txt
164
+ sphinx-build -b html docs docs/_build/html
165
+ ```
166
+
151
167
  ## License [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
152
168
 
153
169
  This software is released under the MIT License. See the [LICENSE](LICENSE) file for details.
@@ -18,6 +18,97 @@ from ...opti.unconstrained.stochastic import StochasticOptimizer, StochasticGrad
18
18
 
19
19
 
20
20
  class NeuralNetwork(BaseEstimator, Layer, ABC):
21
+ """
22
+ Base abstract class for all feed-forward neural network estimators.
23
+ It chains a sequence of layers, performs forward/backward propagation
24
+ and trains the network parameters by minimizing the given loss with
25
+ the chosen optimizer.
26
+
27
+ Parameters
28
+ ----------
29
+
30
+ layers : tuple of `Layer` instances, default=()
31
+ The ordered sequence of layers composing the network.
32
+
33
+ loss : `NeuralNetworkLoss` subclass, default=mean_squared_error
34
+ Specifies the loss function to minimize.
35
+
36
+ optimizer : `Optimizer` subclass, default=StochasticGradientDescent
37
+ The solver for optimization. It can be a subclass of the
38
+ `LineSearchOptimizer`, the `ProximalBundle` method or a subclass
39
+ of the `StochasticOptimizer`.
40
+
41
+ learning_rate : float, default=0.01
42
+ The initial learning rate used for weight update. It controls the
43
+ step-size in updating the weights. Only used when ``optimizer`` is a
44
+ subclass of `StochasticOptimizer`.
45
+
46
+ max_iter : int, default=1000
47
+ Maximum number of iterations. The solver iterates until convergence
48
+ (determined by ``tol``) or this number of iterations. If the optimizer
49
+ is a subclass of `StochasticOptimizer`, this value determines the number
50
+ of epochs, not the number of gradient steps.
51
+
52
+ momentum_type : {'none', 'polyak', 'nesterov'}, default='none'
53
+ Momentum type used for weight update. Only used when ``optimizer`` is
54
+ a subclass of `StochasticMomentumOptimizer`.
55
+
56
+ momentum : float, default=0.9
57
+ Momentum for weight update. Should be between 0 and 1. Only used when
58
+ ``optimizer`` is a subclass of `StochasticMomentumOptimizer`.
59
+
60
+ tol : float, default=1e-4
61
+ Tolerance for stopping criterion.
62
+
63
+ validation_split : float, default=0.
64
+ The proportion of training data to set aside as validation set for
65
+ early stopping. Must be between 0 and 1. Only used when ``optimizer``
66
+ is a subclass of `StochasticOptimizer`.
67
+
68
+ batch_size : int, default=None
69
+ Size of mini batches for stochastic optimizers.
70
+ Only used when ``optimizer`` is a subclass of `StochasticOptimizer`.
71
+
72
+ max_f_eval : int, default=15000
73
+ Maximum number of loss function calls. Only used when ``optimizer``
74
+ is a subclass of `LineSearchOptimizer`.
75
+
76
+ early_stopping : bool, default=False
77
+ Whether to use early stopping to terminate training when the
78
+ monitored score/loss does not improve by at least ``tol`` for
79
+ ``patience`` consecutive epochs.
80
+ Only used when ``optimizer`` is a subclass of `StochasticOptimizer`.
81
+
82
+ patience : int, default=5
83
+ Maximum number of epochs to not meet ``tol`` improvement.
84
+ Only used when ``optimizer`` is a subclass of `StochasticOptimizer`.
85
+
86
+ shuffle : bool, default=True
87
+ Whether to shuffle samples for batch sampling in each iteration. Only
88
+ used when the ``optimizer`` is a subclass of `StochasticOptimizer`.
89
+
90
+ random_state : int, RandomState instance or None, default=None
91
+ Controls the pseudo random number generation for the train-validation
92
+ split and for shuffling the data in batch sampling.
93
+ Pass an int for reproducible output across multiple function calls.
94
+
95
+ mu : float, default=1
96
+ Mu parameter for the proximal bundle method.
97
+ Only used when ``optimizer`` is `ProximalBundle`. Must be strictly positive.
98
+
99
+ master_solver : string, default='clarabel'
100
+ Master solver for the proximal bundle method for the CVXPY interface.
101
+ Only used when ``optimizer`` is `ProximalBundle`.
102
+
103
+ master_verbose : bool or int, default=False
104
+ Controls the verbosity of the CVXPY interface.
105
+ Only used when ``optimizer`` is `ProximalBundle`.
106
+
107
+ verbose : bool or int, default=False
108
+ Controls the verbosity of progress messages to stdout. Use a boolean value
109
+ to switch on/off or an int value to show progress every ``verbose``
110
+ optimization steps.
111
+ """
21
112
 
22
113
  def __init__(self,
23
114
  layers=(),
@@ -36,7 +127,7 @@ class NeuralNetwork(BaseEstimator, Layer, ABC):
36
127
  shuffle=True,
37
128
  random_state=None,
38
129
  mu=1,
39
- master_solver='ecos',
130
+ master_solver='clarabel',
40
131
  master_verbose=False,
41
132
  verbose=False):
42
133
  self.layers = layers
@@ -277,6 +368,11 @@ class NeuralNetwork(BaseEstimator, Layer, ABC):
277
368
 
278
369
 
279
370
  class NeuralNetworkClassifier(ClassifierMixin, NeuralNetwork):
371
+ """
372
+ Feed-forward neural network for classification. The output layer must be
373
+ sigmoid (binary/multi-label) or softmax (multi-class), consistently with
374
+ the chosen loss function.
375
+ """
280
376
 
281
377
  def _store_train_val_info(self, opt, X_batch, y_batch, X_val, y_val):
282
378
  super(NeuralNetworkClassifier, self)._store_train_val_info(opt, X_batch, y_batch, X_val, y_val)
@@ -331,6 +427,11 @@ class NeuralNetworkClassifier(ClassifierMixin, NeuralNetwork):
331
427
 
332
428
 
333
429
  class NeuralNetworkRegressor(RegressorMixin, NeuralNetwork):
430
+ """
431
+ Feed-forward neural network for regression. The output layer must be
432
+ linear or, for regression between 0 and 1, sigmoid. The number of output
433
+ neurons must equal the number of targets.
434
+ """
334
435
 
335
436
  def _store_train_val_info(self, opt, X_batch, y_batch, X_val, y_val):
336
437
  super(NeuralNetworkRegressor, self)._store_train_val_info(opt, X_batch, y_batch, X_val, y_val)
@@ -5,6 +5,10 @@ from autograd.scipy.special import expit
5
5
 
6
6
 
7
7
  class Activation(ABC):
8
+ """
9
+ Base abstract class for all activation functions. Subclasses must
10
+ implement ``function`` and its element-wise derivative ``jacobian``.
11
+ """
8
12
 
9
13
  def function(self, x):
10
14
  raise NotImplementedError
@@ -17,6 +21,7 @@ class Activation(ABC):
17
21
 
18
22
 
19
23
  class Linear(Activation):
24
+ r"""Identity (linear) activation function :math:`f(x) = x`."""
20
25
 
21
26
  def function(self, x):
22
27
  return x
@@ -26,6 +31,7 @@ class Linear(Activation):
26
31
 
27
32
 
28
33
  class ReLU(Activation):
34
+ r"""Rectified linear unit activation function :math:`f(x) = \max(0, x)`."""
29
35
 
30
36
  def function(self, x):
31
37
  return np.maximum(0., x)
@@ -35,6 +41,7 @@ class ReLU(Activation):
35
41
 
36
42
 
37
43
  class Tanh(Activation):
44
+ r"""Hyperbolic tangent activation function :math:`f(x) = \tanh(x)`."""
38
45
 
39
46
  def function(self, x):
40
47
  return np.tanh(x)
@@ -44,6 +51,7 @@ class Tanh(Activation):
44
51
 
45
52
 
46
53
  class Sigmoid(Activation):
54
+ r"""Logistic sigmoid activation function :math:`f(x) = \frac{1}{1 + e^{-x}}`."""
47
55
 
48
56
  def function(self, x):
49
57
  return expit(x)
@@ -54,6 +62,7 @@ class Sigmoid(Activation):
54
62
 
55
63
 
56
64
  class SoftMax(Activation):
65
+ r"""Softmax activation function :math:`f(x)_i = \frac{e^{x_i}}{\sum_j e^{x_j}}`."""
57
66
 
58
67
  def function(self, x, axis=-1):
59
68
  exps = np.exp(x - np.max(x, axis=axis, keepdims=True))
@@ -0,0 +1,66 @@
1
+ import numpy as np
2
+
3
+
4
def truncated_normal(shape, mean=0., std=1., random_state=None):
    """Draw normal samples and clip them to two standard deviations around ``mean``.

    Parameters
    ----------
    shape : int or tuple of ints
        Shape of the returned array.
    mean : float, default=0.
        Centre of the normal distribution.
    std : float, default=1.
        Standard deviation of the normal distribution.
    random_state : seed or None, default=None
        Seed passed to ``np.random.RandomState``.

    Returns
    -------
    ndarray
        Samples of the requested shape, clipped to
        ``[mean - 2 * std, mean + 2 * std]``.
    """
    samples = np.random.RandomState(random_state).normal(size=shape, loc=mean, scale=std)
    # The clipping interval is centred on `mean`, not on zero, so that a
    # non-zero (in particular a negative) mean still yields a valid,
    # symmetric interval around the distribution's centre.
    return np.clip(samples, mean - 2 * std, mean + 2 * std)
7
+
8
+
9
def glorot_normal(shape, random_state=None):
    r"""Glorot (Xavier) normal initializer.

    Samples come from ``truncated_normal`` with zero mean and

    .. math::

        \text{std} = \sqrt{\frac{2}{\text{fan\_in} + \text{fan\_out}}}

    Parameters
    ----------
    shape : sequence of ints
        Shape of the weight array; ``shape[0]`` is used as ``fan_in``
        and ``shape[1]`` as ``fan_out``.
    random_state : seed or None, default=None
        Seed forwarded to ``truncated_normal``.

    Returns
    -------
    ndarray
        Array of initial weights with the requested shape.
    """
    n_inputs = shape[0]
    n_outputs = shape[1]
    scale = np.sqrt(2. / (n_inputs + n_outputs))
    return truncated_normal(shape=shape, mean=0., std=scale, random_state=random_state)
23
+
24
+
25
def glorot_uniform(shape, random_state=None):
    r"""Glorot (Xavier) uniform initializer.

    Samples are drawn uniformly from
    :math:`[-\text{limit}, \text{limit}]` with

    .. math::

        \text{limit} = \sqrt{\frac{6}{\text{fan\_in} + \text{fan\_out}}}

    Parameters
    ----------
    shape : sequence of ints
        Shape of the weight array; ``shape[0]`` is used as ``fan_in``
        and ``shape[1]`` as ``fan_out``.
    random_state : seed or None, default=None
        Seed passed to ``np.random.RandomState``.

    Returns
    -------
    ndarray
        Array of initial weights with the requested shape.
    """
    n_inputs = shape[0]
    n_outputs = shape[1]
    bound = np.sqrt(6. / (n_inputs + n_outputs))
    rng = np.random.RandomState(random_state)
    return rng.uniform(size=shape, low=-bound, high=bound)
39
+
40
+
41
def he_normal(shape, random_state=None):
    r"""He normal initializer.

    Samples come from ``truncated_normal`` with zero mean and

    .. math::

        \text{std} = \sqrt{\frac{2}{\text{fan\_in}}}

    Parameters
    ----------
    shape : sequence of ints
        Shape of the weight array; ``shape[0]`` is used as ``fan_in``.
    random_state : seed or None, default=None
        Seed forwarded to ``truncated_normal``.

    Returns
    -------
    ndarray
        Array of initial weights with the requested shape.
    """
    # Only the number of input units enters the formula, so the output
    # dimension is not read from ``shape``.
    fan_in = shape[0]
    std = np.sqrt(2. / fan_in)
    return truncated_normal(shape=shape, mean=0., std=std, random_state=random_state)
53
+
54
+
55
def he_uniform(shape, random_state=None):
    r"""He uniform variance scaling initializer.

    Samples are drawn uniformly from
    :math:`[-\text{limit}, \text{limit}]` with

    .. math::

        \text{limit} = \sqrt{\frac{6}{\text{fan\_in}}}

    Parameters
    ----------
    shape : sequence of ints
        Shape of the weight array; ``shape[0]`` is used as ``fan_in``.
    random_state : seed or None, default=None
        Seed passed to ``np.random.RandomState``.

    Returns
    -------
    ndarray
        Array of initial weights with the requested shape.
    """
    # Only the number of input units enters the formula, so the output
    # dimension is not read from ``shape``.
    fan_in = shape[0]
    limit = np.sqrt(6. / fan_in)
    return np.random.RandomState(random_state).uniform(size=shape, low=-limit, high=limit)