PyPI - optiml - Versions diffs - 1.7__py3-none-any.whl - Mend

optiml 1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (76) hide show

optiml/__init__.py +0 -0
optiml/ml/__init__.py +0 -0
optiml/ml/neural_network/__init__.py +3 -0
optiml/ml/neural_network/_base.py +475 -0
optiml/ml/neural_network/activations.py +79 -0
optiml/ml/neural_network/initializers.py +66 -0
optiml/ml/neural_network/layers.py +183 -0
optiml/ml/neural_network/losses.py +178 -0
optiml/ml/neural_network/regularizers.py +87 -0
optiml/ml/svm/__init__.py +3 -0
optiml/ml/svm/_base.py +1442 -0
optiml/ml/svm/kernels.py +208 -0
optiml/ml/svm/losses.py +284 -0
optiml/ml/svm/smo.py +797 -0
optiml/ml/tests/__init__.py +0 -0
optiml/ml/tests/_datasets.py +49 -0
optiml/ml/tests/_utils.py +28 -0
optiml/ml/tests/test_initializers.py +33 -0
optiml/ml/tests/test_neural_network.py +86 -0
optiml/ml/tests/test_svc.py +245 -0
optiml/ml/tests/test_svr.py +256 -0
optiml/ml/utils.py +252 -0
optiml/opti/__init__.py +4 -0
optiml/opti/_base.py +309 -0
optiml/opti/constrained/__init__.py +9 -0
optiml/opti/constrained/_base.py +404 -0
optiml/opti/constrained/active_set.py +228 -0
optiml/opti/constrained/frank_wolfe.py +158 -0
optiml/opti/constrained/interior_point.py +282 -0
optiml/opti/constrained/projected_gradient.py +138 -0
optiml/opti/constrained/tests/__init__.py +0 -0
optiml/opti/constrained/tests/test_active_set.py +16 -0
optiml/opti/constrained/tests/test_frank_wolfe.py +16 -0
optiml/opti/constrained/tests/test_interior_point.py +16 -0
optiml/opti/constrained/tests/test_lagrangian_quadratic.py +26 -0
optiml/opti/constrained/tests/test_lower_bound.py +29 -0
optiml/opti/constrained/tests/test_projected_gradient.py +16 -0
optiml/opti/unconstrained/__init__.py +6 -0
optiml/opti/unconstrained/_base.py +63 -0
optiml/opti/unconstrained/line_search/__init__.py +10 -0
optiml/opti/unconstrained/line_search/_base.py +106 -0
optiml/opti/unconstrained/line_search/conjugate_gradient.py +255 -0
optiml/opti/unconstrained/line_search/gradient_descent.py +212 -0
optiml/opti/unconstrained/line_search/line_search.py +248 -0
optiml/opti/unconstrained/line_search/newton.py +198 -0
optiml/opti/unconstrained/line_search/quasi_newton.py +496 -0
optiml/opti/unconstrained/proximal_bundle.py +219 -0
optiml/opti/unconstrained/stochastic/__init__.py +12 -0
optiml/opti/unconstrained/stochastic/_base.py +246 -0
optiml/opti/unconstrained/stochastic/adadelta.py +133 -0
optiml/opti/unconstrained/stochastic/adagrad.py +123 -0
optiml/opti/unconstrained/stochastic/adam.py +179 -0
optiml/opti/unconstrained/stochastic/adamax.py +178 -0
optiml/opti/unconstrained/stochastic/amsgrad.py +177 -0
optiml/opti/unconstrained/stochastic/gradient_descent.py +135 -0
optiml/opti/unconstrained/stochastic/rmsprop.py +156 -0
optiml/opti/unconstrained/stochastic/schedules.py +89 -0
optiml/opti/unconstrained/tests/__init__.py +0 -0
optiml/opti/unconstrained/tests/test_adadelta.py +20 -0
optiml/opti/unconstrained/tests/test_adagrad.py +20 -0
optiml/opti/unconstrained/tests/test_adam.py +42 -0
optiml/opti/unconstrained/tests/test_adamax.py +41 -0
optiml/opti/unconstrained/tests/test_amsgrad.py +40 -0
optiml/opti/unconstrained/tests/test_conjugate_gradient.py +35 -0
optiml/opti/unconstrained/tests/test_functions.py +34 -0
optiml/opti/unconstrained/tests/test_gradient_descent.py +51 -0
optiml/opti/unconstrained/tests/test_newton.py +20 -0
optiml/opti/unconstrained/tests/test_quasi_newton.py +30 -0
optiml/opti/unconstrained/tests/test_rmsprop.py +40 -0
optiml/opti/unconstrained/tests/test_verbose.py +25 -0
optiml/opti/utils.py +353 -0
optiml-1.7.dist-info/METADATA +203 -0
optiml-1.7.dist-info/RECORD +76 -0
optiml-1.7.dist-info/WHEEL +5 -0
optiml-1.7.dist-info/licenses/LICENSE +21 -0
optiml-1.7.dist-info/top_level.txt +1 -0

optiml/ml/tests/__init__.py ADDED Viewed

File without changes

optiml/ml/tests/_datasets.py ADDED Viewed

@@ -0,0 +1,49 @@
+import os
+import urllib.request
+import numpy as np
+import pytest
+# The Boston house-prices dataset was removed from scikit-learn in version 1.2
+# due to ethical concerns. To keep the existing regression tests (and their
+# accuracy thresholds) reproducible, it is loaded here directly from its original
+# source, exactly as suggested in the scikit-learn deprecation notice, and cached
+# locally to avoid downloading it more than once.
+# URL the raw dataset is downloaded from when no local cache exists
+_BOSTON_URL = 'http://lib.stat.cmu.edu/datasets/boston'
+# cache file, kept in a 'data' directory next to this module
+_BOSTON_CACHE = os.path.join(os.path.dirname(__file__), 'data', 'boston.npz')
+def load_boston(return_X_y=True):
+    """
+    Load and return the Boston house-prices dataset (regression).
+    The data (506 samples, 13 features) is read from the local cache when one
+    exists; otherwise it is fetched from its original StatLib source and then
+    cached on a best-effort basis. If the data cannot be downloaded or parsed
+    (e.g., no network, or the source served something unexpected) the calling
+    test is skipped rather than failed.
+    :param return_X_y: (bool, default True): ignored, ``(data, target)`` is
+                       always returned (kept for API compatibility with the
+                       former ``sklearn.datasets.load_boston``).
+    :return:           ``(X, y)`` with X of shape (506, 13) and y of shape (506,).
+    """
+    if os.path.exists(_BOSTON_CACHE):
+        with np.load(_BOSTON_CACHE) as cache:
+            return cache['data'], cache['target']
+    try:
+        with urllib.request.urlopen(_BOSTON_URL, timeout=30) as response:
+            raw = response.read().decode()
+    except Exception as e:  # no network or source unavailable
+        pytest.skip(f'Boston dataset not available: {e}')
+    # the 22 header lines are textual; the rest is a flat stream of 506 * 14
+    # floating point numbers (13 features + the target) laid out over two lines
+    # per record, so it is enough to parse all the numeric tokens and reshape
+    try:
+        values = np.array(' '.join(raw.splitlines()[22:]).split(), dtype=float).reshape(-1, 14)
+    except ValueError as e:  # e.g., an error page was served instead of the data
+        pytest.skip(f'Boston dataset could not be parsed: {e}')
+    data, target = values[:, :13], values[:, 13]
+    # the cache is only an optimization: when its directory is not writable
+    # (e.g., a read-only installation) the freshly downloaded data is still
+    # returned instead of failing the calling test
+    try:
+        os.makedirs(os.path.dirname(_BOSTON_CACHE), exist_ok=True)
+        np.savez(_BOSTON_CACHE, data=data, target=target)
+    except OSError:
+        pass
+    return data, target

optiml/ml/tests/_utils.py ADDED Viewed

@@ -0,0 +1,28 @@
+import numpy as np
+# Tolerance on the relative optimality gap (f(x) - f*) / f*, used to certify that
+# an optimizer actually reached the (solver-certified) primal optimum f* computed
+# by `SVMLoss.f_star`. Smooth losses (squared hinge / squared epsilon-insensitive)
+# are minimized essentially to machine precision by every method, while the
+# nonsmooth ones (hinge / epsilon-insensitive) are only reached within a looser
+# tolerance by first-order and subgradient-type methods.
+SMOOTH_TOL = 1e-4  # relative-gap tolerance for the smooth (squared) losses
+NONSMOOTH_TOL = 5e-2  # looser relative-gap tolerance for the nonsmooth losses
+def optimality_gap(model):
+    """
+    Relative optimality gap (f(x) - f*) / f* of a fitted primal SVM model.
+    :param model: fitted model exposing ``coef_``, ``intercept_`` and a callable
+                  ``loss`` that also provides ``f_star()``.
+    :return:      the gap between the loss at the fitted point and ``f_star()``,
+                  divided by ``f_star()``.
+    """
+    # the loss is evaluated at the coefficients followed by the intercept
+    x = np.hstack((model.coef_, model.intercept_))
+    f_x = model.loss(x)
+    # f* is described above as solver-certified, so compute it once and reuse it
+    f_star = model.loss.f_star()
+    return (f_x - f_star) / f_star
+def assert_optimal(model, tol):
+    """
+    Fail unless the relative optimality gap of a fitted primal SVM model is at most ``tol``.
+    :param model: fitted model, forwarded to ``optimality_gap``.
+    :param tol:   largest accepted relative optimality gap.
+    """
+    relative_gap = optimality_gap(model)
+    assert relative_gap <= tol, (
+        f'relative optimality gap {relative_gap:.2e} exceeds tolerance {tol:.0e}')
+def assert_all_optimal(ovr, tol):
+    """
+    Run ``assert_optimal`` on every estimator in ``ovr.estimators_``.
+    :param ovr: fitted OvR SVM classifier exposing ``estimators_``.
+    :param tol: largest accepted relative optimality gap, shared by all estimators.
+    """
+    for binary_model in ovr.estimators_:
+        assert_optimal(binary_model, tol)

optiml/ml/tests/test_initializers.py ADDED Viewed

@@ -0,0 +1,33 @@
+import numpy as np
+import pytest
+from optiml.ml.neural_network.initializers import (truncated_normal, glorot_normal,
+                                                   glorot_uniform, he_normal, he_uniform)
+def test_initializers_shape_finite_and_reproducible():
+    """Glorot/He initializers return finite weights of the requested shape, identical for a fixed seed."""
+    expected_shape, seed = (10, 5), 42
+    for initializer in (glorot_normal, glorot_uniform, he_normal, he_uniform):
+        weights = initializer(expected_shape, random_state=seed)
+        assert weights.shape == expected_shape
+        assert np.all(np.isfinite(weights))
+        # drawing again with the same seed must reproduce the same weights
+        redrawn = initializer(expected_shape, random_state=seed)
+        assert np.allclose(weights, redrawn)
+def test_truncated_normal_is_bounded():
+    """Truncated-normal samples stay within two standard deviations of the mean (up to 1e-9)."""
+    mean, std = 0., 1.
+    samples = truncated_normal((10000,), mean=mean, std=std, random_state=0)
+    deviations = np.abs(samples - mean)
+    assert np.all(deviations <= 2 * std + 1e-9)
+def test_uniform_initializers_within_limits():
+    """Glorot- and He-uniform weights never exceed, in absolute value, their respective limits."""
+    n_rows, n_cols = shape = (8, 4)
+    glorot_weights = glorot_uniform(shape, random_state=0)
+    assert np.all(np.abs(glorot_weights) <= np.sqrt(6. / (n_rows + n_cols)))
+    he_weights = he_uniform(shape, random_state=0)
+    assert np.all(np.abs(he_weights) <= np.sqrt(6. / n_rows))
+if __name__ == "__main__":
+    # run only the tests of this module (a bare pytest.main() collects from the
+    # current directory instead) and report pytest's exit code to the caller
+    raise SystemExit(pytest.main([__file__]))

optiml/ml/tests/test_neural_network.py ADDED Viewed

@@ -0,0 +1,86 @@
+import numpy as np
+import pytest
+from sklearn.datasets import load_iris
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
+from optiml.ml.tests._datasets import load_boston
+from optiml.ml.neural_network import NeuralNetworkRegressor, NeuralNetworkClassifier
+from optiml.ml.neural_network.activations import sigmoid, softmax, linear, relu
+from optiml.ml.neural_network.layers import FullyConnected
+from optiml.ml.neural_network.losses import mean_squared_error, mean_absolute_error, categorical_cross_entropy
+from optiml.ml.neural_network.regularizers import L2
+from optiml.opti.unconstrained import ProximalBundle
+from optiml.opti.unconstrained.line_search import Newton
+from optiml.opti.unconstrained.stochastic import Adam, StochasticGradientDescent
+def test_perceptron_regressor_with_line_search_optimizer():
+    """A single linear layer without intercept, fitted with Newton, must match the loss' ``x_star()``."""
+    # aka linear regression
+    features, targets = load_boston(return_X_y=True)
+    layers = (FullyConnected(13, 1, linear, fit_intercept=False),)
+    regressor = NeuralNetworkRegressor(layers, loss=mean_squared_error, optimizer=Newton)
+    regressor = regressor.fit(features, targets)
+    assert np.allclose(regressor.coefs_[0], regressor.loss.x_star())
+def test_perceptron_ridge_regressor_with_line_search_optimizer():
+    """A single L2-regularized linear layer, fitted with Newton, must match the loss' ``x_star()``."""
+    # aka ridge regression
+    features, targets = load_boston(return_X_y=True)
+    lmbda = 0.1
+    ridge_layer = FullyConnected(13, 1, linear, coef_reg=L2(lmbda), fit_intercept=False)
+    regressor = NeuralNetworkRegressor((ridge_layer,), loss=mean_squared_error, optimizer=Newton)
+    regressor = regressor.fit(features, targets)
+    assert np.allclose(regressor.coefs_[0], regressor.loss.x_star())
+def test_l2_neural_network_regressor_with_stochastic_optimizer():
+    """A sigmoid/linear network trained by SGD with Nesterov momentum must score >= 0.83 on the test split."""
+    X, y = load_boston(return_X_y=True)
+    X_train, X_test, y_train, y_test = train_test_split(StandardScaler().fit_transform(X), y,
+                                                        train_size=0.75, random_state=123456)
+    hidden_layer = FullyConnected(13, 13, sigmoid)
+    output_layer = FullyConnected(13, 1, linear)
+    net = NeuralNetworkRegressor((hidden_layer, output_layer),
+                                 loss=mean_squared_error, optimizer=StochasticGradientDescent,
+                                 learning_rate=0.01, momentum_type='nesterov', momentum=0.9)
+    net.fit(X_train, y_train)
+    test_score = net.score(X_test, y_test)
+    assert test_score >= 0.83
+def test_neural_network_regressor_with_mini_batches():
+    """Training with ``batch_size=32`` must complete and yield finite predictions on the test split."""
+    # exercises the mini-batch path of the stochastic optimizers
+    X, y = load_boston(return_X_y=True)
+    X_train, X_test, y_train, y_test = train_test_split(StandardScaler().fit_transform(X), y,
+                                                        train_size=0.75, random_state=123456)
+    hidden_layer = FullyConnected(13, 13, relu)
+    output_layer = FullyConnected(13, 1, linear)
+    net = NeuralNetworkRegressor((hidden_layer, output_layer),
+                                 loss=mean_squared_error, optimizer=StochasticGradientDescent,
+                                 learning_rate=0.01, max_iter=100, batch_size=32)
+    net.fit(X_train, y_train)
+    # mini-batch training must run end-to-end and produce finite predictions
+    predictions = net.predict(X_test)
+    assert np.all(np.isfinite(predictions))
+def test_l1_neural_network_regressor_with_proximal_bundle():
+    """A relu/linear network with absolute-error loss, trained by ProximalBundle, must score >= 0.83."""
+    X, y = load_boston(return_X_y=True)
+    X_train, X_test, y_train, y_test = train_test_split(StandardScaler().fit_transform(X), y,
+                                                        train_size=0.75, random_state=123456)
+    hidden_layer = FullyConnected(13, 13, relu)
+    output_layer = FullyConnected(13, 1, linear)
+    net = NeuralNetworkRegressor((hidden_layer, output_layer),
+                                 loss=mean_absolute_error, optimizer=ProximalBundle, max_iter=150)
+    net.fit(X_train, y_train)
+    test_score = net.score(X_test, y_test)
+    assert test_score >= 0.83
+def test_neural_network_classifier_with_stochastic_optimizer():
+    """A sigmoid/sigmoid/softmax network trained by Adam on one-hot iris labels must score >= 0.95."""
+    X, y = load_iris(return_X_y=True)
+    X_scaled = MinMaxScaler().fit_transform(X)
+    # the encoder is fitted on all the labels and then applied to each split separately
+    encoder = OneHotEncoder(sparse_output=False).fit(y.reshape(-1, 1))
+    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, train_size=0.75, random_state=123456)
+    layers = (FullyConnected(4, 4, sigmoid),
+              FullyConnected(4, 4, sigmoid),
+              FullyConnected(4, 3, softmax))
+    net = NeuralNetworkClassifier(layers, loss=categorical_cross_entropy, optimizer=Adam, learning_rate=0.01)
+    net.fit(X_train, encoder.transform(y_train.reshape(-1, 1)))
+    test_score = net.score(X_test, encoder.transform(y_test.reshape(-1, 1)))
+    assert test_score >= 0.95
+if __name__ == "__main__":
+    # run only the tests of this module (a bare pytest.main() collects from the
+    # current directory instead) and report pytest's exit code to the caller
+    raise SystemExit(pytest.main([__file__]))

optiml/ml/tests/test_svc.py ADDED Viewed

@@ -0,0 +1,245 @@
+import numpy as np
+import pytest
+from sklearn.datasets import load_iris
+from sklearn.model_selection import train_test_split
+from sklearn.multiclass import OneVsRestClassifier as OVR
+from sklearn.preprocessing import MinMaxScaler
+from optiml.ml.svm import SVC
+from optiml.ml.svm.kernels import gaussian
+from optiml.ml.svm.losses import hinge, squared_hinge
+from optiml.opti.constrained import ProjectedGradient, ActiveSet, InteriorPoint, FrankWolfe
+from optiml.opti.unconstrained import ProximalBundle
+from optiml.opti.unconstrained.line_search import SteepestGradientDescent, ConjugateGradient, Newton, BFGS, LBFGS
+from optiml.opti.unconstrained.stochastic import (StochasticGradientDescent, Adam, AMSGrad,
+                                                  AdaMax, AdaGrad, AdaDelta, RMSProp)
+def test_solve_primal_l1_svc_with_line_search_optimizers():
+    """Each line-search optimizer must fit the primal hinge-loss OvR SVC to a test score >= 0.57."""
+    X, y = load_iris(return_X_y=True)
+    X_train, X_test, y_train, y_test = train_test_split(MinMaxScaler().fit_transform(X), y,
+                                                        train_size=0.75, random_state=123456)
+    # CG only crawls on the nonsmooth multiclass hinge primal, so (as for the other optimizers)
+    # just the score is checked here; its convergence to f* is exercised on the
+    # better-conditioned SVR problem
+    for optimizer in (SteepestGradientDescent, ConjugateGradient, Newton, BFGS, LBFGS):
+        fitted = OVR(SVC(loss=hinge, optimizer=optimizer)).fit(X_train, y_train)
+        assert fitted.score(X_test, y_test) >= 0.57
+def test_solve_primal_l1_svc_with_stochastic_optimizers():
+    """Each stochastic optimizer must fit the primal hinge-loss OvR SVC to a test score >= 0.57."""
+    # On the nonsmooth multiclass hinge primal the stochastic optimizers converge
+    # too slowly / too erratically to reliably meet a fixed optimality-gap tolerance
+    # across platforms and seeds, so here we only check the score; their convergence
+    # to f* is verified rigorously on the (single, well-conditioned) SVR problem.
+    X, y = load_iris(return_X_y=True)
+    X_train, X_test, y_train, y_test = train_test_split(MinMaxScaler().fit_transform(X), y,
+                                                        train_size=0.75, random_state=123456)
+    for optimizer in (StochasticGradientDescent, Adam, AMSGrad, AdaMax, AdaGrad, AdaDelta, RMSProp):
+        # AdaDelta is the only optimizer run with an explicit learning rate
+        extra_kwargs = {'learning_rate': 1.} if optimizer is AdaDelta else {}
+        fitted = OVR(SVC(loss=hinge, optimizer=optimizer, **extra_kwargs)).fit(X_train, y_train)
+        assert fitted.score(X_test, y_test) >= 0.57
+def test_solve_primal_l1_svc_with_proximal_bundle():
+    """ProximalBundle must fit the primal hinge-loss OvR SVC to a test score >= 0.57."""
+    X, y = load_iris(return_X_y=True)
+    X_train, X_test, y_train, y_test = train_test_split(MinMaxScaler().fit_transform(X), y,
+                                                        train_size=0.75, random_state=123456)
+    fitted = OVR(SVC(loss=hinge, optimizer=ProximalBundle)).fit(X_train, y_train)
+    # the proximal bundle method only crawls on the nonsmooth multiclass hinge primal, so check only the score
+    test_score = fitted.score(X_test, y_test)
+    assert test_score >= 0.57
+def test_solve_dual_l1_svc_with_smo():
+    """SMO and cvxopt must agree on at least 97% of the test predictions of the dual hinge-loss OvR SVC."""
+    X, y = load_iris(return_X_y=True)
+    X_train, X_test, y_train, y_test = train_test_split(MinMaxScaler().fit_transform(X), y,
+                                                        train_size=0.75, random_state=123456)
+    smo_model = OVR(SVC(loss=hinge, kernel=gaussian, dual=True, optimizer='smo'))
+    smo_model = smo_model.fit(X_train, y_train)
+    # SMO must reach essentially the same solution as the reference QP solver (cvxopt)
+    reference_model = OVR(SVC(loss=hinge, kernel=gaussian, reg_intercept=False, dual=True, optimizer='cvxopt'))
+    reference_model = reference_model.fit(X_train, y_train)
+    agreement = (smo_model.predict(X_test) == reference_model.predict(X_test)).mean()
+    assert agreement >= 0.97
+def test_solve_dual_l1_svc_with_cvxopt():
+    """cvxopt must fit the dual hinge-loss OvR SVC to a test score >= 0.97, with and without ``reg_intercept``."""
+    X, y = load_iris(return_X_y=True)
+    X_train, X_test, y_train, y_test = train_test_split(MinMaxScaler().fit_transform(X), y,
+                                                        train_size=0.75, random_state=123456)
+    for reg_intercept in (True, False):
+        fitted = OVR(SVC(loss=hinge, kernel=gaussian, reg_intercept=reg_intercept,
+                         dual=True, optimizer='cvxopt')).fit(X_train, y_train)
+        assert fitted.score(X_test, y_test) >= 0.97
+def test_solve_dual_l1_svc_with_reg_intercept_with_bcqp_optimizers():
+    """Each constrained optimizer must fit the dual hinge-loss OvR SVC (``reg_intercept=True``) to a score >= 0.97."""
+    X, y = load_iris(return_X_y=True)
+    X_train, X_test, y_train, y_test = train_test_split(MinMaxScaler().fit_transform(X), y,
+                                                        train_size=0.75, random_state=123456)
+    for optimizer in (ProjectedGradient, ActiveSet, InteriorPoint, FrankWolfe):
+        fitted = OVR(SVC(loss=hinge, kernel=gaussian, reg_intercept=True,
+                         dual=True, optimizer=optimizer)).fit(X_train, y_train)
+        assert fitted.score(X_test, y_test) >= 0.97
+def test_solve_dual_l1_svc_with_proximal_bundle():
+    """ProximalBundle (150 iterations) must fit the dual hinge-loss OvR SVC to a score >= 0.97, for both ``reg_intercept`` values."""
+    X, y = load_iris(return_X_y=True)
+    X_train, X_test, y_train, y_test = train_test_split(MinMaxScaler().fit_transform(X), y,
+                                                        train_size=0.75, random_state=123456)
+    for reg_intercept in (True, False):
+        fitted = OVR(SVC(loss=hinge, kernel=gaussian, reg_intercept=reg_intercept,
+                         dual=True, optimizer=ProximalBundle, max_iter=150)).fit(X_train, y_train)
+        assert fitted.score(X_test, y_test) >= 0.97
+def test_solve_dual_l1_svc_with_AdaGrad():
+    """AdaGrad (learning rate 1) must fit the dual hinge-loss OvR SVC to a score >= 0.97, for both ``reg_intercept`` values."""
+    X, y = load_iris(return_X_y=True)
+    X_train, X_test, y_train, y_test = train_test_split(MinMaxScaler().fit_transform(X), y,
+                                                        train_size=0.75, random_state=123456)
+    for reg_intercept in (True, False):
+        fitted = OVR(SVC(loss=hinge, kernel=gaussian, reg_intercept=reg_intercept,
+                         dual=True, optimizer=AdaGrad, learning_rate=1.)).fit(X_train, y_train)
+        assert fitted.score(X_test, y_test) >= 0.97
+def test_solve_primal_l2_svc_with_line_search_optimizers():
+    """Each line-search optimizer must fit the primal squared-hinge OvR SVC to a test score >= 0.57."""
+    X, y = load_iris(return_X_y=True)
+    X_train, X_test, y_train, y_test = train_test_split(MinMaxScaler().fit_transform(X), y,
+                                                        train_size=0.75, random_state=123456)
+    for optimizer in (SteepestGradientDescent, ConjugateGradient, Newton, BFGS, LBFGS):
+        fitted = OVR(SVC(loss=squared_hinge, optimizer=optimizer)).fit(X_train, y_train)
+        assert fitted.score(X_test, y_test) >= 0.57
+def test_solve_primal_l2_svc_with_stochastic_optimizers():
+    """Each stochastic optimizer must fit the primal squared-hinge OvR SVC to a test score >= 0.57."""
+    X, y = load_iris(return_X_y=True)
+    X_train, X_test, y_train, y_test = train_test_split(MinMaxScaler().fit_transform(X), y,
+                                                        train_size=0.75, random_state=123456)
+    # AdaGrad and AdaDelta converge too slowly on this multiclass problem to reliably hit a fixed
+    # optimality-gap tolerance across platforms, and RMSProp does not reliably converge here,
+    # so only the score is checked (for every optimizer)
+    for optimizer in (StochasticGradientDescent, Adam, AMSGrad, AdaMax, AdaGrad, AdaDelta, RMSProp):
+        # AdaDelta is the only optimizer run with an explicit learning rate
+        extra_kwargs = {'learning_rate': 1.} if optimizer is AdaDelta else {}
+        fitted = OVR(SVC(loss=squared_hinge, optimizer=optimizer, **extra_kwargs)).fit(X_train, y_train)
+        assert fitted.score(X_test, y_test) >= 0.57
+def test_solve_dual_l2_svc_with_cvxopt():
+    """cvxopt must fit the dual squared-hinge OvR SVC to a test score >= 0.97, with and without ``reg_intercept``."""
+    X, y = load_iris(return_X_y=True)
+    X_train, X_test, y_train, y_test = train_test_split(MinMaxScaler().fit_transform(X), y,
+                                                        train_size=0.75, random_state=123456)
+    for reg_intercept in (True, False):
+        fitted = OVR(SVC(loss=squared_hinge, kernel=gaussian, reg_intercept=reg_intercept,
+                         dual=True, optimizer='cvxopt')).fit(X_train, y_train)
+        assert fitted.score(X_test, y_test) >= 0.97
+def test_solve_dual_l2_svc_with_AdaGrad():
+    """AdaGrad (learning rate 1) must fit the dual squared-hinge OvR SVC to a score >= 0.97, for both ``reg_intercept`` values."""
+    X, y = load_iris(return_X_y=True)
+    X_train, X_test, y_train, y_test = train_test_split(MinMaxScaler().fit_transform(X), y,
+                                                        train_size=0.75, random_state=123456)
+    for reg_intercept in (True, False):
+        fitted = OVR(SVC(loss=squared_hinge, kernel=gaussian, reg_intercept=reg_intercept,
+                         dual=True, optimizer=AdaGrad, learning_rate=1.)).fit(X_train, y_train)
+        assert fitted.score(X_test, y_test) >= 0.97
+if __name__ == "__main__":
+    # run only the tests of this module (a bare pytest.main() collects from the
+    # current directory instead) and report pytest's exit code to the caller
+    raise SystemExit(pytest.main([__file__]))