gfdl 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gfdl/__init__.py +36 -0
- gfdl/activations.py +194 -0
- gfdl/model.py +851 -0
- gfdl/tests/__init__.py +0 -0
- gfdl/tests/test_model.py +518 -0
- gfdl/tests/test_regression.py +142 -0
- gfdl/weights.py +378 -0
- gfdl-0.1.0.dist-info/METADATA +33 -0
- gfdl-0.1.0.dist-info/RECORD +12 -0
- gfdl-0.1.0.dist-info/WHEEL +5 -0
- gfdl-0.1.0.dist-info/licenses/COPYING +29 -0
- gfdl-0.1.0.dist-info/top_level.txt +1 -0
gfdl/tests/__init__.py
ADDED
|
File without changes
|
gfdl/tests/test_model.py
ADDED
|
@@ -0,0 +1,518 @@
|
|
|
1
|
+
# tests/test_model.py
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pytest
|
|
5
|
+
from numpy.testing import assert_allclose
|
|
6
|
+
from sklearn.datasets import load_digits, make_classification
|
|
7
|
+
from sklearn.metrics import accuracy_score, roc_auc_score
|
|
8
|
+
from sklearn.model_selection import StratifiedKFold, train_test_split
|
|
9
|
+
from sklearn.preprocessing import OneHotEncoder, StandardScaler
|
|
10
|
+
from sklearn.utils.estimator_checks import parametrize_with_checks
|
|
11
|
+
from ucimlrepo import fetch_ucirepo
|
|
12
|
+
|
|
13
|
+
from gfdl.model import EnsembleGFDLClassifier, GFDLClassifier
|
|
14
|
+
|
|
15
|
+
# Activation functions and weight-initialization schemes exercised by the
# parametrized tests in this module; the names must match what
# GFDLClassifier accepts for its `activation` / `weight_scheme` arguments.
activations = ["relu", "tanh", "sigmoid", "identity", "softmax", "softmin",
               "log_sigmoid", "log_softmax"]
weights = ["zeros", "range", "uniform", "normal", "he_uniform", "lecun_uniform",
           "glorot_uniform", "he_normal", "lecun_normal", "glorot_normal"]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@pytest.mark.parametrize(
    "hidden_layer_sizes",
    [(10,), (10, 10), (5, 10, 15, 20), (100,)]
)
@pytest.mark.parametrize("n_classes", [2, 5])
@pytest.mark.parametrize("direct_links", [0, 1])
@pytest.mark.parametrize("activation", activations)
@pytest.mark.parametrize("weight_scheme", weights)
def test_model(hidden_layer_sizes, n_classes, activation, weight_scheme, direct_links):
    """Smoke-test GFDLClassifier fit/predict/predict_proba over the full grid
    of layer shapes, class counts, activations and weight schemes.

    Checks weight/bias/readout shapes, probability normalization, and that
    predict() agrees with the argmax of predict_proba().
    """
    N, d = 60, 10
    X, y = make_classification(n_samples=N,
                               n_features=d,
                               n_classes=n_classes,
                               n_informative=8,
                               random_state=42)

    # Keyword arguments instead of the original fragile positional call
    # (the 5th positional argument was the seed).
    model = GFDLClassifier(hidden_layer_sizes=hidden_layer_sizes,
                           activation=activation,
                           weight_scheme=weight_scheme,
                           direct_links=direct_links,
                           seed=0)

    model.fit(X, y)

    # one random weight matrix per hidden layer; the first maps the inputs
    assert len(model.W_) == len(hidden_layer_sizes)
    assert model.W_[0].T.shape == (d, hidden_layer_sizes[0])

    # strict=True (was strict=False) so a silent length mismatch between
    # layer sizes and fitted weights/biases cannot pass unnoticed
    for i, (layer, w, b) in enumerate(zip(hidden_layer_sizes[1:],
                                          model.W_[1:],
                                          model.b_[1:],
                                          strict=True)):
        assert w.T.shape == (hidden_layer_sizes[i], layer)
        assert b.shape == (layer,)

    # readout width: direct links append the raw d input features to the
    # concatenated hidden-layer outputs (len(np.arange(n_classes)) was just
    # a roundabout n_classes)
    expected_cols = sum(hidden_layer_sizes) + (d if direct_links else 0)
    assert model.coeff_.shape == (expected_cols, n_classes)

    pred = model.predict(X[:10])
    assert set(np.unique(pred)).issubset(set(np.arange(n_classes)))
    np.testing.assert_array_equal(np.unique(y), np.arange(n_classes))

    P = model.predict_proba(X[:10])
    np.testing.assert_allclose(P.sum(axis=1), 1.0, atol=1e-6)
    assert (P >= 0).all() and (P <= 1).all()
    np.testing.assert_array_equal(pred, model.classes_[np.argmax(P, axis=1)])
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@pytest.mark.parametrize("weight_scheme", weights)
@pytest.mark.parametrize(
    "hidden_layer_size",
    [(10,), (2, 3, 2, 1), (5, 10, 15, 20, 15, 10), (100,)]
)
def test_multilayer_math(weight_scheme, hidden_layer_size):
    """With identity activations the whole network is affine, so the fitted
    readout must equal the closed-form least-squares solution computed on the
    collapsed (layer-by-layer composed) design matrix."""
    N, d = 60, 10
    X, y = make_classification(n_samples=N,
                               n_features=d,
                               n_classes=3,
                               n_informative=8,
                               random_state=42)

    model = GFDLClassifier(
        hidden_layer_sizes=hidden_layer_size,
        activation="identity",
        weight_scheme=weight_scheme,
        direct_links=False,
        seed=0
    )

    model.fit(X, y)

    enc = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
    Y = enc.fit_transform(y.reshape(-1, 1))

    # collapsing weights and biases for representation as linear operation;
    # renamed from `weights` — the original shadowed the module-level
    # `weights` list that this test's own parametrize decorator consumes
    layer_weights = [w.T for w in model.W_]
    Ts, cs = [], []
    T = np.eye(X.shape[1])
    c = np.zeros((X.shape[1],))

    # strict=True (was strict=False): W_ and b_ must have equal length
    for w, b in zip(layer_weights, model.b_, strict=True):
        T = T @ w
        c = c @ w + b
        Ts.append(T)
        cs.append(c)

    # design matrix with ALL layers concatenated
    expected_phi = np.hstack([X @ T_l + c_l for T_l, c_l in zip(Ts, cs, strict=True)])

    expected_beta = np.linalg.pinv(expected_phi) @ Y

    np.testing.assert_allclose(model.coeff_, expected_beta)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
@pytest.mark.parametrize("hidden_layer_sizes, activation, weight_scheme, exp_auc", [
    # Without direct links (ELM-style), stacking narrow hidden layers
    # should raise the ROC AUC up to a reasonable depth when the layers
    # are very small.
    ((2,), "relu", "uniform", 0.5598328634285958),
    ((2, 2), "relu", "uniform", 0.5666639967533855),
    # diminishing returns begin here:
    ((2, 2, 2, 2), "relu", "uniform", 0.5666639967533855),
    ((2, 2, 2, 2, 2, 2, 2), "relu", "uniform", 0.5666639967533855),
    # and effectively no further improvement:
    ((2, 2, 2, 2, 2, 2, 2, 2, 2), "relu", "uniform", 0.5666639967533855),
]
)
def test_multilayer_progression(weight_scheme,
                                hidden_layer_sizes,
                                activation,
                                exp_auc):
    """Pin the OVO ROC AUC as narrow hidden layers are stacked deeper."""
    data, labels = make_classification(n_samples=400,
                                       n_features=100,
                                       n_classes=5,
                                       n_informative=26,
                                       random_state=42,
                                       class_sep=0.5)
    X_tr, X_te, y_tr, y_te = train_test_split(data, labels, test_size=0.2,
                                              random_state=0)

    clf = GFDLClassifier(hidden_layer_sizes=hidden_layer_sizes,
                         activation=activation,
                         weight_scheme=weight_scheme,
                         direct_links=False,
                         seed=0)
    clf.fit(X_tr, y_tr)

    scores = clf.predict_proba(X_te)
    assert_allclose(roc_auc_score(y_te, scores, multi_class="ovo"), exp_auc)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
@pytest.mark.parametrize(
    "Classifier, target",
    [(GFDLClassifier, 0.7161), (EnsembleGFDLClassifier, 0.7132)]
)
def test_against_shi2021(Classifier, target):
    """Reproduce pinned cross-validated accuracies on the UCI Abalone
    dataset, in the spirit of Shi et al. (2021)."""
    # test multilayer classification against
    # the results given in Shi et al. (2021) DOI 10.1016/j.patcog.2021.107978
    # dataset obtained from UCI ML repo (requires network access)
    abalone = fetch_ucirepo(id=1)

    X = abalone.data.features
    y = abalone.data.targets

    # encode the categorical Sex column as small integers
    X = X.assign(
        Sex=lambda d: d["Sex"].map({"M": 0, "F": 1, "I": 2}).astype("int8")
    )

    X, y = np.array(X), np.array(y).reshape(-1)

    # Shi et al. only use 3 classes, but all samples are used, which implies binning
    # they do not specify the bins used, so I used my best judgement
    y = np.digitize(y, bins=[7, 11])

    # Shi et al. used half of the titanic dataset to tune
    # so I assumed they did the same for this dataset
    X_tune, X_eval, y_tune, y_eval = train_test_split(
        X, y, test_size=0.5, random_state=0)

    # Shi et al. used 4 folds on the titanic dataset
    # I inferred that they also used 4 folds for this dataset
    K = 4

    # X_tune and y_tune were used to find the hyperparameters
    # using Shi et al.'s two-stage tuning method, disregarding
    # parameter C and tuning the activation function instead
    """
    For RVFL based models, we use a two-stage tuning method to obtain
    their best hyperparameter configurations. The two-stage tuning can be
    performed by the following steps: 1) Fix the number layers to 2, and
    then select the optimal number of neurons (N*) and regularization
    parameter (C*) using a coarse range for N and C. 2) Tune the number
    of layers and fine tune the N, C parameters by considering only a fine
    range in the neighborhood of N* and C*.

    Shi et al. (2021) https://doi.org/10.1016/j.patcog.2021.107978
    """

    # values determined using method outlined above
    hidden_layer_sizes = [512, 512]
    reg = 16

    model = Classifier(
        hidden_layer_sizes=hidden_layer_sizes,
        activation="relu",
        weight_scheme="uniform",
        reg_alpha=reg,
        seed=0
    )

    scl = StandardScaler()

    # The actual splits used in the paper were not specified
    skf = StratifiedKFold(n_splits=K, shuffle=True, random_state=42)

    # average held-out accuracy over the K folds
    acc = 0
    for train_index, test_index in skf.split(X_eval, y_eval):
        X_train = X_eval[train_index]
        y_train = y_eval[train_index]
        X_test = X_eval[test_index]
        y_test = y_eval[test_index]

        # scaler is re-fit per fold on the training split only
        model.fit(scl.fit_transform(X_train), y_train)

        y_hat = model.predict(scl.transform(X_test))

        acc += accuracy_score(y_test, y_hat)

    acc /= K

    # not an exact match because they don't specify their activation
    # nor do they mention the best hyperparameter configuration
    # and they're using ridge

    # tightest bound for both rel and abs
    # values in paper:
    # dRVFL accuracy: 66.33%
    # edRVFL accuracy: 65.81%
    assert acc == pytest.approx(target, rel=1e-4, abs=0)
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def test_soft_and_hard():
    """Soft voting must pick the argmax of the mean head probabilities, and
    on this easy split hard voting agrees with soft voting."""
    n_samples, n_features = 60, 10
    X, y = make_classification(n_samples=n_samples,
                               n_features=n_features,
                               n_classes=3,
                               n_informative=8,
                               random_state=0)

    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2,
                                              random_state=0)

    clf = EnsembleGFDLClassifier(hidden_layer_sizes=(5, 5, 5),
                                 activation="tanh",
                                 weight_scheme="uniform",
                                 seed=0,
                                 reg_alpha=0.1)
    clf.fit(X_tr, y_tr)

    soft_labels = clf.predict(X_te)

    # soft voting == argmax over the averaged per-head probabilities
    proba = clf.predict_proba(X_te)
    np.testing.assert_equal(soft_labels,
                            clf.classes_[np.argmax(proba, axis=1)])

    # switching the fitted model to hard voting gives the same labels here
    clf.voting = "hard"
    np.testing.assert_equal(soft_labels, clf.predict(X_te))
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def test_hard_vote_proba_error():
    """predict_proba is undefined under hard voting and must raise."""
    X, y = make_classification(n_samples=60,
                               n_features=10,
                               n_classes=3,
                               n_informative=8,
                               random_state=0)
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2,
                                              random_state=0)

    hard_clf = EnsembleGFDLClassifier(
        hidden_layer_sizes=(5, 5, 5),
        activation="tanh",
        weight_scheme="uniform",
        seed=0,
        reg_alpha=0.1,
        voting="hard",
    )
    hard_clf.fit(X_tr, y_tr)

    # the error should mention the offending method by name
    with pytest.raises(AttributeError, match="predict_proba"):
        hard_clf.predict_proba(X_te)
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
@pytest.mark.parametrize("alpha", [None, 0.1])
def test_soft_and_hard_can_differ(alpha):
    """With many small heads, soft and hard voting can disagree on a few
    samples; pin the exact per-sample agreement pattern."""
    N, d = 60, 10
    X, y = make_classification(n_samples=N,
                               n_features=d,
                               n_classes=3,
                               n_informative=8,
                               random_state=0)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                        random_state=0)

    # adding more layers (heads) increases the chance of disagreement
    # between the two voting methods
    model = EnsembleGFDLClassifier(
        hidden_layer_sizes=(3, 3, 3, 3),
        activation="tanh",
        weight_scheme="uniform",
        seed=0,
        reg_alpha=alpha
    )
    model.fit(X_train, y_train)
    y_soft = model.predict(X_test)
    model.voting = "hard"
    y_hard = model.predict(X_test)
    # True where soft and hard voting AGREE; exactly one test sample
    # (index 10) differs. Renamed from the misleading `difference` —
    # the mask marks agreement, not disagreement.
    agreement = [
        True, True, True, True, True, True, True, True, True, True, False, True
    ]

    np.testing.assert_array_equal(y_soft == y_hard, agreement)
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
@pytest.mark.parametrize("Classifier", [GFDLClassifier, EnsembleGFDLClassifier])
def test_invalid_activation_weight(Classifier):
    """Unknown activation or weight-scheme names must raise at fit() time."""
    X = np.zeros((30, 4))
    y = np.zeros((30,))

    bad_activation = Classifier(hidden_layer_sizes=100,
                                activation="bogus_activation",
                                weight_scheme="uniform")
    bad_weights = Classifier(hidden_layer_sizes=100,
                             activation="identity",
                             weight_scheme="bogus_weight")

    # the sklearn estimator API bans input validation in __init__,
    # so we need to call fit() for error handling to kick in:
    # https://scikit-learn.org/stable/developers/develop.html#developing-scikit-learn-estimators
    with pytest.raises(ValueError, match="is not supported"):
        bad_activation.fit(X, y)
    with pytest.raises(ValueError, match="is not supported"):
        bad_weights.fit(X, y)
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
@pytest.mark.parametrize("Classifier", [GFDLClassifier, EnsembleGFDLClassifier])
def test_invalid_alpha(Classifier):
    """A negative regularization strength must raise ValueError at fit()."""
    # the sklearn estimator API bans input validation in __init__,
    # so we need to call fit() for error handling to kick in:
    # https://scikit-learn.org/stable/developers/develop.html#developing-scikit-learn-estimators
    X = np.zeros((30, 4))
    y = np.zeros((30,))
    bad_est = Classifier(hidden_layer_sizes=100,
                         activation="identity",
                         weight_scheme="uniform",
                         reg_alpha=-10)
    # `_` is not a regex metacharacter, so the original r"Negative reg\_alpha"
    # carried a superfluous escape; the match is identical without it
    with pytest.raises(ValueError, match=r"Negative reg_alpha"):
        bad_est.fit(X, y)
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
@pytest.mark.parametrize("""hidden_layer_sizes,
                         n_classes,
                         activation,
                         weight_scheme,
                         alpha,
                         exp_proba_shape,
                         exp_proba_median,
                         exp_proba_min""", [

    # expected values are from graforvfl library
    ([10,], 2, "relu", "uniform", None, (20, 2), 0.5, 0.0444571694),
    ([100,], 2, "tanh", "normal", None, (20, 2), 0.5, 0.02538905725),
    ([10,], 5, "softmax", "lecun_uniform", None, (20, 5),
     0.186506112, 0.08469873),
    ([10,], 2, "relu", "uniform", 0.5, (20, 2), 0.49999999999999994,
     0.04676933232591643),
    ([10,], 2, "relu", "normal", 0.5, (20, 2), 0.5,
     0.13832596541020634),
    ([10,], 2, "relu", "he_uniform", 0.5, (20, 2), 0.5,
     0.09354846081377409),
    ([10,], 2, "relu", "lecun_uniform", 0.5, (20, 2), 0.5,
     0.09387932375067173),
    ([10,], 2, "relu", "glorot_uniform", 0.5, (20, 2),
     0.49999999999999994, 0.09474642560519067),
    ([10,], 2, "relu", "he_normal", 0.5, (20, 2), 0.5,
     0.13756805074436051),
    ([10,], 2, "relu", "lecun_normal", 0.5, (20, 2), 0.5,
     0.1366715193146648),
    ([10,], 2, "relu", "glorot_normal", 0.5, (20, 2), 0.5,
     0.147434110768701),
    ([100,], 5, "relu", "normal", 1, (20, 5), 0.15697278777061396,
     0.014480242978774488),
    ([100,], 5, "tanh", "normal", 1, (20, 5), 0.18173657135483476,
     0.04755723146401269),
    ([100,], 5, "sigmoid", "normal", 1, (20, 5), 0.1831653950464296,
     0.05378741996708733),
    ([100,], 5, "softmax", "normal", 1, (20, 5), 0.19357646668265396,
     0.10898717209741866),
    ([100,], 5, "softmin", "normal", 1, (20, 5), 0.18746771358297387,
     0.09186562406164228),
    ([100,], 5, "log_sigmoid", "normal", 1, (20, 5),
     0.16722029352468032, 0.012690348255702557),
    ([100,], 5, "log_softmax", "normal", 1, (20, 5),
     0.1853363666712296, 0.10846041127337658),
])
def test_classification_against_grafo(hidden_layer_sizes, n_classes, activation,
                                      weight_scheme, alpha, exp_proba_shape,
                                      exp_proba_median, exp_proba_min):
    """Compare predicted-probability shape/median/min against values pinned
    from the open-source graforvfl reference implementation."""
    # test binary and multi-class classification against expected values
    # from the open source graforvfl library on some synthetic
    # datasets
    X, y = make_classification(n_classes=n_classes,
                               n_informative=8, random_state=0)
    X_train, X_test, y_train, _ = train_test_split(X, y, test_size=0.2,
                                                   random_state=0)
    model = GFDLClassifier(hidden_layer_sizes=hidden_layer_sizes,
                           activation=activation,
                           weight_scheme=weight_scheme,
                           direct_links=1,
                           seed=0,
                           reg_alpha=alpha)
    model.fit(X_train, y_train)

    # summary statistics of the probability matrix are a compact fingerprint
    # of the full reference output
    actual_proba = model.predict_proba(X_test)
    actual_proba_shape = actual_proba.shape
    actual_proba_median = np.median(actual_proba)
    actual_proba_min = np.min(actual_proba)

    np.testing.assert_allclose(actual_proba_shape, exp_proba_shape)
    np.testing.assert_allclose(actual_proba_median, exp_proba_median)
    np.testing.assert_allclose(actual_proba_min, exp_proba_min)
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
# Run scikit-learn's standard estimator-API compliance checks against
# default-constructed classifiers.
@parametrize_with_checks([GFDLClassifier(), EnsembleGFDLClassifier()])
def test_sklearn_api_conformance(estimator, check):
    check(estimator)
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
@pytest.mark.parametrize("reg_alpha, rtol, expected_acc, expected_roc", [
    (0.1, 1e-15, 0.9083333333333333, 0.9893414717354735),
    (None, 1e-15, 0.2222222222222222, 0.5518850599798965),
    (None, 1e-3, 0.8972222222222223, 0.9802912857599967),
])
def test_rtol_classifier(reg_alpha, rtol, expected_acc, expected_roc):
    """The Moore-Penrose solver needs a generous singular-value cutoff (rtol)
    on Digits: a tiny default cutoff degrades accuracy to near-chance, which
    a larger cutoff fixes. The ridge path (reg_alpha set) ignores rtol."""
    digits = load_digits()
    X_tr, X_te, y_tr, y_te = train_test_split(digits.data, digits.target,
                                              test_size=0.2, random_state=0)

    scaler = StandardScaler().fit(X_tr)

    clf = GFDLClassifier(hidden_layer_sizes=[800] * 10,
                         activation="softmax",
                         weight_scheme="normal",
                         seed=0,
                         reg_alpha=reg_alpha,
                         rtol=rtol)
    clf.fit(scaler.transform(X_tr), y_tr)

    X_te_scaled = scaler.transform(X_te)
    labels = clf.predict(X_te_scaled)
    proba = clf.predict_proba(X_te_scaled)

    np.testing.assert_allclose(accuracy_score(y_te, labels), expected_acc)
    np.testing.assert_allclose(roc_auc_score(y_te, proba, multi_class="ovo"),
                               expected_roc)
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
@pytest.mark.parametrize("reg_alpha, rtol, expected_acc, expected_roc", [
    (5.0, 1e-15, 0.7222222222222222, 0.9525486362311113),
    (None, 1e-15, 0.10833333333333334, 0.5062846049300238),
    (None, 1e-3, 0.9555555555555556, 0.9920190654177233),
])
def test_rtol_ensemble(reg_alpha, rtol, expected_acc, expected_roc):
    """Same rtol sensitivity as test_rtol_classifier, but for the ensemble
    model: the Moore-Penrose cutoff dominates quality on Digits while ridge
    (reg_alpha set) is unaffected by rtol."""
    digits = load_digits()
    X_tr, X_te, y_tr, y_te = train_test_split(digits.data, digits.target,
                                              test_size=0.2, random_state=0)

    scaler = StandardScaler().fit(X_tr)

    ens = EnsembleGFDLClassifier(hidden_layer_sizes=[2000] * 2,
                                 activation="relu",
                                 weight_scheme="uniform",
                                 seed=0,
                                 reg_alpha=reg_alpha,
                                 rtol=rtol)
    ens.fit(scaler.transform(X_tr), y_tr)

    X_te_scaled = scaler.transform(X_te)
    labels = ens.predict(X_te_scaled)
    proba = ens.predict_proba(X_te_scaled)

    np.testing.assert_allclose(accuracy_score(y_te, labels), expected_acc)
    np.testing.assert_allclose(roc_auc_score(y_te, proba, multi_class="ovo"),
                               expected_roc, atol=1e-05)
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pytest
|
|
3
|
+
from sklearn.datasets import fetch_openml, make_regression
|
|
4
|
+
from sklearn.metrics import r2_score
|
|
5
|
+
from sklearn.model_selection import train_test_split
|
|
6
|
+
from sklearn.preprocessing import StandardScaler
|
|
7
|
+
from sklearn.utils.estimator_checks import parametrize_with_checks
|
|
8
|
+
|
|
9
|
+
from gfdl.model import GFDLRegressor
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@pytest.mark.parametrize("""n_samples,
                         n_targets,
                         hidden_layer_sizes,
                         activation,
                         weight_scheme,
                         reg_alpha,
                         exp_preds_shape,
                         exp_preds_median,
                         exp_preds_min,
                         exp_preds_r2""", [
    # expected values are from the graforvfl library
    (100, 10, (100,), "relu", "glorot_normal", 10, (25, 10),
     -29.31478018, -490.57518221, 0.97537085),
    (100, 10, (100,), "tanh", "uniform", 1, (25, 10),
     -43.03897314, -504.32794352, 0.98411997),
    (100, 10, (100,), "log_softmax", "uniform", 1, (25, 10),
     -30.56871963218171, -558.1388909597706, 0.9999532782125536),
    (100, 10, (100,), "log_sigmoid", "normal", 10, (25, 10),
     -19.5976250350991, -574.1699708675857, 0.9853855947182326),
    (100, 10, (1000,), "softmin", "he_uniform", 1, (25, 10),
     -57.91870287977487, -589.6707200160679, 0.9656730623177637),
    (100, 10, (1000,), "softmax", "lecun_uniform", 10, (25, 10),
     -51.938696542946786, -513.4094105001416, 0.9589931777194366),
    (100, 100, (100,), "sigmoid", "glorot_uniform", 1, (25, 100),
     -46.92889730988215, -1585.2331437646524, 0.6496204322668526),
    (100, 100, (100,), "tanh", "he_normal", 10, (25, 100),
     -5.531248709518545, -1131.5021652659007, 0.6018381457540279),
    (100, 100, (1000,), "relu", "lecun_normal", 1, (25, 100),
     -24.857674257413233, -1241.941403822942, 0.5954067650339964),
    (100, 100, (1000,), "identity", "glorot_normal", 10, (25, 100),
     -49.66037744636776, -1418.0996396366454, 0.6387637880009253),
    (1000, 10, (100,), "log_softmax", "glorot_normal", 1, (250, 10),
     -2.157983014856103, -821.8910528092026, 0.999999671320564),
    (1000, 10, (100,), "log_sigmoid", "lecun_normal", 10, (250, 10),
     -2.25281191108881, -813.3197346939389, 0.9998208055604957),
    (1000, 10, (1000,), "softmin", "he_normal", 1, (250, 10),
     -2.932635323616438, -819.9889270165279, 0.9999535335431835),
    (1000, 10, (1000,), "softmax", "glorot_uniform", 10, (250, 10),
     -3.27895924524588, -809.0526184106433, 0.9996980844468629),
    (1000, 100, (100,), "sigmoid", "lecun_uniform", 1, (250, 100),
     40.193814730616296, -2003.2760146757932, 0.9999864051131802),
    (1000, 100, (100,), "tanh", "he_normal", 10, (250, 100),
     38.349789631939906, -1968.7361166078529, 0.9984649082549426),
    (1000, 100, (1000,), "relu", "normal", 1, (250, 100),
     47.91240910167704, -2194.259205351918, 0.8620693547752554),
    (1000, 100, (1000,), "identity", "uniform", 10, (250, 100),
     39.788475103832646, -2004.3219743138504, 0.9999999882159872)
])
def test_regression_against_grafo(n_samples, n_targets, hidden_layer_sizes,
                                  activation, weight_scheme, reg_alpha,
                                  exp_preds_shape, exp_preds_median,
                                  exp_preds_min, exp_preds_r2):
    """Compare regression predictions (shape/median/min/R2) against values
    pinned from the open-source graforvfl reference implementation."""
    # NOTE: n_features is set to n_targets here, so d doubles as both
    N, d = n_samples, n_targets
    RNG = 42
    X, y = make_regression(n_samples=N,
                           n_features=d,
                           n_informative=d,
                           n_targets=n_targets,
                           noise=0.0,
                           bias=0.0,
                           random_state=RNG)

    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.25,
                                                        random_state=RNG)

    # Preprocessing (use the SAME scaler for all models that need it)
    scaler = StandardScaler().fit(X_train)
    X_train_s = scaler.transform(X_train)
    X_test_s = scaler.transform(X_test)

    model = GFDLRegressor(
        hidden_layer_sizes=hidden_layer_sizes,
        activation=activation,
        weight_scheme=weight_scheme,
        direct_links=1,
        seed=RNG,
        reg_alpha=reg_alpha
    )
    model.fit(X_train_s, y_train)
    # summary statistics are a compact fingerprint of the reference output
    actual_preds = model.predict(X_test_s)
    actual_preds_shape = actual_preds.shape
    actual_preds_median = np.median(actual_preds)
    actual_preds_min = actual_preds.min()
    actual_preds_r2 = r2_score(y_test, actual_preds)
    np.testing.assert_allclose(actual_preds_shape, exp_preds_shape)
    np.testing.assert_allclose(actual_preds_median, exp_preds_median)
    np.testing.assert_allclose(actual_preds_min, exp_preds_min)
    np.testing.assert_allclose(actual_preds_r2, exp_preds_r2)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# Run scikit-learn's standard estimator-API compliance checks against a
# default-constructed regressor.
@parametrize_with_checks([GFDLRegressor()])
def test_sklearn_api_conformance(estimator, check):
    check(estimator)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@pytest.mark.parametrize("reg_alpha, expected", [
    (0.1, 0.78550376),
    # NOTE: for Moore-Penrose, a large singular value
    # cutoff (rtol) is required to achieve reasonable R2 with
    # the Boston Housing dataset
    (None, 0.73452466),
])
def test_regression_boston(reg_alpha, expected):
    """Real-world sanity check: multi-layer RVFL regression on Boston."""
    boston = fetch_openml(name="boston", version=1, as_frame=False)
    features, target = boston.data, boston.target.astype(float)
    X_tr, X_te, y_tr, y_te = train_test_split(features, target, test_size=0.2,
                                              random_state=42,
                                              shuffle=True)

    std = StandardScaler().fit(X_tr)

    reg = GFDLRegressor(
        hidden_layer_sizes=[800] * 10,
        activation="tanh",
        weight_scheme="uniform",
        direct_links=1,
        seed=0,
        reg_alpha=reg_alpha,
        rtol=1e-3,  # has no effect for `Ridge`
    )
    reg.fit(std.transform(X_tr), y_tr)

    predictions = reg.predict(std.transform(X_te))
    # RandomForestRegressor() with default params scores
    # 0.8733907 here; multi-layer GFDL with above params is a bit
    # worse, but certainly better than random chance:
    np.testing.assert_allclose(r2_score(y_te, predictions), expected)
|