gfdl 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gfdl/tests/__init__.py ADDED
File without changes
@@ -0,0 +1,518 @@
1
+ # tests/test_model.py
2
+
3
+ import numpy as np
4
+ import pytest
5
+ from numpy.testing import assert_allclose
6
+ from sklearn.datasets import load_digits, make_classification
7
+ from sklearn.metrics import accuracy_score, roc_auc_score
8
+ from sklearn.model_selection import StratifiedKFold, train_test_split
9
+ from sklearn.preprocessing import OneHotEncoder, StandardScaler
10
+ from sklearn.utils.estimator_checks import parametrize_with_checks
11
+ from ucimlrepo import fetch_ucirepo
12
+
13
+ from gfdl.model import EnsembleGFDLClassifier, GFDLClassifier
14
+
15
# Activation functions and weight-initialization schemes exercised by the
# parametrized tests in this module.
activations = [
    "relu", "tanh", "sigmoid", "identity", "softmax", "softmin",
    "log_sigmoid", "log_softmax",
]
weights = [
    "zeros", "range", "uniform", "normal", "he_uniform", "lecun_uniform",
    "glorot_uniform", "he_normal", "lecun_normal", "glorot_normal",
]
19
+
20
+
21
@pytest.mark.parametrize(
    "hidden_layer_sizes",
    [(10,), (10, 10), (5, 10, 15, 20), (100,)]
)
@pytest.mark.parametrize("n_classes", [2, 5])
@pytest.mark.parametrize("direct_links", [0, 1])
@pytest.mark.parametrize("activation", activations)
@pytest.mark.parametrize("weight_scheme", weights)
def test_model(hidden_layer_sizes, n_classes, activation, weight_scheme, direct_links):
    """Smoke-test GFDLClassifier across topologies/activations/weight schemes.

    Checks the fitted per-layer weight/bias shapes, the readout coefficient
    shape (with and without direct input links), and the basic
    predict/predict_proba contract.
    """
    N, d = 60, 10
    X, y = make_classification(n_samples=N,
                               n_features=d,
                               n_classes=n_classes,
                               n_informative=8,
                               random_state=42)

    # positional args: hidden_layer_sizes, activation, weight_scheme,
    # direct_links, seed
    model = GFDLClassifier(hidden_layer_sizes, activation, weight_scheme,
                           direct_links, 0)

    model.fit(X, y)

    # one weight matrix per hidden layer; the first maps the d input features
    assert len(model.W_) == len(hidden_layer_sizes)
    assert model.W_[0].T.shape == (d, hidden_layer_sizes[0])

    # each subsequent layer maps the previous layer's width to its own.
    # (The original combined zip() with a redundant
    # range(len(model.W_) - 1) index stream; enumerate is equivalent
    # and clearer.)
    for i, (width, w, b) in enumerate(
        zip(hidden_layer_sizes[1:], model.W_[1:], model.b_[1:], strict=False)
    ):
        assert w.T.shape == (hidden_layer_sizes[i], width)
        assert b.shape == (width,)

    # direct links append the raw d input features to the readout;
    # len(np.arange(n_classes)) in the original is simply n_classes
    n_readout_features = sum(hidden_layer_sizes) + (d if direct_links else 0)
    assert model.coeff_.shape == (n_readout_features, n_classes)

    pred = model.predict(X[:10])
    assert set(np.unique(pred)).issubset(set(np.arange(n_classes)))
    np.testing.assert_array_equal(np.unique(y), np.arange(n_classes))

    # probabilities: rows sum to 1, entries in [0, 1], argmax matches predict
    P = model.predict_proba(X[:10])
    np.testing.assert_allclose(P.sum(axis=1), 1.0, atol=1e-6)
    assert (P >= 0).all() and (P <= 1).all()
    np.testing.assert_array_equal(pred, model.classes_[np.argmax(P, axis=1)])
70
+
71
+
72
@pytest.mark.parametrize("weight_scheme", weights)
@pytest.mark.parametrize(
    "hidden_layer_size",
    [(10,), (2, 3, 2, 1), (5, 10, 15, 20, 15, 10), (100,)]
)
def test_multilayer_math(weight_scheme, hidden_layer_size):
    """With identity activation the network is affine, so the fitted readout
    must equal the closed-form minimum-norm least-squares solution.

    Each layer computes an affine map; composing them gives cumulative
    linear maps T_l and offsets c_l such that layer l's output is
    ``X @ T_l + c_l``. Concatenating all layers yields the design matrix
    phi, and the expected readout is ``pinv(phi) @ Y``.
    """
    N, d = 60, 10
    X, y = make_classification(n_samples=N,
                               n_features=d,
                               n_classes=3,
                               n_informative=8,
                               random_state=42)

    model = GFDLClassifier(
        hidden_layer_sizes=hidden_layer_size,
        activation="identity",
        weight_scheme=weight_scheme,
        direct_links=False,
        seed=0
    )

    model.fit(X, y)

    # one-hot targets for the least-squares readout
    enc = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
    Y = enc.fit_transform(y.reshape(-1, 1))

    # collapsing weights and biases for representation as linear operation.
    # Renamed from `weights`, which shadowed the module-level `weights`
    # list of weight-scheme names used by the parametrize decorator.
    layer_weights = [w.T for w in model.W_]
    Ts, cs = [], []
    T = np.eye(X.shape[1])
    c = np.zeros((X.shape[1],))

    for w, b in zip(layer_weights, model.b_, strict=False):
        T = T @ w
        c = c @ w + b
        Ts.append(T)
        cs.append(c)

    # design matrix with ALL layers concatenated
    expected_phi = np.hstack([X @ T_l + c_l for T_l, c_l in zip(Ts, cs, strict=False)])

    expected_beta = np.linalg.pinv(expected_phi) @ Y

    np.testing.assert_allclose(model.coeff_, expected_beta)
117
+
118
+
119
@pytest.mark.parametrize("hidden_layer_sizes, activation, weight_scheme, exp_auc", [
    # when direct links are absent (ELM), we expect the
    # ROC AUC to increase with multi-layer network complexity
    # up to a reasonable degree, when the width of the layers is
    # quite small
    ((2,), "relu", "uniform", 0.5598328634285958),
    ((2, 2), "relu", "uniform", 0.5666639967533855),
    # start hitting diminishing returns here:
    ((2, 2, 2, 2), "relu", "uniform", 0.5666639967533855),
    ((2, 2, 2, 2, 2, 2, 2), "relu", "uniform", 0.5666639967533855),
    # effectively no improvement here:
    ((2, 2, 2, 2, 2, 2, 2, 2, 2), "relu", "uniform", 0.5666639967533855),
    ]
)
def test_multilayer_progression(weight_scheme,
                                hidden_layer_sizes,
                                activation,
                                exp_auc):
    """Pin exact OvO ROC AUC values as network depth grows (no direct links).

    The expected AUCs are regression targets produced by this
    implementation at seed=0; the table documents how stacking more
    narrow layers stops improving AUC on this synthetic 5-class problem.
    """
    # hard-ish synthetic task (class_sep=0.5) so depth differences show up
    X, y = make_classification(n_samples=400,
                               n_features=100,
                               n_classes=5,
                               n_informative=26,
                               random_state=42,
                               class_sep=0.5)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                        random_state=0)
    model = GFDLClassifier(
        hidden_layer_sizes=hidden_layer_sizes,
        activation=activation,
        weight_scheme=weight_scheme,
        direct_links=False,
        seed=0
    )
    model.fit(X_train, y_train)
    y_score = model.predict_proba(X_test)
    actual_auc = roc_auc_score(y_test, y_score, multi_class="ovo")
    assert_allclose(actual_auc, exp_auc)
156
+
157
+
158
@pytest.mark.parametrize(
    "Classifier, target",
    [(GFDLClassifier, 0.7161), (EnsembleGFDLClassifier, 0.7132)]
)
def test_against_shi2021(Classifier, target):
    """Compare mean K-fold accuracy on the UCI Abalone dataset against
    Shi et al. (2021), DOI 10.1016/j.patcog.2021.107978.

    The ``target`` values are regression targets for this implementation
    (paper reports dRVFL 66.33%, edRVFL 65.81%); see the comments below
    for where the paper's setup had to be guessed.
    """
    # test multilayer classification against
    # the results given in Shi et al. (2021) DOI 10.1016/j.patcog.2021.107978
    # dataset obtained from UCI ML repo
    abalone = fetch_ucirepo(id=1)

    X = abalone.data.features
    y = abalone.data.targets

    # encode the categorical Sex column numerically
    X = X.assign(
        Sex=lambda d: d["Sex"].map({"M": 0, "F": 1, "I": 2}).astype("int8")
    )

    X, y = np.array(X), np.array(y).reshape(-1)

    # Shi et al. only use 3 classes, but all samples are used, which implies binning
    # they do not specify the bins used, so I used my best judgement
    y = np.digitize(y, bins=[7, 11])

    # Shi et al. used half of the titanic dataset to tune
    # so I assumed they did the same for this dataset
    X_tune, X_eval, y_tune, y_eval = train_test_split(
        X, y, test_size=0.5, random_state=0)

    # Shi et al. used 4 folds on the titanic dataset
    # I inferred that they also used 4 folds for this dataset
    K = 4

    # X_tune and y_tune were used to find the hyperparameters
    # using Shi et al.'s two-stage tuning method, disregarding
    # parameter C and tuning the activation function instead
    """
    For RVFL based models, we use a two-stage tuning method to obtain
    their best hyperparameter configurations. The two-stage tuning can be
    performed by the following steps: 1) Fix the number layers to 2, and
    then select the optimal number of neurons (N*) and regularization
    parameter (C*) using a coarse range for N and C. 2) Tune the number
    of layers and fine tune the N, C parameters by considering only a fine
    range in the neighborhood of N* and C*.

    Shi et al. (2021) https://doi.org/10.1016/j.patcog.2021.107978
    """

    # values determined using method outlined above
    hidden_layer_sizes = [512, 512]
    reg = 16

    model = Classifier(
        hidden_layer_sizes=hidden_layer_sizes,
        activation="relu",
        weight_scheme="uniform",
        reg_alpha=reg,
        seed=0
    )

    scl = StandardScaler()

    # The actual splits used in the paper were not specified
    skf = StratifiedKFold(n_splits=K, shuffle=True, random_state=42)

    # mean accuracy over the K evaluation folds
    acc = 0
    for train_index, test_index in skf.split(X_eval, y_eval):
        X_train = X_eval[train_index]
        y_train = y_eval[train_index]
        X_test = X_eval[test_index]
        y_test = y_eval[test_index]

        # scaler is re-fit per fold on the training split only,
        # so no test-fold information leaks into scaling
        model.fit(scl.fit_transform(X_train), y_train)

        y_hat = model.predict(scl.transform(X_test))

        acc += accuracy_score(y_test, y_hat)

    acc /= K

    # not an exact match because they don't specify their activation
    # nor do they mention the best hyperparameter configuration
    # and they're using ridge

    # tightest bound for both rel and abs
    # values in paper:
    # dRVFL accuracy: 66.33%
    # edRVFL accuracy: 65.81%
    assert acc == pytest.approx(target, rel=1e-4, abs=0)
246
+
247
+
248
def test_soft_and_hard():
    """Soft voting must equal argmax over the averaged ensemble
    probabilities, and for this seeded setup hard voting agrees too."""
    X, y = make_classification(n_samples=60,
                               n_features=10,
                               n_classes=3,
                               n_informative=8,
                               random_state=0)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                        random_state=0)

    clf = EnsembleGFDLClassifier(
        hidden_layer_sizes=(5, 5, 5),
        activation="tanh",
        weight_scheme="uniform",
        seed=0,
        reg_alpha=0.1
    )
    clf.fit(X_train, y_train)

    soft_labels = clf.predict(X_test)

    # soft voting is equivalent to argmax over the mean probabilities
    proba = clf.predict_proba(X_test)
    argmax_labels = clf.classes_[np.argmax(proba, axis=1)]
    np.testing.assert_equal(soft_labels, argmax_labels)

    # switching to hard voting after fit yields identical labels here
    clf.voting = "hard"
    hard_labels = clf.predict(X_test)

    np.testing.assert_equal(soft_labels, hard_labels)
278
+
279
+
280
def test_hard_vote_proba_error():
    """predict_proba must raise AttributeError when voting='hard'."""
    X, y = make_classification(n_samples=60,
                               n_features=10,
                               n_classes=3,
                               n_informative=8,
                               random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                        random_state=0)

    clf = EnsembleGFDLClassifier(
        hidden_layer_sizes=(5, 5, 5),
        activation="tanh",
        weight_scheme="uniform",
        seed=0,
        reg_alpha=0.1,
        voting="hard",
    )
    clf.fit(X_train, y_train)

    # hard voting has no class probabilities to report, and the error
    # message is expected to name the offending method
    with pytest.raises(AttributeError, match="predict_proba"):
        clf.predict_proba(X_test)
299
+
300
+
301
@pytest.mark.parametrize("alpha", [None, 0.1])
def test_soft_and_hard_can_differ(alpha):
    """Soft and hard ensemble voting can disagree on individual samples.

    ``difference`` pins exactly which of the 12 test samples receive the
    same label under both voting schemes for this seeded configuration
    (with and without regularization).
    """
    N, d = 60, 10
    X, y = make_classification(n_samples=N,
                               n_features=d,
                               n_classes=3,
                               n_informative=8,
                               random_state=0)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                        random_state=0)

    # adding more layers (heads) increases the chance of disagreement
    # between the two voting methods
    model = EnsembleGFDLClassifier(
        hidden_layer_sizes=(3, 3, 3, 3),
        activation="tanh",
        weight_scheme="uniform",
        seed=0,
        reg_alpha=alpha
    )
    model.fit(X_train, y_train)
    y_soft = model.predict(X_test)
    # voting mode is switched after fit; no refit is performed
    model.voting = "hard"
    y_hard = model.predict(X_test)
    # True where soft and hard voting agree; only sample index 10 differs
    difference = [
        True, True, True, True, True, True, True, True, True, True, False, True
    ]

    np.testing.assert_array_equal(y_soft == y_hard, difference)
331
+
332
+
333
@pytest.mark.parametrize("Classifier", [GFDLClassifier, EnsembleGFDLClassifier])
def test_invalid_activation_weight(Classifier):
    """Unknown activation or weight-scheme names must raise ValueError."""
    X = np.zeros((30, 4))
    y = np.zeros((30,))
    # the sklearn estimator API bans input validation in __init__,
    # so we need to call fit() for error handling to kick in:
    # https://scikit-learn.org/stable/developers/develop.html#developing-scikit-learn-estimators
    bad_configs = (
        {"activation": "bogus_activation", "weight_scheme": "uniform"},
        {"activation": "identity", "weight_scheme": "bogus_weight"},
    )
    for kwargs in bad_configs:
        estimator = Classifier(hidden_layer_sizes=100, **kwargs)
        with pytest.raises(ValueError, match="is not supported"):
            estimator.fit(X, y)
350
+
351
+
352
@pytest.mark.parametrize("Classifier", [GFDLClassifier, EnsembleGFDLClassifier])
def test_invalid_alpha(Classifier):
    """A negative reg_alpha must be rejected when fit() runs."""
    # the sklearn estimator API bans input validation in __init__,
    # so we need to call fit() for error handling to kick in:
    # https://scikit-learn.org/stable/developers/develop.html#developing-scikit-learn-estimators
    features = np.zeros((30, 4))
    targets = np.zeros((30,))
    estimator = Classifier(hidden_layer_sizes=100,
                           activation="identity",
                           weight_scheme="uniform",
                           reg_alpha=-10)
    with pytest.raises(ValueError, match=r"Negative reg\_alpha"):
        estimator.fit(features, targets)
365
+
366
+
367
@pytest.mark.parametrize("""hidden_layer_sizes,
                         n_classes,
                         activation,
                         weight_scheme,
                         alpha,
                         exp_proba_shape,
                         exp_proba_median,
                         exp_proba_min""", [

    # expected values are from graforvfl library
    ([10,], 2, "relu", "uniform", None, (20, 2), 0.5, 0.0444571694),
    ([100,], 2, "tanh", "normal", None, (20, 2), 0.5, 0.02538905725),
    ([10,], 5, "softmax", "lecun_uniform", None, (20, 5),
     0.186506112, 0.08469873),
    ([10,], 2, "relu", "uniform", 0.5, (20, 2), 0.49999999999999994,
     0.04676933232591643),
    ([10,], 2, "relu", "normal", 0.5, (20, 2), 0.5,
     0.13832596541020634),
    ([10,], 2, "relu", "he_uniform", 0.5, (20, 2), 0.5,
     0.09354846081377409),
    ([10,], 2, "relu", "lecun_uniform", 0.5, (20, 2), 0.5,
     0.09387932375067173),
    ([10,], 2, "relu", "glorot_uniform", 0.5, (20, 2),
     0.49999999999999994, 0.09474642560519067),
    ([10,], 2, "relu", "he_normal", 0.5, (20, 2), 0.5,
     0.13756805074436051),
    ([10,], 2, "relu", "lecun_normal", 0.5, (20, 2), 0.5,
     0.1366715193146648),
    ([10,], 2, "relu", "glorot_normal", 0.5, (20, 2), 0.5,
     0.147434110768701),
    ([100,], 5, "relu", "normal", 1, (20, 5), 0.15697278777061396,
     0.014480242978774488),
    ([100,], 5, "tanh", "normal", 1, (20, 5), 0.18173657135483476,
     0.04755723146401269),
    ([100,], 5, "sigmoid", "normal", 1, (20, 5), 0.1831653950464296,
     0.05378741996708733),
    ([100,], 5, "softmax", "normal", 1, (20, 5), 0.19357646668265396,
     0.10898717209741866),
    ([100,], 5, "softmin", "normal", 1, (20, 5), 0.18746771358297387,
     0.09186562406164228),
    ([100,], 5, "log_sigmoid", "normal", 1, (20, 5),
     0.16722029352468032, 0.012690348255702557),
    ([100,], 5, "log_softmax", "normal", 1, (20, 5),
     0.1853363666712296, 0.10846041127337658),
    ])
def test_classification_against_grafo(hidden_layer_sizes, n_classes, activation,
                                      weight_scheme, alpha, exp_proba_shape,
                                      exp_proba_median, exp_proba_min):
    """Cross-check predict_proba against the open-source graforvfl library.

    Binary and multi-class classification on synthetic data: the shape,
    median, and minimum of the probability matrix must match the values
    produced by graforvfl for the same configuration and seed.
    """
    X, y = make_classification(n_classes=n_classes,
                               n_informative=8, random_state=0)
    X_train, X_test, y_train, _ = train_test_split(X, y, test_size=0.2,
                                                   random_state=0)
    model = GFDLClassifier(hidden_layer_sizes=hidden_layer_sizes,
                           activation=activation,
                           weight_scheme=weight_scheme,
                           direct_links=1,
                           seed=0,
                           reg_alpha=alpha)
    model.fit(X_train, y_train)

    # summary statistics of the probability matrix, compared below
    actual_proba = model.predict_proba(X_test)
    actual_proba_shape = actual_proba.shape
    actual_proba_median = np.median(actual_proba)
    actual_proba_min = np.min(actual_proba)

    np.testing.assert_allclose(actual_proba_shape, exp_proba_shape)
    np.testing.assert_allclose(actual_proba_median, exp_proba_median)
    np.testing.assert_allclose(actual_proba_min, exp_proba_min)
438
+
439
+
440
@parametrize_with_checks([GFDLClassifier(), EnsembleGFDLClassifier()])
def test_sklearn_api_conformance(estimator, check):
    """Run scikit-learn's estimator API conformance checks on both
    classifier variants (one test case per generated check)."""
    check(estimator)
443
+
444
+
445
@pytest.mark.parametrize("reg_alpha, rtol, expected_acc, expected_roc", [
    (0.1, 1e-15, 0.9083333333333333, 0.9893414717354735),
    (None, 1e-15, 0.2222222222222222, 0.5518850599798965),
    (None, 1e-3, 0.8972222222222223, 0.9802912857599967),
    ])
def test_rtol_classifier(reg_alpha, rtol, expected_acc, expected_roc):
    """Singular-value cutoff (rtol) matters for the Moore-Penrose solver.

    For Moore-Penrose, a large singular value cutoff (rtol)
    may be required to achieve reasonable results. This test
    showcases that a default low cut off leads to almost random classification
    output for the Digits dataset which is alleviated by increasing the cut off.
    This cut off has no effect on the ridge solver (reg_alpha set).
    """
    data = load_digits()
    X, y = data.data, data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                        random_state=0)

    # standardize using training-set statistics only
    scaler = StandardScaler().fit(X_train)
    X_train_s = scaler.transform(X_train)
    X_test_s = scaler.transform(X_test)

    # deep network: 10 hidden layers of 800 neurons each
    model = GFDLClassifier(hidden_layer_sizes=[800] * 10,
                           activation="softmax",
                           weight_scheme="normal",
                           seed=0,
                           reg_alpha=reg_alpha,
                           rtol=rtol)
    model.fit(X_train_s, y_train)

    y_hat_cur = model.predict(X_test_s)
    y_hat_cur_proba = model.predict_proba(X_test_s)

    acc_cur = accuracy_score(y_test, y_hat_cur)
    roc_cur = roc_auc_score(y_test, y_hat_cur_proba, multi_class="ovo")

    np.testing.assert_allclose(acc_cur, expected_acc)
    np.testing.assert_allclose(roc_cur, expected_roc)
481
+
482
+
483
@pytest.mark.parametrize("reg_alpha, rtol, expected_acc, expected_roc", [
    (5.0, 1e-15, 0.7222222222222222, 0.9525486362311113),
    (None, 1e-15, 0.10833333333333334, 0.5062846049300238),
    (None, 1e-3, 0.9555555555555556, 0.9920190654177233),
    ])
def test_rtol_ensemble(reg_alpha, rtol, expected_acc, expected_roc):
    """Same rtol sensitivity as test_rtol_classifier, for the ensemble model.

    For Moore-Penrose, a large singular value cutoff (rtol)
    may be required to achieve reasonable results. This test
    showcases that a default low cut off leads to almost random classification
    output for the Digits dataset which is alleviated by increasing the cut off.
    This cut off has no effect on the ridge solver (reg_alpha set).
    """
    data = load_digits()
    X, y = data.data, data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                        random_state=0)

    # standardize using training-set statistics only
    scaler = StandardScaler().fit(X_train)
    X_train_s = scaler.transform(X_train)
    X_test_s = scaler.transform(X_test)

    model = EnsembleGFDLClassifier(hidden_layer_sizes=[2000] * 2,
                                   activation="relu",
                                   weight_scheme="uniform",
                                   seed=0,
                                   reg_alpha=reg_alpha,
                                   rtol=rtol)
    model.fit(X_train_s, y_train)

    y_hat_cur = model.predict(X_test_s)
    y_hat_cur_proba = model.predict_proba(X_test_s)

    acc_cur = accuracy_score(y_test, y_hat_cur)
    roc_cur = roc_auc_score(y_test, y_hat_cur_proba, multi_class="ovo")

    np.testing.assert_allclose(acc_cur, expected_acc)
    # ROC needs a small absolute tolerance for cross-platform stability
    np.testing.assert_allclose(roc_cur, expected_roc, atol=1e-05)
@@ -0,0 +1,142 @@
1
+ import numpy as np
2
+ import pytest
3
+ from sklearn.datasets import fetch_openml, make_regression
4
+ from sklearn.metrics import r2_score
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.preprocessing import StandardScaler
7
+ from sklearn.utils.estimator_checks import parametrize_with_checks
8
+
9
+ from gfdl.model import GFDLRegressor
10
+
11
+
12
@pytest.mark.parametrize("""n_samples,
                         n_targets,
                         hidden_layer_sizes,
                         activation,
                         weight_scheme,
                         reg_alpha,
                         exp_preds_shape,
                         exp_preds_median,
                         exp_preds_min,
                         exp_preds_r2""", [
    # expected values are from the graforvfl library
    (100, 10, (100,), "relu", "glorot_normal", 10, (25, 10),
     -29.31478018, -490.57518221, 0.97537085),
    (100, 10, (100,), "tanh", "uniform", 1, (25, 10),
     -43.03897314, -504.32794352, 0.98411997),
    (100, 10, (100,), "log_softmax", "uniform", 1, (25, 10),
     -30.56871963218171, -558.1388909597706, 0.9999532782125536),
    (100, 10, (100,), "log_sigmoid", "normal", 10, (25, 10),
     -19.5976250350991, -574.1699708675857, 0.9853855947182326),
    (100, 10, (1000,), "softmin", "he_uniform", 1, (25, 10),
     -57.91870287977487, -589.6707200160679, 0.9656730623177637),
    (100, 10, (1000,), "softmax", "lecun_uniform", 10, (25, 10),
     -51.938696542946786, -513.4094105001416, 0.9589931777194366),
    (100, 100, (100,), "sigmoid", "glorot_uniform", 1, (25, 100),
     -46.92889730988215, -1585.2331437646524, 0.6496204322668526),
    (100, 100, (100,), "tanh", "he_normal", 10, (25, 100),
     -5.531248709518545, -1131.5021652659007, 0.6018381457540279),
    (100, 100, (1000,), "relu", "lecun_normal", 1, (25, 100),
     -24.857674257413233, -1241.941403822942, 0.5954067650339964),
    (100, 100, (1000,), "identity", "glorot_normal", 10, (25, 100),
     -49.66037744636776, -1418.0996396366454, 0.6387637880009253),
    (1000, 10, (100,), "log_softmax", "glorot_normal", 1, (250, 10),
     -2.157983014856103, -821.8910528092026, 0.999999671320564),
    (1000, 10, (100,), "log_sigmoid", "lecun_normal", 10, (250, 10),
     -2.25281191108881, -813.3197346939389, 0.9998208055604957),
    (1000, 10, (1000,), "softmin", "he_normal", 1, (250, 10),
     -2.932635323616438, -819.9889270165279, 0.9999535335431835),
    (1000, 10, (1000,), "softmax", "glorot_uniform", 10, (250, 10),
     -3.27895924524588, -809.0526184106433, 0.9996980844468629),
    (1000, 100, (100,), "sigmoid", "lecun_uniform", 1, (250, 100),
     40.193814730616296, -2003.2760146757932, 0.9999864051131802),
    (1000, 100, (100,), "tanh", "he_normal", 10, (250, 100),
     38.349789631939906, -1968.7361166078529, 0.9984649082549426),
    (1000, 100, (1000,), "relu", "normal", 1, (250, 100),
     47.91240910167704, -2194.259205351918, 0.8620693547752554),
    (1000, 100, (1000,), "identity", "uniform", 10, (250, 100),
     39.788475103832646, -2004.3219743138504, 0.9999999882159872)
    ])
def test_regression_against_grafo(n_samples, n_targets, hidden_layer_sizes,
                                  activation, weight_scheme, reg_alpha,
                                  exp_preds_shape, exp_preds_median,
                                  exp_preds_min, exp_preds_r2):
    """Cross-check GFDLRegressor predictions against the graforvfl library.

    Shape, median, minimum, and R^2 of the predictions must match the
    values produced by graforvfl for the same configuration and seed.
    """
    # NOTE(review): n_features is deliberately tied to n_targets (d) here —
    # the expected values were generated under that coupling; confirm
    # against the graforvfl reference script before changing it.
    N, d = n_samples, n_targets
    RNG = 42
    X, y = make_regression(n_samples=N,
                           n_features=d,
                           n_informative=d,
                           n_targets=n_targets,
                           noise=0.0,
                           bias=0.0,
                           random_state=RNG)

    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.25,
                                                        random_state=RNG)

    # Preprocessing (use the SAME scaler for all models that need it)
    scaler = StandardScaler().fit(X_train)
    X_train_s = scaler.transform(X_train)
    X_test_s = scaler.transform(X_test)

    model = GFDLRegressor(
        hidden_layer_sizes=hidden_layer_sizes,
        activation=activation,
        weight_scheme=weight_scheme,
        direct_links=1,
        seed=RNG,
        reg_alpha=reg_alpha
    )
    model.fit(X_train_s, y_train)
    actual_preds = model.predict(X_test_s)
    actual_preds_shape = actual_preds.shape
    actual_preds_median = np.median(actual_preds)
    actual_preds_min = actual_preds.min()
    actual_preds_r2 = r2_score(y_test, actual_preds)
    np.testing.assert_allclose(actual_preds_shape, exp_preds_shape)
    np.testing.assert_allclose(actual_preds_median, exp_preds_median)
    np.testing.assert_allclose(actual_preds_min, exp_preds_min)
    np.testing.assert_allclose(actual_preds_r2, exp_preds_r2)
101
+
102
+
103
@parametrize_with_checks([GFDLRegressor()])
def test_sklearn_api_conformance(estimator, check):
    """Run scikit-learn's estimator API conformance checks on the
    regressor (one test case per generated check)."""
    check(estimator)
106
+
107
+
108
@pytest.mark.parametrize("reg_alpha, expected", [
    (0.1, 0.78550376),
    # NOTE: for Moore-Penrose, a large singular value
    # cutoff (rtol) is required to achieve reasonable R2 with
    # the Boston Housing dataset
    (None, 0.73452466),
    ])
def test_regression_boston(reg_alpha, expected):
    """Pin the R^2 of a deep GFDLRegressor on the Boston Housing dataset.

    Real-world data test with a multi-layer RVFL; the expected values are
    regression targets for this implementation at seed=0, with and
    without ridge regularization.
    """
    boston = fetch_openml(name="boston", version=1, as_frame=False)
    X, y = boston.data, boston.target.astype(float)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                        random_state=42,
                                                        shuffle=True)

    # scale with training-set statistics only
    scaler = StandardScaler().fit(X_train)
    X_train_s = scaler.transform(X_train)
    X_test_s = scaler.transform(X_test)

    model = GFDLRegressor(
        hidden_layer_sizes=[800] * 10,
        activation="tanh",
        weight_scheme="uniform",
        direct_links=1,
        seed=0,
        reg_alpha=reg_alpha,
        rtol=1e-3,  # has no effect for `Ridge`
    )
    model.fit(X_train_s, y_train)
    y_pred = model.predict(X_test_s)
    # RandomForestRegressor() with default params scores
    # 0.8733907 here; multi-layer GFDL with above params is a bit
    # worse, but certainly better than random chance:
    actual = r2_score(y_test, y_pred)
    np.testing.assert_allclose(actual, expected)