gfdl 0.1.0__py3-none-any.whl

gfdl/model.py ADDED
@@ -0,0 +1,851 @@
+ """
+ Estimators for gradient-free deep learning.
+ """
+
+ import numpy as np
+ from scipy.special import logsumexp
+ from scipy.stats import mode
+ from sklearn.base import (
+     BaseEstimator,
+     ClassifierMixin,
+     MultiOutputMixin,
+     RegressorMixin,
+ )
+ from sklearn.linear_model import Ridge
+ from sklearn.preprocessing import OneHotEncoder
+ from sklearn.utils.metaestimators import available_if
+ from sklearn.utils.multiclass import unique_labels
+ from sklearn.utils.validation import check_is_fitted, validate_data
+
+ from gfdl.activations import resolve_activation
+ from gfdl.weights import resolve_weight
+
+
+ class GFDL(BaseEstimator):
+     """Base class for GFDL classification and regression estimators."""
+
+     def __init__(
+         self,
+         hidden_layer_sizes: np.typing.ArrayLike = (100,),
+         activation: str = "identity",
+         weight_scheme: str = "uniform",
+         direct_links: bool = True,
+         seed: int | None = None,
+         reg_alpha: float | None = None,
+         rtol: float | None = None,
+     ):
+         self.hidden_layer_sizes = hidden_layer_sizes
+         self.activation = activation
+         self.direct_links = direct_links
+         self.seed = seed
+         self.weight_scheme = weight_scheme
+         self.reg_alpha = reg_alpha
+         self.rtol = rtol
+
+     def fit(self, X, Y):
+         # Assumption: X, Y have been pre-processed.
+         # X shape: (n_samples, n_features)
+         # Y shape: (n_samples, n_classes)
+         if self.reg_alpha is not None and self.reg_alpha < 0.0:
+             raise ValueError("Negative reg_alpha. Expected range: None or [0.0, inf).")
+         fn = resolve_activation(self.activation)[1]
+         self._activation_fn = fn
+         self._N = X.shape[1]
+         hidden_layer_sizes = np.asarray(self.hidden_layer_sizes)
+         self._weight_mode = resolve_weight(self.weight_scheme)
+
+         # W_: list of n_layers weight matrices
+         # b_: list of n_layers bias vectors
+         self.W_ = []
+         self.b_ = []
+         rng = self.get_generator(self.seed)
+
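+         # The hidden-layer weights and biases are drawn once from the chosen
+         # weight scheme and are never updated; only the linear readout
+         # coeff_ is learned from the data.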
+         self.W_.append(
+             self._weight_mode(
+                 self._N, hidden_layer_sizes[0], rng=self.get_generator(self.seed)
+             )
+         )
+         self.b_.append(
+             self._weight_mode(1, hidden_layer_sizes[0], rng=rng).reshape(-1)
+         )
+         for i, layer in enumerate(hidden_layer_sizes[1:]):
+             # shape: (n_hidden_i, n_hidden_{i-1})
+             self.W_.append(
+                 self._weight_mode(hidden_layer_sizes[i], layer, rng=rng)
+             )
+             # shape: (n_hidden_i,)
+             self.b_.append(
+                 self._weight_mode(1, layer, rng=rng).reshape(-1)
+             )
+
+         # Hs collects each hidden layer's activations (list of length n_layers).
+         Hs = []
+         H_prev = X
+         for w, b in zip(self.W_, self.b_, strict=False):
+             Z = H_prev @ w.T + b  # (n_samples, n_hidden)
+             H_prev = self._activation_fn(Z)
+             Hs.append(H_prev)
+
+         # design matrix shape: (n_samples, sum_hidden + n_features)
+         # or (n_samples, sum_hidden) without direct links
+         if self.direct_links:
+             Hs.append(X)
+         D = np.hstack(Hs)
+
+         # beta shape: (sum_hidden + n_features, n_classes)
+         # or (sum_hidden, n_classes) without direct links
+
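+         # The readout solves the linear least-squares problem
+         # min_B ||D @ B - Y||_F^2 (plus reg_alpha * ||B||_F^2 when
+         # reg_alpha is set).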
98
+ # If reg_alpha is None, use direct solve using
99
+ # MoorePenrose Pseudo-Inverse, otherwise use ridge regularized form.
100
+ if self.reg_alpha is None:
101
+ self.coeff_ = np.linalg.pinv(D, rtol=self.rtol) @ Y
102
+ else:
103
+ ridge = Ridge(alpha=self.reg_alpha, fit_intercept=False)
104
+ ridge.fit(D, Y)
105
+ self.coeff_ = ridge.coef_.T
106
+ return self
107
+
+     def predict(self, X):
+         check_is_fitted(self)
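+         # Recompute the forward pass with the stored random weights; the
+         # prediction is the linear readout D @ coeff_.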
+         Hs = []
+         H_prev = X
+         for W, b in zip(self.W_, self.b_, strict=False):
+             Z = H_prev @ W.T + b  # (n_samples, n_hidden)
+             H_prev = self._activation_fn(Z)
+             Hs.append(H_prev)
+
+         if self.direct_links:
+             Hs.append(X)
+         D = np.hstack(Hs)
+         out = D @ self.coeff_
+
+         return out
+
+     def get_generator(self, seed):
+         return np.random.default_rng(seed)
+
+
+ class GFDLClassifier(ClassifierMixin, GFDL):
+     """
+     Random vector functional link network classifier.
+
+     This model fits a feedforward neural network with fixed random hidden-layer
+     parameters and solves for the output weights using linear least squares or
+     ridge regression. When direct links are disabled, the model corresponds to
+     the Extreme Learning Machine (ELM) architecture.
+
+     Parameters
+     ----------
+     hidden_layer_sizes : array-like of shape (n_layers,), default=(100,)
+         The ith element represents the number of neurons in the ith
+         hidden layer.
+
+     activation : str, default='identity'
+         Activation function for the hidden layers.
+
+         - 'identity': no-op activation, useful to implement a linear
+           bottleneck, returns f(x) = x.
+
+         - 'tanh': :func:`tanh <gfdl.activations.tanh>`.
+
+         - 'relu': :func:`relu <gfdl.activations.relu>`.
+
+         - 'sigmoid': :func:`sigmoid <gfdl.activations.sigmoid>`.
+
+         - 'softmax': :func:`softmax <gfdl.activations.softmax>`.
+
+         - 'softmin': :func:`softmin <gfdl.activations.softmin>`.
+
+         - 'log_sigmoid': :func:`log_sigmoid <gfdl.activations.log_sigmoid>`.
+
+         - 'log_softmax': :func:`log_softmax <gfdl.activations.log_softmax>`.
+
+     weight_scheme : str, default='uniform'
+         Distribution used to initialize the random hidden-layer weights.
+
+         The initialization functions generate weight matrices of shape
+         (n_hidden_units, n_features), where values are drawn
+         according to the selected scheme.
+
+         - 'zeros': set weights to zeros (:func:`zeros <gfdl.weights.zeros>`).
+
+         - 'range': set weights to a normalized np.arange
+           (:func:`range <gfdl.weights.range>`).
+
+         - 'uniform': uniform distribution (:func:`uniform <gfdl.weights.uniform>`).
+
+         - 'he_uniform': He uniform distribution
+           (:func:`he_uniform <gfdl.weights.he_uniform>`).
+
+         - 'lecun_uniform': LeCun uniform distribution
+           (:func:`lecun_uniform <gfdl.weights.lecun_uniform>`).
+
+         - 'glorot_uniform': Glorot uniform distribution
+           (:func:`glorot_uniform <gfdl.weights.glorot_uniform>`).
+
+         - 'normal': normal distribution (:func:`normal <gfdl.weights.normal>`).
+
+         - 'he_normal': He normal distribution
+           (:func:`he_normal <gfdl.weights.he_normal>`).
+
+         - 'lecun_normal': LeCun normal distribution
+           (:func:`lecun_normal <gfdl.weights.lecun_normal>`).
+
+         - 'glorot_normal': Glorot normal distribution
+           (:func:`glorot_normal <gfdl.weights.glorot_normal>`).
+
+     direct_links : bool, default=True
+         Whether to connect the input layer to the output nodes.
+         When set to False, only the hidden-layer activations are used,
+         corresponding to the Extreme Learning Machine (ELM) architecture.
+
+     seed : int, RandomState instance, default=None
+         Determines random number generation for weights and bias
+         initialization.
+         Pass an int for reproducible results across multiple function calls.
+         See :term:`Glossary <random_state>`.
+
+     reg_alpha : float, default=None
+         Amount of ridge shrinkage to apply in order to improve
+         conditioning of the ridge regression. When `None`, the model
+         solves directly with the Moore-Penrose pseudo-inverse.
+
+     rtol : float, default=None
+         Cutoff for small singular values of the Moore-Penrose
+         pseudo-inverse. Only applies when ``reg_alpha=None``.
+         When ``rtol=None``, the array API standard default for
+         ``pinv`` is used.
+
+     Attributes
+     ----------
+     n_features_in_ : int
+         Number of features seen during :term:`fit`.
+
+     classes_ : ndarray or list of ndarray of shape (n_classes,)
+         Class labels for each output.
+
+     W_ : list of ndarray, of length n_layers
+         Weight matrices of the hidden layers. The ith element in the list is
+         the weight matrix of layer i.
+
+     b_ : list of ndarray, of length n_layers
+         Bias vectors of the hidden layers. The ith element in the list is
+         the bias vector of layer i.
+
+     coeff_ : ndarray of shape (n_features_out, n_outputs)
+         Output weight matrix learned by the fit method.
+
+     See Also
+     --------
+     GFDLRegressor : Regressor variant for the RVFL architecture.
+
+     Examples
+     --------
+     >>> from gfdl.model import GFDLClassifier
+     >>> from sklearn.datasets import make_classification
+     >>> from sklearn.model_selection import train_test_split
+     >>> X, y = make_classification(n_samples=100, random_state=1)
+     >>> X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y,
+     ...                                                     random_state=1)
+     >>> clf = GFDLClassifier(seed=1).fit(X_train, y_train)
+     >>> clf.predict_proba(X_test[:1])
+     array([[0.46123716, 0.53876284]])
+     >>> clf.predict(X_test[:5, :])
+     array([1, 0, 1, 0, 1])
+     """
+
+     def __init__(
+         self,
+         hidden_layer_sizes: np.typing.ArrayLike = (100,),
+         activation: str = "identity",
+         weight_scheme: str = "uniform",
+         direct_links: bool = True,
+         seed: int | None = None,
+         reg_alpha: float | None = None,
+         rtol: float | None = None,
+     ):
+         super().__init__(hidden_layer_sizes=hidden_layer_sizes,
+                          activation=activation,
+                          weight_scheme=weight_scheme,
+                          direct_links=direct_links,
+                          seed=seed,
+                          reg_alpha=reg_alpha,
+                          rtol=rtol)
+
+     def fit(self, X, y):
+         """
+         Build a gradient-free neural network from the training set (X, y).
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             The training input samples.
+         y : array-like of shape (n_samples,) or (n_samples, n_outputs)
+             The target values (class labels).
+
+         Returns
+         -------
+         object
+             Fitted estimator.
+         """
+         # shape: (n_samples, n_features)
+         X, Y = validate_data(self, X, y)
+         self.classes_ = unique_labels(Y)
+
+         # One-hot encode y
+         # (this is necessary for everything beyond binary classification).
+         self.enc_ = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
+         # shape: (n_samples, n_classes)
+         Y = self.enc_.fit_transform(Y.reshape(-1, 1))
+
+         # call the base fit method
+         super().fit(X, Y)
+         return self
+
+     def predict(self, X):
+         """
+         Predict class for X.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             The input samples.
+
+         Returns
+         -------
+         ndarray
+             The predicted classes, with shape (n_samples,) or
+             (n_samples, n_outputs).
+         """
+         check_is_fitted(self)
+         X = validate_data(self, X, reset=False)
+         out = self.predict_proba(X)
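+         # Pick the class with the highest predicted probability.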
+         y_hat = self.classes_[np.argmax(out, axis=1)]
+         return y_hat
+
+     def predict_proba(self, X):
+         """
+         Predict class probabilities for X.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             The input samples.
+
+         Returns
+         -------
+         ndarray
+             The class probabilities of the input samples. The order of the
+             classes corresponds to that in the attribute ``classes_``. The
+             array has shape (n_samples, n_classes).
+         """
+         check_is_fitted(self)
+         X = validate_data(self, X, reset=False)
+         out = super().predict(X)
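+         # Softmax over the raw linear outputs, computed via logsumexp for
+         # numerical stability.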
+         out = np.exp(out - logsumexp(out, axis=1, keepdims=True))
+         return out
+
+
+ class EnsembleGFDL(GFDL):
+     """Base class for ensemble GFDL models for classification and regression."""
+
+     def __init__(
+         self,
+         hidden_layer_sizes: np.typing.ArrayLike = (100,),
+         activation: str = "identity",
+         weight_scheme: str = "uniform",
+         seed: int | None = None,
+         reg_alpha: float | None = None,
+         rtol: float | None = None,
+     ):
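+         # Direct links are always enabled here: every layer's design matrix
+         # stacks the raw input X next to the hidden activations (see fit).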
+         super().__init__(hidden_layer_sizes=hidden_layer_sizes,
+                          activation=activation,
+                          weight_scheme=weight_scheme,
+                          direct_links=True,
+                          seed=seed,
+                          reg_alpha=reg_alpha,
+                          rtol=rtol)
+
+     def fit(self, X, Y):
+         if self.reg_alpha is not None and self.reg_alpha < 0.0:
+             raise ValueError("Negative reg_alpha. Expected range: None or [0.0, inf).")
+
+         fn = resolve_activation(self.activation)[1]
+         self._activation_fn = fn
+         self._N = X.shape[1]
+         hidden_layer_sizes = np.asarray(self.hidden_layer_sizes)
+         self._weight_mode = resolve_weight(self.weight_scheme)
+
+         self.W_ = []
+         self.b_ = []
+         rng = self.get_generator(self.seed)
+
+         self.W_.append(
+             self._weight_mode(
+                 self._N, hidden_layer_sizes[0], rng=self.get_generator(self.seed)
+             )
+         )
+         self.b_.append(
+             self._weight_mode(1, hidden_layer_sizes[0], rng=rng).reshape(-1)
+         )
+
+         for i, layer in enumerate(hidden_layer_sizes[1:]):
+             # shape: (n_hidden_i, n_hidden_{i-1} + n_features)
+             self.W_.append(
+                 self._weight_mode(hidden_layer_sizes[i] + self._N, layer, rng=rng)
+             )
+             # shape: (n_hidden_i,)
+             self.b_.append(
+                 self._weight_mode(1, layer, rng=rng).reshape(-1)
+             )
+
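+         # One readout per layer: the coefficients of layer i are fit on
+         # [H_i, X], and each fitted readout becomes one ensemble member.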
+         self.coeffs_ = []
+         D = X
+
+         for W, b in zip(self.W_, self.b_, strict=False):
+             Z = D @ W.T + b  # (n_samples, n_hidden_i)
+             H = self._activation_fn(Z)
+             # design matrix shape: (n_samples, n_hidden_i + n_features)
+             D = np.hstack((H, X))
+
+             # coeff shape: (n_hidden_i + n_features, n_classes)
+
+             # If reg_alpha is None, solve directly with the Moore-Penrose
+             # pseudo-inverse; otherwise use the ridge-regularized form.
+             if self.reg_alpha is None:
+                 coeff = np.linalg.pinv(D, rtol=self.rtol) @ Y
+             else:
+                 ridge = Ridge(alpha=self.reg_alpha, fit_intercept=False)
+                 ridge.fit(D, Y)
+                 coeff = ridge.coef_.T
+             self.coeffs_.append(coeff)
+
+         return self
+
+     def _forward(self, X):
+         check_is_fitted(self)
+         outs = []
+
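+         # Replay the forward pass, emitting one prediction per ensemble
+         # member.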
+         D = X
+         for W, b, coeff in zip(self.W_, self.b_, self.coeffs_, strict=True):
+             Z = D @ W.T + b
+             H = self._activation_fn(Z)
+
+             D = np.hstack((H, X))
+
+             out = D @ coeff
+             outs.append(out)
+
+         return outs
+
+
+ class EnsembleGFDLClassifier(ClassifierMixin, EnsembleGFDL):
+     """
+     Ensemble random vector functional link network classifier.
+
+     Parameters
+     ----------
+     hidden_layer_sizes : array-like of shape (n_layers,), default=(100,)
+         The ith element represents the number of neurons in the ith
+         hidden layer.
+
+     activation : str, default='identity'
+         Activation function for the hidden layers.
+
+         - 'identity': no-op activation, useful to implement a linear
+           bottleneck, returns f(x) = x.
+
+         - 'tanh': :func:`tanh <gfdl.activations.tanh>`.
+
+         - 'relu': :func:`relu <gfdl.activations.relu>`.
+
+         - 'sigmoid': :func:`sigmoid <gfdl.activations.sigmoid>`.
+
+         - 'softmax': :func:`softmax <gfdl.activations.softmax>`.
+
+         - 'softmin': :func:`softmin <gfdl.activations.softmin>`.
+
+         - 'log_sigmoid': :func:`log_sigmoid <gfdl.activations.log_sigmoid>`.
+
+         - 'log_softmax': :func:`log_softmax <gfdl.activations.log_softmax>`.
+
+     weight_scheme : str, default='uniform'
+         Distribution used to initialize the random hidden-layer weights.
+
+         The initialization functions generate weight matrices of shape
+         (n_hidden_units, n_features), where values are drawn
+         according to the selected scheme.
+
+         - 'zeros': set weights to zeros (:func:`zeros <gfdl.weights.zeros>`).
+
+         - 'range': set weights to a normalized np.arange
+           (:func:`range <gfdl.weights.range>`).
+
+         - 'uniform': uniform distribution (:func:`uniform <gfdl.weights.uniform>`).
+
+         - 'he_uniform': He uniform distribution
+           (:func:`he_uniform <gfdl.weights.he_uniform>`).
+
+         - 'lecun_uniform': LeCun uniform distribution
+           (:func:`lecun_uniform <gfdl.weights.lecun_uniform>`).
+
+         - 'glorot_uniform': Glorot uniform distribution
+           (:func:`glorot_uniform <gfdl.weights.glorot_uniform>`).
+
+         - 'normal': normal distribution (:func:`normal <gfdl.weights.normal>`).
+
+         - 'he_normal': He normal distribution
+           (:func:`he_normal <gfdl.weights.he_normal>`).
+
+         - 'lecun_normal': LeCun normal distribution
+           (:func:`lecun_normal <gfdl.weights.lecun_normal>`).
+
+         - 'glorot_normal': Glorot normal distribution
+           (:func:`glorot_normal <gfdl.weights.glorot_normal>`).
+
+     seed : int, default=None
+         Random seed used to initialize the network.
+
+     reg_alpha : float, default=None
+         When `None`, use Moore-Penrose inversion to solve for the output
+         weights of the network. Otherwise, it specifies the constant that
+         multiplies the L2 term of the `sklearn` `Ridge` estimator,
+         controlling the regularization strength. `reg_alpha` must be a
+         non-negative float.
+
+     rtol : float, default=None
+         Cutoff for small singular values of the Moore-Penrose
+         pseudo-inverse. Only applies when ``reg_alpha=None``.
+         When ``rtol=None``, the array API standard default for
+         ``pinv`` is used.
+
+     voting : {'soft', 'hard'}, default='soft'
+         Whether to use soft (averaged probabilities) or hard
+         (majority-vote) voting in the ensemble.
+
+     Notes
+     -----
+     The implementation is based on the one described by Shi et al. in [1]_.
+
+     References
+     ----------
+     .. [1] Shi, Katuwal, Suganthan, Tanveer, "Random vector functional
+        link neural network based ensemble deep learning." Pattern Recognition,
+        vol. 117, pp. 107978, 2021, https://doi.org/10.1016/j.patcog.2021.107978.
+
+     Examples
+     --------
+     >>> from sklearn.datasets import make_classification
+     >>> from gfdl.model import EnsembleGFDLClassifier
+     >>> X, y = make_classification(n_samples=1000, n_features=4,
+     ...                            n_informative=2, n_redundant=0,
+     ...                            random_state=0, shuffle=False)
+     >>> clf = EnsembleGFDLClassifier(seed=0)
+     >>> clf.fit(X, y)
+     EnsembleGFDLClassifier(seed=0)
+     >>> print(clf.predict([[0, 0, 0, 0]]))
+     [1]
+     """
+
+     def __init__(
+         self,
+         hidden_layer_sizes: np.typing.ArrayLike = (100,),
+         activation: str = "identity",
+         weight_scheme: str = "uniform",
+         seed: int | None = None,
+         reg_alpha: float | None = None,
+         rtol: float | None = None,
+         voting: str = "soft",  # "soft" or "hard"
+     ):
+         super().__init__(hidden_layer_sizes=hidden_layer_sizes,
+                          activation=activation,
+                          weight_scheme=weight_scheme,
+                          seed=seed,
+                          reg_alpha=reg_alpha,
+                          rtol=rtol)
+         self.voting = voting
+
+     def fit(self, X, y):
+         """
+         Train the ensemble of connected RVFL networks on the training set
+         (X, y).
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             The training input samples.
+         y : array-like of shape (n_samples,) or (n_samples, n_outputs)
+             The target values.
+
+         Returns
+         -------
+         object
+             The fitted estimator.
+         """
+         # shape: (n_samples, n_features)
+         X, Y = validate_data(self, X, y)
+         self.classes_ = unique_labels(Y)
+
+         # One-hot encode y
+         # (this is necessary for everything beyond binary classification).
+         self.enc_ = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
+         # shape: (n_samples, n_classes)
+         Y = self.enc_.fit_transform(Y.reshape(-1, 1))
+
+         # call the base fit method
+         super().fit(X, Y)
+         return self
+
+     def _check_voting(self):
+         if self.voting == "hard":
+             raise AttributeError(
+                 f"predict_proba is not available when voting={self.voting!r}"
+             )
+         return True
+
+     @available_if(_check_voting)
+     def predict_proba(self, X):
+         """
+         Predict class probabilities for X.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             The input samples.
+
+         Returns
+         -------
+         ndarray
+             The class probabilities of the input samples. The order of the
+             classes corresponds to that in the attribute ``classes_``. The
+             array has shape (n_samples, n_classes).
+         """
+         check_is_fitted(self)
+         X = validate_data(self, X, reset=False)
+
+         outs = self._forward(X)
+         probs = []
+
+         for out in outs:
+             p = np.exp(out - logsumexp(out, axis=1, keepdims=True))
+             probs.append(p)
+
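+         # Soft voting: average the per-member softmax probabilities.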
+         return np.mean(probs, axis=0)
+
+     def predict(self, X):
+         """
+         Predict class for X.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             The input samples.
+
+         Returns
+         -------
+         ndarray
+             The predicted classes, with shape (n_samples,) or
+             (n_samples, n_outputs).
+         """
+         check_is_fitted(self)
+         X = validate_data(self, X, reset=False)
+
+         if self.voting == "soft":
+             P = self.predict_proba(X)
+             return self.classes_[np.argmax(P, axis=1)]
+
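+         # Hard voting: each member casts its argmax class; scipy.stats.mode
+         # breaks ties in favor of the smallest class label.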
+         outs = self._forward(X)
+         votes = []
+
+         for out in outs:
+             p = np.exp(out - logsumexp(out, axis=1, keepdims=True))
+             votes.append(self.classes_[np.argmax(p, axis=1)])
+
+         votes = np.stack(votes, axis=1)
+         m = mode(votes, axis=1, keepdims=False)
+         return m.mode
+
+
+ class GFDLRegressor(RegressorMixin, MultiOutputMixin, GFDL):
+     """
+     Random vector functional link network regressor.
+
+     This model fits a feedforward neural network with fixed random hidden-layer
+     parameters and solves for the output weights using linear least squares or
+     ridge regression. When direct links are disabled, the model corresponds to
+     the Extreme Learning Machine (ELM) architecture.
+
+     Parameters
+     ----------
+     hidden_layer_sizes : array-like of shape (n_layers,), default=(100,)
+         The ith element represents the number of neurons in the ith
+         hidden layer.
+
+     activation : str, default='identity'
+         Activation function for the hidden layers.
+
+         - 'identity': no-op activation, useful to implement a linear
+           bottleneck, returns f(x) = x.
+
+         - 'tanh': :func:`tanh <gfdl.activations.tanh>`.
+
+         - 'relu': :func:`relu <gfdl.activations.relu>`.
+
+         - 'sigmoid': :func:`sigmoid <gfdl.activations.sigmoid>`.
+
+         - 'softmax': :func:`softmax <gfdl.activations.softmax>`.
+
+         - 'softmin': :func:`softmin <gfdl.activations.softmin>`.
+
+         - 'log_sigmoid': :func:`log_sigmoid <gfdl.activations.log_sigmoid>`.
+
+         - 'log_softmax': :func:`log_softmax <gfdl.activations.log_softmax>`.
+
+     weight_scheme : str, default='uniform'
+         Distribution used to initialize the random hidden-layer weights.
+
+         The initialization functions generate weight matrices of shape
+         (n_hidden_units, n_features), where values are drawn
+         according to the selected scheme.
+
+         - 'zeros': set weights to zeros (:func:`zeros <gfdl.weights.zeros>`).
+
+         - 'range': set weights to a normalized np.arange
+           (:func:`range <gfdl.weights.range>`).
+
+         - 'uniform': uniform distribution (:func:`uniform <gfdl.weights.uniform>`).
+
+         - 'he_uniform': He uniform distribution
+           (:func:`he_uniform <gfdl.weights.he_uniform>`).
+
+         - 'lecun_uniform': LeCun uniform distribution
+           (:func:`lecun_uniform <gfdl.weights.lecun_uniform>`).
+
+         - 'glorot_uniform': Glorot uniform distribution
+           (:func:`glorot_uniform <gfdl.weights.glorot_uniform>`).
+
+         - 'normal': normal distribution (:func:`normal <gfdl.weights.normal>`).
+
+         - 'he_normal': He normal distribution
+           (:func:`he_normal <gfdl.weights.he_normal>`).
+
+         - 'lecun_normal': LeCun normal distribution
+           (:func:`lecun_normal <gfdl.weights.lecun_normal>`).
+
+         - 'glorot_normal': Glorot normal distribution
+           (:func:`glorot_normal <gfdl.weights.glorot_normal>`).
+
+     direct_links : bool, default=True
+         Whether to connect the input layer to the output nodes.
+
+         When set to False, only the hidden-layer activations are used,
+         corresponding to the Extreme Learning Machine (ELM) architecture.
+
+     seed : int, RandomState instance, default=None
+         Determines random number generation for weights and bias
+         initialization.
+         Pass an int for reproducible results across multiple function calls.
+         See :term:`Glossary <random_state>`.
+
+     reg_alpha : float, default=None
+         Amount of ridge shrinkage to apply in order to improve
+         conditioning of the ridge regression. When `None`, the model
+         solves directly with the Moore-Penrose pseudo-inverse.
+
+     rtol : float, default=None
+         Cutoff for small singular values of the Moore-Penrose
+         pseudo-inverse. Only applies when ``reg_alpha=None``.
+         When ``rtol=None``, the array API standard default for
+         ``pinv`` is used.
+
+     Attributes
+     ----------
+     n_features_in_ : int
+         Number of features seen during :term:`fit`.
+
+     W_ : list of ndarray, of length n_layers
+         Weight matrices of the hidden layers. The ith element in the list is
+         the weight matrix of layer i.
+
+     b_ : list of ndarray, of length n_layers
+         Bias vectors of the hidden layers. The ith element in the list is
+         the bias vector of layer i.
+
+     coeff_ : ndarray of shape (n_features_out, n_outputs)
+         Output weight matrix learned by the fit method.
+
+     See Also
+     --------
+     GFDLClassifier : Classifier variant for the RVFL architecture.
+
+     Examples
+     --------
+     >>> from gfdl.model import GFDLRegressor
+     >>> from sklearn.datasets import make_regression
+     >>> from sklearn.model_selection import train_test_split
+     >>> X, y = make_regression(n_samples=200, n_features=20, random_state=1)
+     >>> X_train, X_test, y_train, y_test = train_test_split(X, y,
+     ...                                                     random_state=1)
+     >>> regr = GFDLRegressor(seed=1)
+     >>> regr.fit(X_train, y_train)
+     GFDLRegressor(seed=1)
+     >>> regr.predict(X_test[:2])
+     array([ 18.368, -278.014])
+     """
+
+     def __init__(
+         self,
+         hidden_layer_sizes: np.typing.ArrayLike = (100,),
+         activation: str = "identity",
+         weight_scheme: str = "uniform",
+         direct_links: bool = True,
+         seed: int | None = None,
+         reg_alpha: float | None = None,
+         rtol: float | None = None,
+     ):
+         super().__init__(hidden_layer_sizes=hidden_layer_sizes,
+                          activation=activation,
+                          weight_scheme=weight_scheme,
+                          direct_links=direct_links,
+                          seed=seed,
+                          reg_alpha=reg_alpha,
+                          rtol=rtol)
+
+     def fit(self, X, y):
+         """
+         Train the gradient-free neural network on the training set (X, y).
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             The training input samples.
+         y : array-like of shape (n_samples,) or (n_samples, n_outputs)
+             The target values.
+
+         Returns
+         -------
+         object
+             The fitted estimator.
+         """
+         X, Y = validate_data(self, X, y, multi_output=True)
+         super().fit(X, Y)
+         return self
+
+     def predict(self, X):
+         """
+         Predict regression target for X.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             The input samples.
+
+         Returns
+         -------
+         ndarray
+             The predicted values, with shape (n_samples,) or
+             (n_samples, n_outputs).
+         """
+         check_is_fitted(self)
+         X = validate_data(self, X, reset=False)
+         return super().predict(X)