phylokrr-dev 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,920 @@
1
+ import sys
2
+ import time
3
+
4
+
5
+ import numpy as np
6
+ from phylokrr.kernels import linear_kernel
7
+ # from scipy import linalg as la
8
+ from numpy import linalg as la
9
+
10
def distance_matrix(a, b):
    """
    Pairwise squared l2 distances between the rows of two 2-D arrays.

    Parameters
    ----------
    a : np.ndarray
        array of shape (n_a, d)
    b : np.ndarray
        array of shape (n_b, d)

    Returns
    -------
    np.ndarray
        array of shape (n_a, n_b); entry (i, j) is ||a[i] - b[j]||^2
    """
    # broadcast both inputs to (n_a, n_b, d) so every row pair is compared
    pairwise_diff = a[:, np.newaxis, :] - b[np.newaxis, :, :]
    euclidean = np.linalg.norm(pairwise_diff, axis=-1)
    return euclidean**2
15
+
16
+
17
def RBF_kernel(a, b, gamma, extra_K = None):
    """
    Radial Basis Function kernel, exp(-gamma * ||a_i - b_j||^2).

    Parameters
    ----------
    a : np.ndarray
        first set of points, one per row
    b : np.ndarray
        second set of points, one per row
    gamma : float
        scale applied to the squared distances
    extra_K : np.ndarray, optional
        matrix added element-wise to the kernel *after* exponentiation,
        by default None (nothing is added)

    Returns
    -------
    np.ndarray
        kernel matrix with one row per point in `a` and one column per
        point in `b`
    """
    # scaled (negative) squared distances between every pair of rows
    kernel = -gamma * distance_matrix(a, b)

    # exponentiate in place to avoid allocating a second matrix
    np.exp(kernel, out=kernel)

    if extra_K is None:
        return kernel

    kernel += extra_K
    return kernel
33
+
34
+
35
+ class KRGEE:
36
+ """
37
+ Kernel Ridge Generalized Estimating Equations for 1 cluster.
38
+ Using 1 cluster is what phylogenetic regresion deals with.
39
+ """
40
+ def __init__(self,
41
+ kernel='rbf',
42
+ max_iter=300,
43
+ lam=0.1,
44
+ warm_start = False,
45
+ tol = 0.001,
46
+ seed = 12038,
47
+ verbose = False,
48
+ get_corr = True,
49
+ copy_R_inv = False,
50
+ copy_K_train = False,
51
+ fix_phi = False,
52
+ add_extraK= False
53
+ ) -> None:
54
+
55
+ self.add_extraK = add_extraK
56
+
57
+ self.Rnn = None
58
+ self.Rzn = None
59
+
60
+ self.alpha = np.array([])
61
+ self.b = 0
62
+
63
+ self.verbose = verbose
64
+ self.warm_start = warm_start
65
+
66
+ self.kernel = kernel
67
+ self.lam = lam
68
+
69
+ self.gamma = 0.1
70
+ self.c = 0.1
71
+
72
+ self.max_iter = max_iter
73
+ self.tol = tol
74
+ self.seed = seed
75
+
76
+ # speicify if the correlation matrix is needed
77
+ # it might be the case that the input matrix is already
78
+ # in form of a correlation matrix
79
+ self.get_corr = get_corr
80
+
81
+ self.Du_mu_func = lambda x: (np.ones(x.shape[0]), x)
82
+ self.Vu_func = lambda x: np.ones( x.shape[0] )
83
+
84
+ self.X = np.array([])
85
+ self.y = np.array([])
86
+
87
+ self.copy_R_inv = copy_R_inv
88
+ self.R_inv = np.array([])
89
+
90
+ self.copy_K = copy_K_train
91
+ self.K_train = np.array([])
92
+ self.K_col_mean = np.array([])
93
+ self.K_all_mean = np.array([])
94
+
95
+
96
+ self.phi = 1
97
+ self.fix_phi = fix_phi
98
+ self.r = np.array([])
99
+ self.rho = np.array([])
100
+ self.dof = 1 # degrees of freedom
101
+ self.L_inv = np.array([]) # R^-1/2
102
+
103
+
104
+ self.f_curr = np.array([]) # current function values
105
+
106
+ self._verbose = False
107
+ self._update_rho = False
108
+ self.grad_tol = 1e-10
109
+ self.hessian_tol = 1e-10
110
+ self.grad_clip = 2 # gradient clipping
111
+
112
+ def __repr__(self):
113
+ return f"KRGEE(kernel={self.kernel}, max_iter={self.max_iter}, lam={self.lam}, warm_start={self.warm_start}, tol={self.tol}, seed={self.seed})"
114
+
115
+
116
+ def center_kernel(self, K, trainxtest = False):
117
+ """
118
+ Center the kernel matrix
119
+
120
+ ref: Kernel methods in computational biology,
121
+ Scholkopf, Tsuda, Vert. pp. 50\\
122
+ \\tilde K = (I - e/n)( K )(I - e/n)
123
+
124
+ e is singular matrix of ones\\
125
+ I is identity matrix\\
126
+ n is the number of samples\\
127
+ K is the kernel matrix of size n x p
128
+
129
+ Only works for square matrices
130
+
131
+ Here is a general attempt for non-square matrices
132
+ """
133
+ K_row_mean = np.mean(K, axis = 1)
134
+
135
+ if not trainxtest:
136
+ self.K_col_mean = np.mean(K, axis = 0)
137
+ self.K_all_mean = np.mean(K)
138
+
139
+ K -= self.K_col_mean
140
+ K -= K_row_mean.reshape(-1,1)
141
+ K += self.K_all_mean
142
+ return K
143
+
144
+ def set_params(self, **params):
145
+ """
146
+ Set parameters for the ISLEPath instance.
147
+ """
148
+ for key, value in params.items():
149
+ if hasattr(self, key):
150
+ setattr(self, key, value)
151
+ else:
152
+ raise ValueError(f"Parameter {key} is not recognized.")
153
+
154
+ def get_params(self):
155
+ return {'max_iter': self.max_iter,
156
+ 'lam': self.lam,
157
+ 'intercept': self.b,
158
+ 'tol': self.tol}
159
+
160
+ def get_K(self, A, B, trainxtest=False):
161
+ """
162
+ Get the kernel matrix
163
+ depending on the kernel type specified
164
+ a parameter is passed to the kernel function
165
+ Otherwise, the input matrix A is returned
166
+
167
+ Parameters
168
+ ----------
169
+ A : np.ndarray
170
+ input matrix
171
+ B : np.ndarray
172
+ input matrix
173
+
174
+ Returns
175
+ -------
176
+ np.ndarray
177
+ kernel
178
+ """
179
+ if self.kernel == 'rbf':
180
+
181
+ if self.add_extraK:
182
+ R = self.Rzn if trainxtest else self.Rnn
183
+ else:
184
+ R = None
185
+
186
+ K = RBF_kernel(A, B, self.gamma, extra_K=R)
187
+
188
+ elif self.kernel == 'linear':
189
+ K = linear_kernel(A, B, self.c)
190
+
191
+ else:
192
+ K = A
193
+
194
+ return self.center_kernel(K, trainxtest)
195
+
196
+ def pre_process_V(self, V):
197
+ """
198
+ Pre-process the V matrix
199
+ 1. Normalize the matrix
200
+ 2. Get the n-th root of the determinant
201
+ 3. Normalize the matrix again
202
+
203
+ it improves the numerical stability of the algorithm
204
+
205
+ refs:
206
+
207
+ https://daijiang.github.io/phyr/reference/pglmm_compare.html
208
+ https://ives.labs.wisc.edu/pdf/correlateddata.pdf (pp. 94)
209
+
210
+ """
211
+ V = np.asfortranarray(V, dtype=V.dtype)
212
+
213
+ n, _ = V.shape
214
+ V = V/np.max(V)
215
+ detV_n = np.linalg.det(V)**(1/n)
216
+ V = V/detV_n if detV_n != 0 else V
217
+
218
+ return V
219
+
220
+ def get_R(self, vcv):
221
+ """
222
+ Get correlation matrix from vcv matrix
223
+ """
224
+ kr = 1/np.sqrt(np.diag(vcv))
225
+ R = (kr[:, None] * vcv) * kr
226
+ return R
227
+
228
+ def _invR(self, R):
229
+ """
230
+ Inverse of the correlation matrix
231
+ """
232
+ # L = np.linalg.cholesky(R + np.eye(R.shape[0]) * self.hessian_tol)
233
+ # self.L_inv = np.linalg.inv(L)
234
+
235
+ # return self.L_inv.T @ self.L_inv
236
+
237
+ E,Q = la.eigh(R + np.eye(R.shape[0]) * self.hessian_tol)
238
+ # E,Q = la.eigh(R)
239
+
240
+ self.L_inv = Q @ np.diag(E**(-1/2)) @ Q.T
241
+ return Q @ np.diag(E**(-1)) @ Q.T
242
+
243
+ def get_R_inv(self, vcv):
244
+ """
245
+ Get correlation matrix from vcv matrix
246
+ and return its inverse
247
+ """
248
+ # TODO: get_corr might not be needed
249
+ # as if it is already a correlation matrix
250
+ # is it is like divding by 1
251
+ # vcv = self.pre_process_V(vcv)
252
+
253
+ R = self.get_R(vcv) if self.get_corr else vcv
254
+
255
+ return self._invR(R)
256
+
257
+ def set_R_inv(self, vcv):
258
+ """
259
+ Set correlation matrix from vcv matrix
260
+ and return its inverse
261
+
262
+ The general logic is as follows:
263
+
264
+ if copy_S and S == 0:
265
+ set_S
266
+
267
+ elif not copy_S and S == 0:
268
+ set_S (overwrite)
269
+
270
+ elif copy_S and S != 0:
271
+ do nothing
272
+
273
+ elif not copy_S and S != 0:
274
+ set_S (overwrite)
275
+
276
+ where S is Some matrix,
277
+ in this case the correlation matrix
278
+
279
+ """
280
+ if self.copy_R_inv and len(self.R_inv) != 0:
281
+ pass
282
+
283
+ else:
284
+ if isinstance(vcv, np.ndarray):
285
+ self.R_inv = self.get_R_inv(vcv)
286
+
287
+ else:
288
+ n,_ = self.X.shape
289
+ self.R_inv = np.eye(n)
290
+ self.L_inv = np.eye(n)
291
+
292
+ def set_K_train(self, X):
293
+ """
294
+ Set the kernel matrix
295
+
296
+ The general logic is as follows:
297
+
298
+ if copy_S and S == 0:
299
+ set_S
300
+
301
+ elif not copy_S and S == 0:
302
+ set_S (overwrite)
303
+
304
+ elif copy_S and S != 0:
305
+ do nothing
306
+
307
+ elif not copy_S and S != 0:
308
+ set_S (overwrite)
309
+
310
+ where S is Some matrix,
311
+ in this case the kernel matrix
312
+
313
+ Parameters
314
+ ----------
315
+ X : np.ndarray
316
+ input matrix
317
+ """
318
+
319
+ if X is None:
320
+ self.K_train = None
321
+ return
322
+
323
+ if self.copy_K and len(self.K_train) != 0:
324
+ pass
325
+
326
+ else:
327
+ self.K_train = np.asfortranarray( self.get_K(X, X), dtype=float)
328
+
329
+
330
+ def get_Du(self, f_curr):
331
+ """
332
+ Get mu vector (estimator) and W matrix with mu derivatives
333
+ with respect to f(x).
334
+
335
+
336
+ Wii = \\frac{\partial \mu}{\partial f(x)}
337
+ if \mu is the identity function, then Wii = 1
338
+
339
+
340
+ from the current function values.
341
+
342
+ The function is will depend on the model being used.
343
+
344
+ Parameters
345
+ ----------
346
+ f_curr : np.ndarray
347
+ current function values
348
+
349
+ Returns
350
+ -------
351
+ np.ndarray, np.ndarray
352
+ Du, u
353
+ """
354
+ return self.Du_mu_func(f_curr)
355
+
356
+ def get_Vu_sqrt_inv(self, f_curr):
357
+ """
358
+ get the square root of the inverse of
359
+ the variance of the estimator mu.
360
+
361
+ the function is model dependent
362
+
363
+ Parameters
364
+ ----------
365
+ f_curr : np.ndarray
366
+ current function values
367
+
368
+ Returns
369
+ -------
370
+ np.ndarray
371
+ square root of the inverse of the variance
372
+ """
373
+ # Vu = self.Vu_func(f_curr)
374
+ # Vu[Vu < self.epsilon] = self.epsilon
375
+ # return Vu**(-1/2)
376
+ return self.Vu_func(f_curr)**(-1/2)
377
+
378
+ def set_df(self, H):
379
+ """
380
+ residual degrees of freedom
381
+
382
+ ref: Elements of Statistical Learning, Hastie, Tibshirani, Friedman
383
+ pp. 233
384
+
385
+ Parameters
386
+ ----------
387
+ H : np.ndarray
388
+ Hessian matrix
389
+
390
+ lam : float
391
+ regularization parameter
392
+
393
+ """
394
+ n = H.shape[0]
395
+
396
+ theta = np.linalg.eigvalsh(H) # O(n^3)
397
+ theta = np.real(theta)
398
+ df_model = np.sum(theta / (theta + self.lam))
399
+ self.dof = np.clip(n - df_model, self.grad_tol, n)
400
+
401
+ def set_rdf(self, y, y_hat):
402
+ """
403
+ residual degrees of freedom
404
+ using the covariance method
405
+
406
+ ref: Elements of Statistical Learning, Hastie, Tibshirani, Friedman
407
+ pp. 77, 233
408
+ """
409
+ # effective number of parameters
410
+ df_model = np.cov(y_hat, y, ddof=1)[0, 1] / np.var(y - y_hat, ddof=1)
411
+ n = len(y_hat)
412
+ self.dof = np.clip(n - df_model, self.grad_tol, n)
413
+
414
+ def set_phi(self, e):
415
+ # pearson residual
416
+ err = e * self.Vu_func(self.f_curr)**(-1/2)
417
+ self.phi = np.dot(err, err)/self.dof # O(n)
418
+
419
+ def get_V_Inv(self, f_curr):
420
+ """
421
+ Get the square root of the inverse of the V matrix
422
+ V = A^1/2 R A^1/2 \phi
423
+ A = variance of \mu
424
+ R = correlation matrix
425
+ phi = dispersion parameter
426
+
427
+ V^(-1) = ( A^1/2 R A^1/2 \phi)^-1
428
+ = A^-1/2 R^-1 A^-1/2 \phi^-1
429
+
430
+ O(n^2) complexity
431
+
432
+ Parameters
433
+ ----------
434
+ f_curr : np.ndarray
435
+ current function values
436
+ Returns
437
+ -------
438
+ np.ndarray
439
+ V^(-1/2)
440
+ """
441
+ A_sqInv = self.Vu_func(f_curr)**(-1/2)
442
+ V_Inv = (A_sqInv[:, None] * self.R_inv) * A_sqInv
443
+ V_Inv *= (self.phi)**(-1) # O(n^2)
444
+ return V_Inv
445
+
446
+ def krgee_fit(self, K, y):
447
+
448
+ n = y.shape[0]
449
+ p = K.shape[1] if K is not None else 0
450
+ K_dtype = K.dtype if K is not None else float
451
+
452
+ ones = np.ones(n, dtype=K_dtype)
453
+
454
+ if p > 0:
455
+ K_0 = np.zeros((n + 1, p + 1), dtype=K_dtype)
456
+ K_0[1:, 1:] = K
457
+ lK_0 = self.lam * K_0
458
+ lK = self.lam * K
459
+
460
+ else:
461
+ lK_0 = np.array([[0]], dtype=K_dtype)
462
+ lK = np.array([[0]], dtype=K_dtype)
463
+
464
+
465
+ # concatenate ones to K for intercept
466
+ K_a = np.column_stack((ones, K)) if p > 0 else ones.reshape(-1, 1)
467
+ K_a = np.asfortranarray(K_a, dtype=K_dtype)
468
+
469
+ if not self.warm_start or np.any(np.isnan(self.alpha)):
470
+ # np.random.seed(self.seed)
471
+ # self.alpha = np.random.normal(size=p + 1)*np.sqrt(1./n) # alpha weights
472
+ self.alpha = np.zeros(p + 1) # alpha weights
473
+ self.alpha = np.ascontiguousarray(self.alpha, dtype=K_dtype)
474
+ self.phi = 1
475
+
476
+ assert len(self.alpha) > 0, "The model needs to be fitted first"
477
+
478
+ f_curr = K_a @ self.alpha
479
+ conv = False
480
+ for i in range(self.max_iter):
481
+ # get derivatives of \mu and \mu
482
+
483
+ Du, u = self.get_Du(f_curr)
484
+ V_Inv = self.get_V_Inv(f_curr) # O(n^2)
485
+
486
+ DuDa = Du[:,None] * K_a # O(n^2)
487
+ DuDaT_V = np.matmul(DuDa.T, V_Inv) # O(n^3)
488
+ DuDaT_V_DuDa = np.matmul(DuDaT_V, DuDa) # O(n^3)
489
+
490
+ H = ((1/n)*(DuDaT_V_DuDa)) + lK_0 # O(n^2)
491
+ # if np.linalg.cond(H) > 1/sys.float_info.epsilon:
492
+ # H += np.eye(H.shape[0]) * self.hessian_tol
493
+
494
+ e = y - u # O(n)
495
+
496
+ # gradient
497
+ G = ((-1/n)*(DuDaT_V @ e)) + lK_0 @ self.alpha # O(n^2)
498
+
499
+ G_norm = np.linalg.norm(G, ord=2)
500
+ # gradient clipping, O(n)
501
+ # to avoid exploding gradients
502
+ # if G_norm >= self.grad_clip:
503
+ # G *= (self.grad_clip / G_norm)
504
+ if G_norm/max(1, np.linalg.norm(self.alpha, ord=2)) <= 1e-8:
505
+ if self.verbose:
506
+ print("Gradient norm is below the clipping threshold")
507
+ conv = True
508
+ break
509
+
510
+ self.alpha -= np.linalg.solve( H, G ) # O(n^3)
511
+
512
+ # new optimal function val
513
+ f_new = K_a @ self.alpha
514
+ if self.check_convergence(f_new, f_curr) and i > 1:
515
+ if self.verbose:
516
+ print("Converged at update = %i" %i)
517
+ conv = True
518
+ break
519
+
520
+ if self._verbose:
521
+ print("Iteration: ", i)
522
+ print('phi: ', self.phi)
523
+ print('b: ', self.b)
524
+ print('fix_phi: ', self.fix_phi)
525
+ print()
526
+
527
+ if not self.fix_phi:
528
+ if p > 0:
529
+ # hessian of the penalized coefficients (alpha without intercept)
530
+ HA = ((1/n)*(DuDaT_V_DuDa[1:, 1:])) + lK # O(n^2)
531
+ # UHA = (DuDaT_V_DuDa[1:, 1:] + lK) # O(n^2)
532
+ self.set_df(HA) # O(n^3)
533
+ else:
534
+ self.dof = n - 1
535
+
536
+ self.set_phi(e)
537
+
538
+ f_curr = f_new
539
+
540
+ self.f_curr = f_curr
541
+ self.y = y.copy()
542
+
543
+ if not conv:
544
+ print("No convergence after %i iterations" % self.max_iter)
545
+ print("gamma, phi, lam:")
546
+ print(self.gamma, self.phi, self.lam)
547
+
548
+ def fit(self, X, y, vcv = None):
549
+ """
550
+ Fit the model
551
+ """
552
+ self.X = X # store the training data for prediction
553
+ self.set_R_inv(vcv) # it returns identity matrix if vcv is None
554
+ self.set_K_train(self.X)
555
+
556
+ self.krgee_fit(self.K_train, y)
557
+
558
+ def check_convergence(self, f_new, f_curr):
559
+ """
560
+ Check convergence of the algorithm
561
+
562
+ f_new: updated function values
563
+ f_curr: current function values
564
+ tol: tolerance level
565
+
566
+ return: True if converged, False otherwise
567
+ """
568
+ max_updt = np.linalg.norm(f_new - f_curr, ord = np.inf)
569
+ w_max = np.linalg.norm(f_new, ord = np.inf)
570
+
571
+ if w_max == 0 or max_updt/w_max < self.tol:
572
+ return True
573
+
574
+ return False
575
+
576
+ def f_predict(self, X_test):
577
+ assert len(self.alpha) > 0, "The model needs to be fitted first"
578
+ assert X_test is not None, "X_test should not be None"
579
+
580
+ if self.K_train is None:
581
+ f = self.alpha[0] * np.ones(X_test.shape[0])
582
+
583
+ else:
584
+ K_test = self.get_K(X_test, self.X, trainxtest = True)
585
+ b, alpha = self.alpha[0], self.alpha[1:]
586
+ f = K_test @ alpha + b
587
+
588
+ return f
589
+
590
+ def predict(self, X_test, R_12 = None):
591
+ """
592
+ Predict the response for identity link function
593
+
594
+ Parameters
595
+ ----------
596
+ X_test : np.ndarray
597
+ Test data
598
+
599
+ Returns
600
+ -------
601
+ np.ndarray
602
+ Predicted response
603
+ """
604
+
605
+ f = self.f_predict(X_test)
606
+
607
+ if R_12 is not None:
608
+ f = self.conditional_latent(f, self.f_curr, self.y, R_12)
609
+
610
+ return f
611
+
612
+ def conditional_latent(self, f_z, f_n, y, R_12):
613
+ f_z += R_12 @ self.R_inv @ (y - f_n)
614
+ return f_z
615
+
616
+ def score(self, X_test, y_test, metric='rmse', R_12 = None):
617
+ """
618
+ The parameters already account for error correlation
619
+ and the errors are assumed to be independent
620
+
621
+ Parameters
622
+ ----------
623
+ X_test : np.ndarray
624
+ test data
625
+
626
+ y_test : np.ndarray
627
+ test target
628
+
629
+ metric : str, optional
630
+ metric to use, by default 'rmse' and 'r2' for continuous
631
+ targets and 'loss' for binary targets.
632
+
633
+ R_12 : np.ndarray, optional
634
+ Cross-correlation matrix between training and test data, by default None
635
+
636
+ Returns
637
+ -------
638
+ float
639
+ score
640
+ """
641
+ assert len(self.alpha) > 0, "The model needs to be fitted first"
642
+ assert metric in ['rmse', 'loss', 'r2', 'log_loss'], "Invalid metric"
643
+
644
+ # predict function is defined
645
+ # in the subclass
646
+ y_pred = self.predict(X_test, R_12)
647
+
648
+ if metric == 'rmse':
649
+ return np.sqrt(np.mean((y_pred - y_test) ** 2))
650
+
651
+ else:
652
+ u = ((y_test - y_pred) ** 2).sum()
653
+ v = ((y_test - y_test.mean()) ** 2).sum()
654
+
655
+ return 1 - (u / v)
656
+
657
class Logistic(KRGEE):
    """
    Kernel Ridge Generalized Estimating Equations for 1 cluster.
    Using 1 cluster is what phylogenetic regression deals with.

    This class is a subclass of KRGEE and implements the logistic function
    for the estimation of the mu vector and its derivative.
    """
    def __init__(self,
                 kernel='rbf',
                 max_iter=300,
                 lam=None,
                 warm_start = False,
                 tol = 0.001,
                 seed = 12038,
                 verbose = False,
                 get_corr = True,
                 copy_R_inv = False,
                 copy_K_train = False,
                 fix_phi = False,
                 add_extraK = False
                 ) -> None:
        """
        Parameters are the same as for KRGEE. `lam=None` selects the
        KRGEE default (0.1).
        """
        # the fitting routine multiplies the kernel by lam, which fails
        # for None: fall back to the default of the base class
        if lam is None:
            lam = 0.1

        super().__init__(kernel, max_iter, lam, warm_start, tol, seed,
                         verbose, get_corr, copy_R_inv, copy_K_train, fix_phi,
                         add_extraK=add_extraK)

        self.grad_tol = 1e-15
        self.hessian_tol = 1e-12

        self.Du_mu_func = self.get_Du_mu_internal
        self.Vu_func = self.get_Vu_func
        self.Du = np.array([])  # last computed derivative of mu

    def mu(self, f):
        """
        Logistic function of f; f is clipped to [-30, 30] first so the
        exponential cannot overflow.
        """
        f_thresh = 30
        f = np.clip(f, -f_thresh, f_thresh)
        return 1/(1 + np.exp(-f))

    def get_Du_mu_internal(self, x):
        """
        Get the mu vector (estimator) and its derivative from the
        current function values using the logistic function.

        The derivative is also stored in `self.Du`.

        Returns
        -------
        np.ndarray, np.ndarray
            Du, mu
        """
        p1_out = self.mu(x)

        # p1 * p0, derivative of mu estimator, kept away from 0
        self.Du = p1_out * (1 - p1_out)
        self.Du = np.clip(self.Du, self.grad_tol, 1 - self.grad_tol)
        return self.Du, p1_out

    def get_Vu_func(self, f_curr):
        """
        Get the variance of the estimator mu.

        The argument is ignored: the value stored by the last call to
        `get_Du_mu_internal` is returned.
        """
        return self.Du

    def conditional_latent(self, f_z, f_n, y, R_12):
        """
        Correct the test function values f_z (in place) with the training
        residuals, computed between the logit of the clipped responses
        and the training function values f_n.
        """
        # clip so the logit of 0/1 responses stays finite
        y_c = np.clip(y, self.hessian_tol, 1 - self.hessian_tol)
        sig_inv_y = np.log(y_c/(1 - y_c))
        e = sig_inv_y - f_n
        bias = R_12 @ self.R_inv @ e
        f_z += bias
        return f_z

    def predict_proba(self, X_test, R_12=None):
        """
        Predicted probabilities for the test data.

        Parameters
        ----------
        X_test : np.ndarray
            test data
        R_12 : np.ndarray, optional
            correlation matrix between test and training data,
            by default None

        Returns
        -------
        np.ndarray
            logistic function of the (optionally corrected) function values
        """
        f = self.f_predict(X_test)

        if R_12 is not None:
            f = self.conditional_latent(f, self.f_curr, self.y, R_12)

        return self.mu(f)

    def predict(self, X_test, R_12=None):
        """
        Predict the target values for the test data
        using the fitted model.

        Parameters
        ----------
        X_test : np.ndarray
            test data

        R_12 : np.ndarray, optional
            correlation matrix, by default None

        Returns
        -------
        np.ndarray
            predicted target values: 1.0 where the probability is at
            least 0.5, 0.0 otherwise
        """
        mu = self.predict_proba(X_test, R_12)
        return (mu >= 0.5).astype(float)

    def score(self, X_test, y_test, R_12 = None, metric = 'log_loss', eps = 1e-3):
        """
        Score the model using the specified metric.

        Parameters
        ----------
        X_test : np.ndarray
            test data
        y_test : np.ndarray
            true labels
        R_12 : np.ndarray, optional
            correlation matrix, by default None
        metric : str, optional
            'loss' (misclassification rate) or 'log_loss' (mean negative
            log-likelihood), by default 'log_loss'
        eps : float, optional
            probabilities are clipped to [eps, 1 - eps] before taking
            logarithms, by default 1e-3

        Returns
        -------
        float
            evaluation score

        Raises
        ------
        ValueError
            if the metric is neither 'loss' nor 'log_loss'
        """
        if metric == 'loss':
            y_pred = self.predict(X_test, R_12)
            return np.mean(y_pred != y_test)

        if metric == 'log_loss':
            y_pred_prob = self.predict_proba(X_test, R_12)
            y_pred_prob = np.clip(y_pred_prob, eps, 1 - eps)

            log_losses = -( y_test*np.log(y_pred_prob) + (1 - y_test)*np.log(1 - y_pred_prob) )
            return np.mean( log_losses )

        raise ValueError("Invalid metric. Use 'loss' or 'log_loss'.")
791
+
792
+
793
class Poisson(KRGEE):
    """
    KRGEE with an exponential mean function, mu = exp(f), whose
    derivative and variance function are both equal to mu.
    """
    def __init__(self, kernel='rbf', max_iter=300, lam=0.1, warm_start=False, tol=0.001, seed=12038, verbose=False):
        super().__init__(kernel, max_iter, lam, warm_start, tol, seed, verbose)

        self.Du_mu_func = self.get_Du_mu_internal
        self.Vu_func = self.get_Vu_func
        self.Du = np.array([])  # last computed derivative of mu

    def mu(self, f):
        """Mean function: exp(f)."""
        return np.exp(f)

    def get_Du_mu_internal(self, x):
        """
        Get the mu vector (estimator) and its derivative from the
        current function values using the exponential function.

        The derivative (equal to mu) is also stored in `self.Du`.

        Returns
        -------
        np.ndarray, np.ndarray
            Du, mu
        """
        mu = self.mu(x)
        self.Du = mu
        return self.Du, mu

    def get_Vu_func(self, f_curr):
        """
        Get the variance of the estimator mu.

        The argument is ignored: the value stored by the last call to
        `get_Du_mu_internal` is returned.
        """
        return self.Du

    def predict(self, X_test, R_12=None):
        """
        Predict the mean response for the test data.

        Parameters
        ----------
        X_test : np.ndarray
            test data
        R_12 : np.ndarray, optional
            accepted so that the inherited `score`, which always passes
            it, can call this method; only None is supported

        Returns
        -------
        np.ndarray
            exp of the function values

        Raises
        ------
        NotImplementedError
            if R_12 is given
        """
        if R_12 is not None:
            raise NotImplementedError(
                "Poisson.predict does not support the R_12 correction")

        return self.mu(self.f_predict(X_test))
822
+
823
+
824
class Gamma(KRGEE):
    """
    KRGEE variant with mean function mu = -exp(-f), derivative mu**2 and
    variance function mu**2 * dispersion.
    """
    def __init__(self, kernel='rbf', max_iter=300, lam=0.1,
                 warm_start=False, tol=0.001, seed=12038,
                 verbose=False, dispersion=1):
        super().__init__(kernel, max_iter, lam, warm_start, tol, seed, verbose)

        self.Du_mu_func = self.get_Du_mu_internal
        self.Vu_func = self.get_Vu_func
        self.Du = np.array([])  # last computed derivative of mu
        self.dispersion = dispersion

    def mu(self, f):
        """Mean function: -exp(-f)."""
        # NOTE(review): the derivative used in get_Du_mu_internal (mu**2)
        # is the derivative of mu = -1/f, not of -exp(-f) -- confirm which
        # mean function is intended
        return -np.exp(-f)

    def get_Du_mu_internal(self, x):
        """
        Get the mu vector (estimator) and the derivative used for it
        (mu**2) from the current function values.

        The derivative is also stored in `self.Du`.

        Returns
        -------
        np.ndarray, np.ndarray
            Du, mu
        """
        mu = self.mu(x)
        self.Du = mu**2
        return self.Du, mu

    def get_Vu_func(self, f_curr):
        """
        Get the variance of the estimator mu.

        The argument is ignored: the value stored by the last call to
        `get_Du_mu_internal`, times the dispersion, is returned.
        """
        return self.Du*self.dispersion

    def predict(self, X_test, R_12=None):
        """
        Predict the mean response for the test data.

        Parameters
        ----------
        X_test : np.ndarray
            test data
        R_12 : np.ndarray, optional
            accepted so that the inherited `score`, which always passes
            it, can call this method; only None is supported

        Returns
        -------
        np.ndarray
            mean function applied to the function values

        Raises
        ------
        NotImplementedError
            if R_12 is given
        """
        if R_12 is not None:
            raise NotImplementedError(
                "Gamma.predict does not support the R_12 correction")

        return self.mu(self.f_predict(X_test))
856
+
857
+
858
+
859
+ # region: old code
860
+
861
+
862
+ # def get_DFP(spps_train, vcv):
863
+ # """
864
+ # Get degrees of freedom sensu Paradis
865
+ # Params
866
+ # ------
867
+ # tree: nodes, root
868
+ # T_spps_i: list of species index in nodes
869
+ # vcv: vcv of the species
870
+ # """
871
+
872
+ # n1 = spps_train[0]
873
+ # # O(n), find the root
874
+ # while n1.ancestor:
875
+ # n1 = n1.ancestor
876
+ # root = n1
877
+
878
+ # vn = set()
879
+ # bls = 0
880
+ # for n in spps_train:
881
+ # bls += n.branch_length
882
+ # tmp_anc = n.ancestor
883
+ # while (tmp_anc not in vn) and (tmp_anc != root):
884
+ # vn.add(tmp_anc)
885
+ # tmp_anc = tmp_anc.ancestor
886
+
887
+ # for i in vn:
888
+ # bls += i.branch_length
889
+
890
+ # return (bls * vcv.shape[0])/np.sum(np.diag(vcv))
891
+
892
+
893
+ # def get_V_sqInv(self, f_curr):
894
+ # """
895
+ # Get the square root of the inverse of the V matrix
896
+ # V = A^1/2 R A^1/2 \phi
897
+ # A = variance of \mu
898
+ # R = correlation matrix
899
+ # phi = dispersion parameter
900
+
901
+ # V^(-1/2) = ( A^1/2 R A^1/2 \phi)^-1/2
902
+ # = A^-1/4 R^-1/2 A^-1/4 \phi^-1/2
903
+
904
+ # O(n^2) complexity
905
+
906
+ # Parameters
907
+ # ----------
908
+ # f_curr : np.ndarray
909
+ # current function values
910
+ # Returns
911
+ # -------
912
+ # np.ndarray
913
+ # V^(-1/2)
914
+ # """
915
+ # A_qInv = self.Vu_func(f_curr)**(-1/4)
916
+ # V_sqInv = (A_qInv[:, None] * self.L_inv) * A_qInv
917
+ # V_sqInv *= (self.phi)**(-1/2) # O(n^2)
918
+ # return V_sqInv
919
+
920
+ #endregion