phylokrr-dev 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- phylokrr_dev/__init__.py +0 -0
- phylokrr_dev/alpha_beta_weighting.py +252 -0
- phylokrr_dev/block_cross_validation.py +256 -0
- phylokrr_dev/phyloKPQL.py +920 -0
- phylokrr_dev/utils.py +23 -0
- phylokrr_dev-0.2.0.dist-info/METADATA +9 -0
- phylokrr_dev-0.2.0.dist-info/RECORD +9 -0
- phylokrr_dev-0.2.0.dist-info/WHEEL +5 -0
- phylokrr_dev-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,920 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import time
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
from phylokrr.kernels import linear_kernel
|
|
7
|
+
# from scipy import linalg as la
|
|
8
|
+
from numpy import linalg as la
|
|
9
|
+
|
|
10
|
+
def distance_matrix(a, b):
    """
    Pairwise squared Euclidean (l2) distance matrix.

    Parameters
    ----------
    a : np.ndarray
        real-valued array of shape (n, d)
    b : np.ndarray
        real-valued array of shape (m, d)

    Returns
    -------
    np.ndarray
        (n, m) matrix whose (i, j) entry is ||a[i] - b[j]||^2
    """
    diff = np.asarray(a)[:, None, :] - np.asarray(b)[None, :, :]

    # keep a floating-point result for integer/bool inputs
    if not np.issubdtype(diff.dtype, np.inexact):
        diff = diff.astype(float)

    # Sum the squares directly: taking the norm (a square root) and squaring
    # it again costs an extra pass and introduces rounding error.
    return np.einsum('ijk,ijk->ij', diff, diff)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def RBF_kernel(a, b, gamma, extra_K = None):
    """
    Radial Basis Function kernel, exp(-gamma * ||a_i - b_j||^2).

    Parameters
    ----------
    a, b : np.ndarray
        input matrices passed to `distance_matrix`
    gamma : float
        scale applied to the squared distances
    extra_K : np.ndarray, optional
        matrix added to the kernel AFTER exponentiation, by default None

    Returns
    -------
    np.ndarray
        kernel matrix
    """
    # scaled squared distances (data similarity in the exponent)
    kernel = distance_matrix(a, b) * (-gamma)

    # exponentiate in place to avoid allocating a second matrix
    np.exp(kernel, out=kernel)

    if extra_K is None:
        return kernel

    kernel += extra_K
    return kernel
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class KRGEE:
    """
    Kernel Ridge Generalized Estimating Equations for 1 cluster.
    Using 1 cluster is what phylogenetic regression deals with.

    The base class uses the identity link (mu = f) with unit variance
    function; subclasses replace `Du_mu_func` and `Vu_func`.
    """
    def __init__(self,
                 kernel='rbf',
                 max_iter=300,
                 lam=0.1,
                 warm_start = False,
                 tol = 0.001,
                 seed = 12038,
                 verbose = False,
                 get_corr = True,
                 copy_R_inv = False,
                 copy_K_train = False,
                 fix_phi = False,
                 add_extraK= False
                 ) -> None:

        self.add_extraK = add_extraK

        # extra matrices added to the RBF kernel when add_extraK is True
        # (train x train and test x train); they are only read by get_K
        self.Rnn = None
        self.Rzn = None

        # alpha[0] is the intercept, alpha[1:] the kernel weights
        self.alpha = np.array([])
        self.b = 0

        self.verbose = verbose
        self.warm_start = warm_start

        self.kernel = kernel
        self.lam = lam

        self.gamma = 0.1
        self.c = 0.1

        self.max_iter = max_iter
        self.tol = tol
        self.seed = seed

        # specify if the correlation matrix is needed
        # it might be the case that the input matrix is already
        # in form of a correlation matrix
        self.get_corr = get_corr

        # identity link: derivative is 1 and mu equals f
        self.Du_mu_func = lambda x: (np.ones(x.shape[0]), x)
        self.Vu_func = lambda x: np.ones( x.shape[0] )

        self.X = np.array([])
        self.y = np.array([])

        self.copy_R_inv = copy_R_inv
        self.R_inv = np.array([])

        self.copy_K = copy_K_train
        self.K_train = np.array([])
        self.K_col_mean = np.array([])
        self.K_all_mean = np.array([])

        self.phi = 1
        self.fix_phi = fix_phi
        self.r = np.array([])
        self.rho = np.array([])
        self.dof = 1 # degrees of freedom
        self.L_inv = np.array([]) # R^-1/2

        self.f_curr = np.array([]) # current function values

        self._verbose = False
        self._update_rho = False
        self.grad_tol = 1e-10
        self.hessian_tol = 1e-10
        self.grad_clip = 2 # gradient clipping

    def __repr__(self):
        return f"KRGEE(kernel={self.kernel}, max_iter={self.max_iter}, lam={self.lam}, warm_start={self.warm_start}, tol={self.tol}, seed={self.seed})"

    def center_kernel(self, K, trainxtest = False):
        """
        Center the kernel matrix IN PLACE and return it.

        ref: Kernel methods in computational biology,
        Scholkopf, Tsuda, Vert. pp. 50

            K_centered = (I - e/n) K (I - e/n)

        e is the matrix of ones, I the identity matrix and
        n the number of samples.

        That formula only holds for square matrices; this is a general
        attempt for non-square (test x train) matrices: the column means
        and the overall mean stored from the training kernel are reused
        when `trainxtest` is True.

        Parameters
        ----------
        K : np.ndarray
            floating-point kernel matrix; it is modified in place
        trainxtest : bool
            if False, the column/overall means of K are stored on the
            instance before centering
        """
        K_row_mean = np.mean(K, axis = 1)

        if not trainxtest:
            self.K_col_mean = np.mean(K, axis = 0)
            self.K_all_mean = np.mean(K)

        K -= self.K_col_mean
        K -= K_row_mean.reshape(-1,1)
        K += self.K_all_mean
        return K

    def set_params(self, **params):
        """
        Set attributes of this estimator.

        Raises
        ------
        ValueError
            if a key is not an existing attribute
        """
        for key, value in params.items():
            if not hasattr(self, key):
                raise ValueError(f"Parameter {key} is not recognized.")
            setattr(self, key, value)

    def get_params(self):
        """Return a dict with max_iter, lam, intercept (self.b) and tol."""
        return {'max_iter': self.max_iter,
                'lam': self.lam,
                'intercept': self.b,
                'tol': self.tol}

    def get_K(self, A, B, trainxtest=False):
        """
        Get the centered kernel matrix

        depending on the kernel type specified
        a parameter is passed to the kernel function.
        Otherwise, the input matrix A is taken as a precomputed kernel.

        Parameters
        ----------
        A : np.ndarray
            input matrix
        B : np.ndarray
            input matrix
        trainxtest : bool
            True when A holds test rows and B the training rows

        Returns
        -------
        np.ndarray
            centered kernel
        """
        if self.kernel == 'rbf':
            R = None
            if self.add_extraK:
                R = self.Rzn if trainxtest else self.Rnn

            K = RBF_kernel(A, B, self.gamma, extra_K=R)

        elif self.kernel == 'linear':
            K = linear_kernel(A, B, self.c)

        else:
            # Precomputed kernel: work on a float copy, because centering
            # is done in place and must not overwrite the caller's data.
            K = np.array(A, dtype=float)

        return self.center_kernel(K, trainxtest)

    def pre_process_V(self, V):
        """
        Pre-process the V matrix
        1. Normalize the matrix by its maximum
        2. Get the n-th root of the determinant
        3. Normalize the matrix again by that root (skipped if it is 0)

        it improves the numerical stability of the algorithm

        refs:

        https://daijiang.github.io/phyr/reference/pglmm_compare.html
        https://ives.labs.wisc.edu/pdf/correlateddata.pdf (pp. 94)
        """
        V = np.asfortranarray(V, dtype=V.dtype)

        n, _ = V.shape
        V = V/np.max(V)
        detV_n = np.linalg.det(V)**(1/n)
        V = V/detV_n if detV_n != 0 else V

        return V

    def get_R(self, vcv):
        """
        Get correlation matrix from vcv matrix
        (rows and columns scaled by 1/sqrt of the diagonal)
        """
        kr = 1/np.sqrt(np.diag(vcv))
        R = (kr[:, None] * vcv) * kr
        return R

    def _invR(self, R):
        """
        Inverse of the correlation matrix via eigendecomposition.

        A small ridge (hessian_tol) is added to the diagonal first.
        R^-1/2 is stored in self.L_inv as a side effect.
        """
        E, Q = la.eigh(R + np.eye(R.shape[0]) * self.hessian_tol)

        # scaling the columns of Q equals Q @ diag(.) without the
        # extra dense matrix product
        self.L_inv = (Q * E**(-1/2)) @ Q.T
        return (Q * E**(-1)) @ Q.T

    def get_R_inv(self, vcv):
        """
        Get correlation matrix from vcv matrix
        and return its inverse
        """
        # TODO: get_corr might not be needed
        # as if it is already a correlation matrix
        # it is like dividing by 1
        R = self.get_R(vcv) if self.get_corr else vcv

        return self._invR(R)

    def set_R_inv(self, vcv):
        """
        Set the inverse correlation matrix from the vcv matrix.

        The general logic is as follows:

        if copy_S and S != 0:
            do nothing (keep the cached matrix)

        otherwise:
            set_S (overwrite)

        where S is Some matrix,
        in this case the inverse correlation matrix.

        If vcv is not a np.ndarray, identity matrices sized from
        self.X are used for both R_inv and L_inv.
        """
        if self.copy_R_inv and len(self.R_inv) != 0:
            return

        if isinstance(vcv, np.ndarray):
            self.R_inv = self.get_R_inv(vcv)
        else:
            n, _ = self.X.shape
            self.R_inv = np.eye(n)
            self.L_inv = np.eye(n)

    def set_K_train(self, X):
        """
        Set the training kernel matrix

        The general logic is as follows:

        if copy_S and S != 0:
            do nothing (keep the cached matrix)

        otherwise:
            set_S (overwrite)

        where S is Some matrix,
        in this case the kernel matrix

        Parameters
        ----------
        X : np.ndarray or None
            input matrix; None stores K_train = None
        """
        if X is None:
            self.K_train = None
            return

        # K_train may be None after a previous call with X=None
        cached = self.K_train is not None and len(self.K_train) != 0
        if self.copy_K and cached:
            return

        self.K_train = np.asfortranarray(self.get_K(X, X), dtype=float)

    def get_Du(self, f_curr):
        """
        Get mu vector (estimator) and the diagonal of the matrix with
        the derivatives of mu with respect to f(x), from the current
        function values.

            W_ii = d mu / d f(x)

        if mu is the identity function, then W_ii = 1

        The function depends on the model being used.

        Parameters
        ----------
        f_curr : np.ndarray
            current function values

        Returns
        -------
        np.ndarray, np.ndarray
            Du, u
        """
        return self.Du_mu_func(f_curr)

    def get_Vu_sqrt_inv(self, f_curr):
        """
        get the square root of the inverse of
        the variance of the estimator mu.

        the function is model dependent

        Parameters
        ----------
        f_curr : np.ndarray
            current function values

        Returns
        -------
        np.ndarray
            square root of the inverse of the variance
        """
        return self.Vu_func(f_curr)**(-1/2)

    def set_df(self, H):
        """
        residual degrees of freedom (stored in self.dof)

        ref: Elements of Statistical Learning, Hastie, Tibshirani, Friedman
        pp. 233

        Parameters
        ----------
        H : np.ndarray
            Hessian matrix (symmetric)
        """
        n = H.shape[0]

        theta = np.linalg.eigvalsh(H) # O(n^3)
        theta = np.real(theta)
        df_model = np.sum(theta / (theta + self.lam))
        self.dof = np.clip(n - df_model, self.grad_tol, n)

    def set_rdf(self, y, y_hat):
        """
        residual degrees of freedom (stored in self.dof)
        using the covariance method

        ref: Elements of Statistical Learning, Hastie, Tibshirani, Friedman
        pp. 77, 233
        """
        # effective number of parameters
        df_model = np.cov(y_hat, y, ddof=1)[0, 1] / np.var(y - y_hat, ddof=1)
        n = len(y_hat)
        self.dof = np.clip(n - df_model, self.grad_tol, n)

    def set_phi(self, e):
        """
        Update the dispersion parameter phi from the residuals `e`.

        Uses self.f_curr for the variance function, so self.f_curr must
        hold the function values the residuals were computed from.
        """
        # pearson residual
        err = e * self.Vu_func(self.f_curr)**(-1/2)
        self.phi = np.dot(err, err)/self.dof # O(n)

    def get_V_Inv(self, f_curr):
        """
        Get the inverse of the V matrix

            V = A^1/2 R A^1/2 phi
            A = variance of mu
            R = correlation matrix
            phi = dispersion parameter

            V^(-1) = ( A^1/2 R A^1/2 phi)^-1
                   = A^-1/2 R^-1 A^-1/2 phi^-1

        O(n^2) complexity

        Parameters
        ----------
        f_curr : np.ndarray
            current function values

        Returns
        -------
        np.ndarray
            V^(-1)
        """
        A_sqInv = self.Vu_func(f_curr)**(-1/2)
        V_Inv = (A_sqInv[:, None] * self.R_inv) * A_sqInv
        V_Inv *= (self.phi)**(-1) # O(n^2)
        return V_Inv

    def krgee_fit(self, K, y):
        """
        Newton iterations for the penalized estimating equations.

        Parameters
        ----------
        K : np.ndarray or None
            centered training kernel; None fits an intercept-only model
        y : np.ndarray
            response vector

        Side effects: updates self.alpha, self.phi, self.dof,
        self.f_curr and self.y. self.R_inv must already be set.
        """
        n = y.shape[0]
        p = K.shape[1] if K is not None else 0
        K_dtype = K.dtype if K is not None else float

        ones = np.ones(n, dtype=K_dtype)

        if p > 0:
            # penalty matrix padded with a zero row/column so that the
            # intercept is not penalized
            K_0 = np.zeros((n + 1, p + 1), dtype=K_dtype)
            K_0[1:, 1:] = K
            lK_0 = self.lam * K_0
            lK = self.lam * K

        else:
            lK_0 = np.array([[0]], dtype=K_dtype)
            lK = np.array([[0]], dtype=K_dtype)

        # concatenate ones to K for intercept
        K_a = np.column_stack((ones, K)) if p > 0 else ones.reshape(-1, 1)
        K_a = np.asfortranarray(K_a, dtype=K_dtype)

        # A warm start is only possible when a finite alpha of the right
        # size exists; otherwise (e.g. the very first fit) start from zero.
        cold_start = (
            not self.warm_start
            or np.shape(self.alpha) != (p + 1,)
            or np.any(np.isnan(self.alpha))
        )
        if cold_start:
            self.alpha = np.zeros(p + 1) # alpha weights
            self.alpha = np.ascontiguousarray(self.alpha, dtype=K_dtype)
            self.phi = 1

        f_curr = K_a @ self.alpha
        conv = False
        for i in range(self.max_iter):
            # set_phi reads self.f_curr, so keep it in sync with the values
            # the residuals of this iteration are computed from
            self.f_curr = f_curr

            # get derivatives of mu and mu
            Du, u = self.get_Du(f_curr)
            V_Inv = self.get_V_Inv(f_curr) # O(n^2)

            DuDa = Du[:,None] * K_a # O(n^2)
            DuDaT_V = np.matmul(DuDa.T, V_Inv) # O(n^3)
            DuDaT_V_DuDa = np.matmul(DuDaT_V, DuDa) # O(n^3)

            H = ((1/n)*(DuDaT_V_DuDa)) + lK_0 # O(n^2)

            e = y - u # O(n)

            # gradient
            G = ((-1/n)*(DuDaT_V @ e)) + lK_0 @ self.alpha # O(n^2)

            G_norm = np.linalg.norm(G, ord=2)
            if G_norm/max(1, np.linalg.norm(self.alpha, ord=2)) <= 1e-8:
                if self.verbose:
                    print("Gradient norm is below the clipping threshold")
                conv = True
                break

            self.alpha -= np.linalg.solve( H, G ) # O(n^3)

            # new optimal function val
            f_new = K_a @ self.alpha
            if self.check_convergence(f_new, f_curr) and i > 1:
                if self.verbose:
                    print("Converged at update = %i" %i)
                # keep the stored function values consistent with alpha
                f_curr = f_new
                conv = True
                break

            if self._verbose:
                print("Iteration: ", i)
                print('phi: ', self.phi)
                print('b: ', self.b)
                print('fix_phi: ', self.fix_phi)
                print()

            if not self.fix_phi:
                if p > 0:
                    # hessian of the penalized coefficients (alpha without intercept)
                    HA = ((1/n)*(DuDaT_V_DuDa[1:, 1:])) + lK # O(n^2)
                    self.set_df(HA) # O(n^3)
                else:
                    self.dof = n - 1

                self.set_phi(e)

            f_curr = f_new

        self.f_curr = f_curr
        self.y = y.copy()

        if not conv:
            print("No convergence after %i iterations" % self.max_iter)
            print("gamma, phi, lam:")
            print(self.gamma, self.phi, self.lam)

    def fit(self, X, y, vcv = None):
        """
        Fit the model

        Parameters
        ----------
        X : np.ndarray
            training data (or a precomputed square kernel when
            self.kernel is neither 'rbf' nor 'linear')
        y : np.ndarray
            training response
        vcv : np.ndarray, optional
            variance-covariance matrix; identity is used when it is
            not an array
        """
        self.X = X # store the training data for prediction
        self.set_R_inv(vcv) # it sets identity matrix if vcv is None
        self.set_K_train(self.X)

        self.krgee_fit(self.K_train, y)

    def check_convergence(self, f_new, f_curr):
        """
        Check convergence of the algorithm

        f_new: updated function values
        f_curr: current function values

        The largest absolute update relative to the largest absolute
        new value is compared against self.tol.

        return: True if converged, False otherwise
        """
        max_updt = np.linalg.norm(f_new - f_curr, ord = np.inf)
        w_max = np.linalg.norm(f_new, ord = np.inf)

        return bool(w_max == 0 or max_updt/w_max < self.tol)

    def f_predict(self, X_test):
        """
        Latent (link-scale) function values for the test data.
        """
        assert len(self.alpha) > 0, "The model needs to be fitted first"
        assert X_test is not None, "X_test should not be None"

        if self.K_train is None:
            # intercept-only model
            return self.alpha[0] * np.ones(X_test.shape[0])

        K_test = self.get_K(X_test, self.X, trainxtest = True)
        b, alpha = self.alpha[0], self.alpha[1:]
        return K_test @ alpha + b

    def predict(self, X_test, R_12 = None):
        """
        Predict the response for identity link function

        Parameters
        ----------
        X_test : np.ndarray
            Test data
        R_12 : np.ndarray, optional
            cross-correlation matrix between test and training data;
            when given, the prediction is adjusted by conditional_latent

        Returns
        -------
        np.ndarray
            Predicted response
        """
        f = self.f_predict(X_test)

        if R_12 is not None:
            f = self.conditional_latent(f, self.f_curr, self.y, R_12)

        return f

    def conditional_latent(self, f_z, f_n, y, R_12):
        """
        Add R_12 R^-1 (y - f_n) to f_z (in place) and return it.
        """
        f_z += R_12 @ self.R_inv @ (y - f_n)
        return f_z

    def score(self, X_test, y_test, metric='rmse', R_12 = None):
        """
        The parameters already account for error correlation
        and the errors are assumed to be independent

        Parameters
        ----------
        X_test : np.ndarray
            test data

        y_test : np.ndarray
            test target

        metric : str, optional
            'rmse' (default) returns the root mean squared error;
            any other accepted value returns the R^2 coefficient

        R_12 : np.ndarray, optional
            Cross-correlation matrix between training and test data, by default None

        Returns
        -------
        float
            score
        """
        assert len(self.alpha) > 0, "The model needs to be fitted first"
        assert metric in ['rmse', 'loss', 'r2', 'log_loss'], "Invalid metric"

        # predict may be overridden in the subclass
        y_pred = self.predict(X_test, R_12)

        if metric == 'rmse':
            return np.sqrt(np.mean((y_pred - y_test) ** 2))

        u = ((y_test - y_pred) ** 2).sum()
        v = ((y_test - y_test.mean()) ** 2).sum()

        return 1 - (u / v)
|
|
656
|
+
|
|
657
|
+
class Logistic(KRGEE):
    """
    Kernel Ridge Generalized Estimating Equations for 1 cluster,
    binary response.

    Subclass of KRGEE that plugs in the logistic function for the mean
    (mu) and its derivative, which is also used as the variance function.

    NOTE(review): `lam` defaults to None here; it appears it must be set
    (constructor or set_params) before fitting, since the fit multiplies
    the kernel by `lam` -- confirm.
    """
    def __init__(self,
                 kernel='rbf',
                 max_iter=300,
                 lam=None,
                 warm_start = False,
                 tol = 0.001,
                 seed = 12038,
                 verbose = False,
                 get_corr = True,
                 copy_R_inv = False,
                 copy_K_train = False,
                 fix_phi = False,
                 add_extraK = False
                 ) -> None:

        super().__init__(kernel, max_iter, lam, warm_start, tol, seed,
                         verbose, get_corr, copy_R_inv, copy_K_train, fix_phi,
                         add_extraK=add_extraK)

        # tighter tolerances than the base class
        self.grad_tol = 1e-15
        self.hessian_tol = 1e-12

        # model-specific mean/derivative and variance functions
        self.Du_mu_func = self.get_Du_mu_internal
        self.Vu_func = self.get_Vu_func
        self.Du = np.array([])

    def mu(self, f):
        """
        Logistic (sigmoid) of f, with f clipped to [-30, 30] first.
        """
        bounded = np.clip(f, -30, 30)
        return 1/(1 + np.exp(-bounded))

    def get_Du_mu_internal(self, x):
        """
        Return (Du, mu) for the function values x.

        mu is the logistic of x; Du = mu * (1 - mu) is its derivative,
        clipped to [grad_tol, 1 - grad_tol] and cached in self.Du.
        """
        prob = self.mu(x)

        # p1 * p0, derivative of mu estimator
        self.Du = np.clip(prob * (1 - prob), self.grad_tol, 1 - self.grad_tol)
        return self.Du, prob

    def get_Vu_func(self, f_curr):
        """
        Variance of the estimator mu.

        Returns the cached self.Du; the f_curr argument is not used.
        """
        return self.Du

    def conditional_latent(self, f_z, f_n, y, R_12):
        """
        Add R_12 R^-1 (logit(y) - f_n) to f_z (in place) and return it.

        y is clipped to [hessian_tol, 1 - hessian_tol] so the logit
        stays finite.
        """
        y_safe = np.clip(y, self.hessian_tol, 1 - self.hessian_tol)
        residual = np.log(y_safe/(1 - y_safe)) - f_n
        f_z += R_12 @ self.R_inv @ residual
        return f_z

    def predict_proba(self, X_test, R_12=None):
        """
        Predicted probabilities for the test data.

        When R_12 is given, the latent values are adjusted with
        conditional_latent before applying the logistic function.
        """
        latent = self.f_predict(X_test)

        if R_12 is not None:
            latent = self.conditional_latent(latent, self.f_curr, self.y, R_12)

        return self.mu(latent)

    def predict(self, X_test, R_12=None):
        """
        Predict the class labels for the test data
        using the fitted model.

        Parameters
        ----------
        X_test : np.ndarray
            test data

        R_12 : np.ndarray, optional
            correlation matrix, by default None

        Returns
        -------
        np.ndarray
            1.0 where the predicted probability is >= 0.5, else 0.0
        """
        return (self.predict_proba(X_test, R_12) >= 0.5).astype(float)

    def score(self, X_test, y_test, R_12 = None, metric = 'log_loss', eps = 1e-3):
        """
        Score the model using the specified metric.

        Parameters
        ----------
        X_test : np.ndarray
            test data
        y_test : np.ndarray
            true labels
        R_12 : np.ndarray, optional
            correlation matrix, by default None
        metric : str, optional
            'loss' (misclassification rate) or 'log_loss' (default)
        eps : float, optional
            probabilities are clipped to [eps, 1 - eps] before taking
            logarithms, by default 1e-3

        Returns
        -------
        float
            evaluation score

        Raises
        ------
        ValueError
            if metric is neither 'loss' nor 'log_loss'
        """
        if metric == 'loss':
            return np.mean(self.predict(X_test, R_12) != y_test)

        if metric != 'log_loss':
            raise ValueError("Invalid metric. Use 'loss' or 'log_loss'.")

        prob = np.clip(self.predict_proba(X_test, R_12), eps, 1 - eps)
        log_losses = -( y_test*np.log(prob) + (1 - y_test)*np.log(1 - prob) )
        return np.mean( log_losses )
|
|
791
|
+
|
|
792
|
+
|
|
793
|
+
class Poisson(KRGEE):
    """
    KRGEE with an exponential mean function, mu = exp(f), whose
    derivative (also mu) is used as the variance function.
    """
    def __init__(self, kernel='rbf', max_iter=300, lam=0.1, warm_start=False, tol=0.001, seed=12038, verbose=False):
        super().__init__(kernel, max_iter, lam, warm_start, tol, seed, verbose)

        # model-specific mean/derivative and variance functions
        self.Du_mu_func = self.get_Du_mu_internal
        self.Vu_func = self.get_Vu_func
        self.Du = np.array([])

    def mu(self, f):
        """Mean function: exp(f)."""
        return np.exp(f)

    def get_Du_mu_internal(self, x):
        """
        Get mu vector (estimator) and D_u with the mu derivatives
        from the current function values using the exponential function.

        The derivative of exp(f) is exp(f), so Du equals mu; it is
        cached in self.Du.
        """
        mu = self.mu(x)
        self.Du = mu
        return self.Du, mu

    def get_Vu_func(self, f_curr):
        """
        Get the variance of the estimator mu.

        Returns the cached self.Du; the f_curr argument is not used.
        """
        return self.Du

    def predict(self, X_test, R_12=None):
        """
        Predict the mean response for the test data.

        Parameters
        ----------
        X_test : np.ndarray
            test data
        R_12 : None
            accepted only so the inherited `score`, which calls
            `predict(X_test, R_12)`, works; must be None

        Raises
        ------
        NotImplementedError
            if R_12 is given: no conditional correction is defined
            for this model
        """
        if R_12 is not None:
            raise NotImplementedError(
                "Poisson.predict does not support the R_12 conditional correction"
            )

        f = self.f_predict(X_test)
        return self.mu(f)
|
|
822
|
+
|
|
823
|
+
|
|
824
|
+
class Gamma(KRGEE):
    """
    KRGEE variant with mean function mu = -exp(-f) and variance
    function mu^2 * dispersion.

    NOTE(review): Du is set to mu**2, which is the derivative of the
    inverse link mu = -1/f, while `mu` itself returns -exp(-f) (whose
    derivative is -mu). Confirm which mean function is intended.
    """
    def __init__(self, kernel='rbf', max_iter=300, lam=0.1,
                 warm_start=False, tol=0.001, seed=12038,
                 verbose=False, dispersion=1):
        super().__init__(kernel, max_iter, lam, warm_start, tol, seed, verbose)

        # model-specific mean/derivative and variance functions
        self.Du_mu_func = self.get_Du_mu_internal
        self.Vu_func = self.get_Vu_func
        self.Du = np.array([])
        self.dispersion = dispersion

    def mu(self, f):
        """Mean function: -exp(-f)."""
        return -np.exp(-f)

    def get_Du_mu_internal(self, x):
        """
        Get mu vector (estimator) and D_u from the current function
        values. Du is set to mu**2 and cached in self.Du.
        """
        mu = self.mu(x)
        self.Du = mu**2
        return self.Du, mu

    def get_Vu_func(self, f_curr):
        """
        Get the variance of the estimator mu.

        Returns the cached self.Du times self.dispersion; the f_curr
        argument is not used.
        """
        return self.Du*self.dispersion

    def predict(self, X_test, R_12=None):
        """
        Predict the mean response for the test data.

        Parameters
        ----------
        X_test : np.ndarray
            test data
        R_12 : None
            accepted only so the inherited `score`, which calls
            `predict(X_test, R_12)`, works; must be None

        Raises
        ------
        NotImplementedError
            if R_12 is given: no conditional correction is defined
            for this model
        """
        if R_12 is not None:
            raise NotImplementedError(
                "Gamma.predict does not support the R_12 conditional correction"
            )

        f = self.f_predict(X_test)
        return self.mu(f)
|
|
856
|
+
|
|
857
|
+
|
|
858
|
+
|
|
859
|
+
# region: old code
|
|
860
|
+
|
|
861
|
+
|
|
862
|
+
# def get_DFP(spps_train, vcv):
|
|
863
|
+
# """
|
|
864
|
+
# Get degrees of freedom sensu Paradis
|
|
865
|
+
# Params
|
|
866
|
+
# ------
|
|
867
|
+
# tree: nodes, root
|
|
868
|
+
# T_spps_i: list of species index in nodes
|
|
869
|
+
# vcv: vcv of the species
|
|
870
|
+
# """
|
|
871
|
+
|
|
872
|
+
# n1 = spps_train[0]
|
|
873
|
+
# # O(n), find the root
|
|
874
|
+
# while n1.ancestor:
|
|
875
|
+
# n1 = n1.ancestor
|
|
876
|
+
# root = n1
|
|
877
|
+
|
|
878
|
+
# vn = set()
|
|
879
|
+
# bls = 0
|
|
880
|
+
# for n in spps_train:
|
|
881
|
+
# bls += n.branch_length
|
|
882
|
+
# tmp_anc = n.ancestor
|
|
883
|
+
# while (tmp_anc not in vn) and (tmp_anc != root):
|
|
884
|
+
# vn.add(tmp_anc)
|
|
885
|
+
# tmp_anc = tmp_anc.ancestor
|
|
886
|
+
|
|
887
|
+
# for i in vn:
|
|
888
|
+
# bls += i.branch_length
|
|
889
|
+
|
|
890
|
+
# return (bls * vcv.shape[0])/np.sum(np.diag(vcv))
|
|
891
|
+
|
|
892
|
+
|
|
893
|
+
# def get_V_sqInv(self, f_curr):
|
|
894
|
+
# """
|
|
895
|
+
# Get the square root of the inverse of the V matrix
|
|
896
|
+
# V = A^1/2 R A^1/2 \phi
|
|
897
|
+
# A = variance of \mu
|
|
898
|
+
# R = correlation matrix
|
|
899
|
+
# phi = dispersion parameter
|
|
900
|
+
|
|
901
|
+
# V^(-1/2) = ( A^1/2 R A^1/2 \phi)^-1/2
|
|
902
|
+
# = A^-1/4 R^-1/2 A^-1/4 \phi^-1/2
|
|
903
|
+
|
|
904
|
+
# O(n^2) complexity
|
|
905
|
+
|
|
906
|
+
# Parameters
|
|
907
|
+
# ----------
|
|
908
|
+
# f_curr : np.ndarray
|
|
909
|
+
# current function values
|
|
910
|
+
# Returns
|
|
911
|
+
# -------
|
|
912
|
+
# np.ndarray
|
|
913
|
+
# V^(-1/2)
|
|
914
|
+
# """
|
|
915
|
+
# A_qInv = self.Vu_func(f_curr)**(-1/4)
|
|
916
|
+
# V_sqInv = (A_qInv[:, None] * self.L_inv) * A_qInv
|
|
917
|
+
# V_sqInv *= (self.phi)**(-1/2) # O(n^2)
|
|
918
|
+
# return V_sqInv
|
|
919
|
+
|
|
920
|
+
#endregion
|