pcntoolkit 0.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,752 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on Thu Jul 25 17:01:24 2019
5
+
6
+ @author: seykia
7
+ @author: augub
8
+ """
9
+
10
+ from __future__ import division, print_function
11
+
12
+ import os
13
+ import sys
14
+ from sys import exit
15
+
16
+ import arviz as az
17
+ import numpy as np
18
+ import xarray
19
+ from scipy import special as spp
20
+
21
+ try:
22
+ from pcntoolkit.dataio import fileio
23
+ from pcntoolkit.model.hbr import HBR
24
+ from pcntoolkit.normative_model.norm_base import NormBase
25
+ except ImportError:
26
+ pass
27
+
28
+ path = os.path.abspath(os.path.dirname(__file__))
29
+ if path not in sys.path:
30
+ sys.path.append(path)
31
+ del path
32
+ import dataio.fileio as fileio
33
+ from model.hbr import HBR
34
+ from norm_base import NormBase
35
+
36
+
37
+ class NormHBR(NormBase):
38
+ """HBR multi-batch normative modelling class. By default, this function
39
+ estimates a linear model with random intercept, random slope, and random
40
+ homoscedastic noise.
41
+
42
+ :param X: [N×P] array of clinical covariates
43
+ :param y: [N×1] array of neuroimaging measures
44
+ :param trbefile: the address to the batch effects file for the training set.
45
+ the batch effect array should be a [N×M] array where M is the number of
46
+ the type of batch effects. For example when the site and gender is modeled
47
+ as batch effects M=2. Each column in the batch effect array contains the
48
+ batch ID (starting from 0) for each sample. If not specified (default=None)
49
+ then all samples assumed to be from the same batch (i.e., the batch effect
50
+ is not modelled).
51
+ :param tsbefile: Similar to trbefile for the test set.
52
+ :param model_type: Specifies the type of the model from 'linear', 'polynomial',
53
+ 'bspline', and 'nn' (default is 'linear').
54
+ :param likelihood: specifies the type of likelihood among 'Normal', 'SHASHb', 'SHASHo',
55
+ and 'SHASHo2' (default is 'Normal').
56
+ :param linear_mu: Boolean (default='True') to decide whether the mean (mu) is
57
+ parametrized on a linear function (thus changes with covariates) or it is fixed.
58
+ :param linear_sigma: Boolean (default='False') to decide whether the variance (sigma) is
59
+ parametrized on a linear function (heteroscedastic noise) or it is fixed for
60
+ each batch (homoscedastic noise).
61
+ :param linear_epsilon: Boolean (default='False') to decide the parametrization
62
+ of epsilon for the SHASH likelihood that controls its skewness.
63
+ If True, epsilon is parametrized on a linear function
64
+ (thus changes with covariates) otherwise it is fixed for each batch.
65
+ :param linear_delta: Boolean (default='False') to decide the parametrization
66
+ of delta for the SHASH likelihood that controls its kurtosis.
67
+ If True, delta is parametrized on a linear function
68
+ (thus changes with covariates) otherwise it is fixed for each batch.
69
+ :param random_intercept_{parameter}: if parameters mu (default='True'),
70
+ sigma (default='False'), epsilon (default='False'), and delta (default='False')
71
+ are parametrized on a linear function, then this boolean decides
72
+ whether the intercept can vary across batches.
73
+ :param random_slope_{parameter}: if parameters mu (default='True'),
74
+ sigma (default='False'), epsilon (default='False'), and delta (default='False')
75
+ are parametrized on a linear function, then this boolean decides
76
+ whether the slope can vary across batches.
77
+ :param centered_intercept_{parameter}: if parameters mu (default='False'),
78
+ sigma (default='False'), epsilon (default='False'), and delta (default='False')
79
+ are parametrized on a linear function, then this boolean decides
80
+ whether the parameters of intercept are estimated in a centered or
81
+ non-centered manner (default). While centered estimation runs faster
82
+ it may cause some problems for the sampler (the funnel of hell).
83
+ :param centered_slope_{parameter}: if parameters mu (default='False'),
84
+ sigma (default='False'), epsilon (default='False'), and delta (default='False')
85
+ are parametrized on a linear function, then this boolean decides
86
+ whether the parameters of slope are estimated in a centered or
87
+ non-centered manner (default). While centered estimation runs faster
88
+ it may cause some problems for the sampler (the funnel of hell).
89
+ :param sampler: specifies the type of PyMC sampler (default is 'NUTS').
90
+ :param n_samples: The number of samples to draw (default is '1000'). Please
91
+ note that this parameter must be specified in a string format ('1000' and
92
+ not 1000).
93
+ :param n_tuning: String that specifies the number of iterations to adjust
94
+ the sampler's step sizes, scalings or similar (default is '500').
95
+ :param n_chains: String that specifies the number of chains to sample. The default
96
+ is '1' for faster estimation, but note that sampling independent chains
97
+ is important for some convergence checks.
98
+ :param cores: String that specifies the number of chains to run in parallel
99
+ (default is '1').
100
+ :param init: Initialization method to use for auto-assigned NUTS samplers. The
101
+ default is 'jitter+adapt_diag_grad', PyMC's experimental variant of
102
+ 'jitter+adapt_diag' that uses gradient information as well as the tuning samples
103
+ to adapt a diagonal mass matrix, while adding a uniform jitter in [-1, 1] to the starting point in each chain.
104
+ :param target_accept: String representation of a float in [0, 1] that regulates the
105
+ step size such that we approximate this acceptance rate. The default is '0.8'
106
+ but higher values like 0.9 or 0.95 often work better for problematic posteriors.
107
+ :param order: String that defines the order of bspline or polynomial model.
108
+ The default is '3'.
109
+ :param nknots: String that defines the number of interior knots for the bspline model.
110
+ The default is '3'. Two knots will be added to this number for the boundaries, so the final
111
+ number of knots will be nknots+2. Higher values increase the model complexity with a negative
112
+ effect on the speed of estimation.
113
+ :param nn_hidden_layers_num: String that specifies the number of hidden layers
114
+ in the neural network model. It can be either '1' or '2'. The default is set to '2'.
115
+ :param nn_hidden_neuron_num: String that specifies the number of neurons in
116
+ the hidden layers. The default is set to '2'.
117
+
118
+ Written by S.de Boer and S.M. Kia
119
+
120
+ """
121
+
122
def __init__(self, **kwargs):
    """
    Parse the keyword options into ``self.configs`` and build the HBR model.

    Every option is expected as a string (for example ``n_samples="1000"`` or
    ``linear_mu="True"``). Boolean options are enabled only by the literal
    string ``"True"``; any other value disables them.

    The deprecated keywords ``{parameter}_linear``, ``random_noise`` and
    ``random_slope`` are still honoured: when one of them is supplied it is
    reported on stdout and its value is stored under the replacement key,
    taking precedence over the replacement keyword.

    :param kwargs: configuration keywords (model type, likelihood, sampler
        settings and the per-parameter linear/random/centered switches).
    :raises ValueError: if ``model_type`` is not 'linear', 'polynomial',
        'bspline' or 'nn', or if more than two hidden layers are requested
        for the 'nn' model.
    """

    def as_flag(name, default="False"):
        # Options arrive as strings; only the literal "True" switches a flag on.
        return kwargs.get(name, default) == "True"

    # Switches that are on unless the caller turns them off explicitly.
    on_by_default = (
        "linear_mu",
        "random_mu",
        "random_intercept_mu",
        "random_slope_mu",
        "random_sigma",
        "centered_sigma",
    )

    def model_flag(name):
        return as_flag(name, "True" if name in on_by_default else "False")

    self.configs = dict()
    # inputs
    self.configs["trbefile"] = kwargs.get("trbefile", None)
    self.configs["tsbefile"] = kwargs.get("tsbefile", None)
    # Model settings
    self.configs["type"] = kwargs.get("model_type", "linear")
    self.configs["random_noise"] = as_flag("random_noise", "True")
    self.configs["likelihood"] = kwargs.get("likelihood", "Normal")
    # sampler settings
    self.configs["nuts_sampler"] = kwargs.get("nuts_sampler", "pymc")
    self.configs["n_samples"] = int(kwargs.get("n_samples", "1000"))
    self.configs["n_tuning"] = int(kwargs.get("n_tuning", "500"))
    self.configs["n_chains"] = int(kwargs.get("n_chains", "1"))
    self.configs["sampler"] = kwargs.get("sampler", "NUTS")
    self.configs["target_accept"] = float(kwargs.get("target_accept", "0.8"))
    self.configs["init"] = kwargs.get("init", "jitter+adapt_diag_grad")
    self.configs["cores"] = int(kwargs.get("cores", "1"))
    self.configs["remove_datapoints_from_posterior"] = as_flag(
        "remove_datapoints_from_posterior", "True"
    )
    # model transfer setting
    self.configs["freedom"] = int(kwargs.get("freedom", "1"))
    self.configs["transferred"] = False
    # deprecated settings
    self.configs["skewed_likelihood"] = as_flag("skewed_likelihood")
    # misc
    self.configs["pred_type"] = kwargs.get("pred_type", "single")

    model_type = self.configs["type"]
    if model_type == "bspline":
        self.configs["order"] = int(kwargs.get("order", "3"))
        self.configs["nknots"] = int(kwargs.get("nknots", "3"))
    elif model_type == "polynomial":
        self.configs["order"] = int(kwargs.get("order", "3"))
    elif model_type == "nn":
        self.configs["nn_hidden_neuron_num"] = int(
            kwargs.get("nn_hidden_neuron_num", "2")
        )
        self.configs["nn_hidden_layers_num"] = int(
            kwargs.get("nn_hidden_layers_num", "2")
        )
        if self.configs["nn_hidden_layers_num"] > 2:
            raise ValueError(
                f"Using {self.configs['nn_hidden_layers_num']} layers was not "
                "implemented. The number of layers has to be less than 3."
            )
    elif model_type == "linear":
        pass
    else:
        # Adjacent literals instead of a backslash continuation, so the
        # message no longer contains the source indentation.
        raise ValueError(
            "Unknown model type, please specify from 'linear', "
            "'polynomial', 'bspline', or 'nn'."
        )

    if model_type in ["bspline", "polynomial", "linear"]:
        for p in ["mu", "sigma", "epsilon", "delta"]:
            self.configs[f"linear_{p}"] = model_flag(f"linear_{p}")

            # Deprecations (remove in later version)
            if f"{p}_linear" in kwargs:
                print(
                    f"The keyword '{p}_linear' is deprecated. It is now automatically replaced with 'linear_{p}'"
                )
                self.configs[f"linear_{p}"] = as_flag(f"{p}_linear")
            # End Deprecations

            for c in ["centered", "random"]:
                self.configs[f"{c}_{p}"] = model_flag(f"{c}_{p}")
                for sp in ["slope", "intercept"]:
                    self.configs[f"{c}_{sp}_{p}"] = model_flag(f"{c}_{sp}_{p}")

        # Deprecations (remove in later version)
        if "random_noise" in kwargs:
            if self.configs["linear_sigma"]:
                print(
                    "The keyword 'random_noise' is deprecated. It is now automatically replaced with 'random_intercept_sigma', because sigma is linear"
                )
                self.configs["random_intercept_sigma"] = as_flag(
                    "random_noise", "True"
                )
            else:
                print(
                    "The keyword 'random_noise' is deprecated. It is now automatically replaced with 'random_sigma', because sigma is fixed"
                )
                self.configs["random_sigma"] = as_flag("random_noise", "True")
        if "random_slope" in kwargs:
            print(
                "The keyword 'random_slope' is deprecated. It is now automatically replaced with 'random_slope_mu'"
            )
            self.configs["random_slope_mu"] = as_flag("random_slope", "True")
        # End Deprecations

    # Default parameters: only fill in what the block above did not set
    # (i.e. for the 'nn' model type), so that values taken from deprecated
    # keywords are not overwritten again.
    for name in on_by_default:
        if name not in self.configs:
            self.configs[name] = model_flag(name)
    # End default parameters

    self.hbr = HBR(self.configs)
247
+
248
@property
def n_params(self):
    """Number of parameters reported for this model; this implementation always reports 1."""
    parameter_count = 1
    return parameter_count
251
+
252
@property
def neg_log_lik(self):
    """Negative log-likelihood placeholder; this implementation always reports -1."""
    placeholder = -1
    return placeholder
255
+
256
def estimate(self, X, y, **kwargs):
    """
    Fit the Hierarchical Bayesian Regression (HBR) model to the training data.

    The batch effects are loaded from the file given as 'trbefile'. Without
    that keyword a single all-zero batch-effect column is used, i.e. all
    samples are treated as one batch. The mapping from every batch value to
    its index (one dictionary per batch-effect column) is stored in
    ``self.batch_effects_maps`` before the model is estimated.

    :param X: Data matrix.
    :param y: Target values.
    :param kwargs: Keyword arguments which may include:
        - 'trbefile': File containing the batch effects for the training data. Optional.
    :return: The instance of the NormHBR object.
    """
    batch_file = kwargs.get("trbefile", None)
    if batch_file is None:
        print("Could not find batch-effects file! Initilizing all as zeros ...")
        batch_effects_train = np.zeros([X.shape[0], 1])
    else:
        batch_effects_train = fileio.load(batch_file)

    # One {batch value -> index} dictionary per batch-effect column.
    maps = []
    for column in range(batch_effects_train.shape[1]):
        levels = np.unique(batch_effects_train[:, column])
        maps.append({level: index for index, level in enumerate(levels)})
    self.batch_effects_maps = maps

    self.hbr.estimate(X, y, batch_effects_train)

    return self
285
+
286
def predict(self, Xs, X=None, Y=None, **kwargs):
    """
    Predict the target values for the given test data.

    The batch effects of the test data are loaded from the file given as
    'tsbefile'; without that keyword a single all-zero batch-effect column is
    used. The prediction type is taken from ``self.configs["pred_type"]`` and
    all keyword arguments are forwarded to the HBR model's predict call.

    :param Xs: Test data matrix.
    :param X: Training data matrix. Accepted but not used by this method.
    :param Y: Training target values. Accepted but not used by this method.
    :param kwargs: Keyword arguments which may include:
        - 'tsbefile': File containing the batch effects for the test data. Optional.
    :return: A tuple containing the predicted target values and the marginal
        variances for the test data, both squeezed.
    """
    batch_file = kwargs.get("tsbefile", None)
    if batch_file is None:
        print("Could not find batch-effects file! Initilizing all as zeros ...")
        batch_effects_test = np.zeros([Xs.shape[0], 1])
    else:
        batch_effects_test = fileio.load(batch_file)

    # No check on self.configs["transferred"] is made here; transferred
    # models are predicted through the same call.
    prediction = self.hbr.predict(
        X=Xs,
        batch_effects=batch_effects_test,
        batch_effects_maps=self.batch_effects_maps,
        pred=self.configs["pred_type"],
        **kwargs,
    )
    yhat, s2 = prediction

    return yhat.squeeze(), s2.squeeze()
326
+
327
def transfer(self, X, y, batch_effects):
    """
    Transfer the Hierarchical Bayesian Regression (HBR) model to new data.

    Delegates to the HBR model's ``transfer`` method with the given data and
    marks this object as transferred in ``self.configs``.

    :param X: Data matrix.
    :param y: Target values.
    :param batch_effects: Batch effects belonging to X.
    :return: The instance of the NormHBR object.
    """
    model = self.hbr
    model.transfer(X, y, batch_effects)
    self.configs.update(transferred=True)
    return self
344
+
345
def predict_on_new_sites(self, X, batch_effects):
    """
    Predict the target values for test data from new sites.

    Calls the HBR model's predict method with the given batch effects and the
    batch-effect maps stored on this object.

    :param X: Test data matrix for the new sites.
    :param batch_effects: Batch effects for the new sites.
    :return: A tuple containing the predicted target values and the marginal
        variances for the test data on the new sites.
    """
    prediction = self.hbr.predict(
        X,
        batch_effects=batch_effects,
        batch_effects_maps=self.batch_effects_maps,
    )
    yhat, s2 = prediction
    return yhat, s2
364
+
365
+
366
def extend(
    self,
    X,
    y,
    batch_effects,
    X_dummy_ranges=[[0.1, 0.9, 0.01]],
    merge_batch_dim=0,
    samples=10,
    informative_prior=False,
):
    """
    Extend the Hierarchical Bayesian Regression model with data from new sites.

    Synthetic data are generated from the current model (dummy inputs are
    created from 'X' and sampled through the HBR model's ``generate``), the
    batch ids of the new data are shifted in column 'merge_batch_dim' so that
    they start after the largest synthetic batch id, and the model is then
    re-fitted on the synthetic and new data together.
    ``self.batch_effects_maps`` is rebuilt from the merged batch effects.

    The caller's 'batch_effects' array is not modified; the shift is applied
    to a copy.

    :param X: Data matrix for the new sites.
    :param y: Target values for the new sites.
    :param batch_effects: Batch effects for the new sites.
    :param X_dummy_ranges: Kept for interface compatibility; it is not used by
        this method. Default is [[0.1, 0.9, 0.01]].
    :param merge_batch_dim: Column of the batch effects along which the new
        batch ids are shifted. Default is 0.
    :param samples: Number of samples passed to the HBR model's ``generate``.
        Default is 10.
    :param informative_prior: If True the merged data are fitted with the HBR
        model's ``transfer`` method, otherwise with ``estimate``. Default is False.
    :return: The instance of the NormHBR object.
    """
    X_dummy, batch_effects_dummy = self.hbr.create_dummy_inputs(X)

    X_dummy, batch_effects_dummy, Y_dummy = self.hbr.generate(
        X_dummy,
        batch_effects_dummy,
        samples,
        batch_effects_maps=self.batch_effects_maps,
    )

    # Shift the new batch ids on a copy: assigning into the argument itself
    # would silently renumber the caller's array.
    batch_effects = np.array(batch_effects, copy=True)
    batch_effects[:, merge_batch_dim] = (
        batch_effects[:, merge_batch_dim]
        + np.max(batch_effects_dummy[:, merge_batch_dim])
        + 1
    )

    X = np.concatenate((X_dummy, X))
    y = np.concatenate((Y_dummy, y))
    batch_effects = np.concatenate((batch_effects_dummy, batch_effects))

    # One {batch value -> index} dictionary per batch-effect column.
    self.batch_effects_maps = [
        {v: i for i, v in enumerate(np.unique(batch_effects[:, j]))}
        for j in range(batch_effects.shape[1])
    ]

    if informative_prior:
        self.hbr.transfer(X, y, batch_effects)
    else:
        self.hbr.estimate(X, y, batch_effects)

    return self
421
+
422
def tune(
    self,
    X,
    y,
    batch_effects,
    X_dummy_ranges=[[0.1, 0.9, 0.01]],
    merge_batch_dim=0,
    samples=10,
    informative_prior=False,
):
    """
    Tune the Hierarchical Bayesian Regression model on the given batches.

    Dummy inputs are created from 'X_dummy_ranges', the rows whose batch id
    (column 'merge_batch_dim') occurs in 'batch_effects' are dropped, data are
    generated for the remaining rows, and the model is re-fitted on the
    generated data together with 'X', 'y' and 'batch_effects'.

    The behavior of this function is not tested, and it is unclear if the
    desired behavior is achieved; a warning saying so is printed on every call.

    :param X: Data matrix of the batches to tune.
    :param y: Target values of the batches to tune.
    :param batch_effects: Batch effects of the batches to tune.
    :param X_dummy_ranges: Ranges handed to the HBR model's
        ``create_dummy_inputs``. Default is [[0.1, 0.9, 0.01]].
    :param merge_batch_dim: Batch-effect column that identifies the batches
        to tune. Default is 0.
    :param samples: Number of samples passed to the HBR model's ``generate``.
        Default is 10.
    :param informative_prior: If True the HBR model's ``adapt`` method is
        used for fitting, otherwise ``estimate``. Default is False.
    :return: The instance of the NormHBR object.
    """

    # TODO need to check if this is correct

    print(
        "The 'tune' function is being called, but it is currently in development and its behavior is not tested. It is unclear if the desired behavior is achieved. Any output following this should be treated as unreliable."
    )

    tuned_batches = list(np.unique(batch_effects[:, merge_batch_dim]))

    X_dummy, batch_effects_dummy = self.hbr.create_dummy_inputs(X_dummy_ranges)

    # Drop the dummy rows that belong to a batch which is being tuned.
    for batch_id in tuned_batches:
        keep = batch_effects_dummy[:, merge_batch_dim] != batch_id
        X_dummy = X_dummy[keep, :]
        batch_effects_dummy = batch_effects_dummy[keep, :]

    X_dummy, batch_effects_dummy, Y_dummy = self.hbr.generate(
        X_dummy, batch_effects_dummy, samples
    )

    fit = self.hbr.adapt if informative_prior else self.hbr.estimate
    fit(
        np.concatenate((X_dummy, X)),
        np.concatenate((Y_dummy, y)),
        np.concatenate((batch_effects_dummy, batch_effects)),
    )

    return self
472
+
473
def merge(
    self, nm, X_dummy_ranges=[[0.1, 0.9, 0.01]], merge_batch_dim=0, samples=10
):
    """
    Merge another model into this one by re-estimating on generated data.

    Data are generated from both models, the batch ids generated by 'nm' are
    shifted in column 'merge_batch_dim' so that they start after the largest
    batch id generated by this model, and this model is re-estimated on the
    concatenated data. ``self.batch_effects_maps`` is rebuilt from the merged
    batch effects, in the same way as ``estimate`` and ``extend`` do, so that
    it matches the batch ids the model was re-estimated on.

    :param nm: The other NormHBR object.
    :param X_dummy_ranges: Ranges handed to ``create_dummy_inputs`` of both
        HBR models. Default is [[0.1, 0.9, 0.01]].
    :param merge_batch_dim: Batch-effect column along which the batch ids of
        'nm' are shifted. Default is 0.
    :param samples: Number of samples passed to ``generate`` of both HBR
        models. Default is 10.
    :return: The instance of the NormHBR object.
    """

    X_dummy1, batch_effects_dummy1 = self.hbr.create_dummy_inputs(X_dummy_ranges)
    X_dummy2, batch_effects_dummy2 = nm.hbr.create_dummy_inputs(X_dummy_ranges)

    X_dummy1, batch_effects_dummy1, Y_dummy1 = self.hbr.generate(
        X_dummy1, batch_effects_dummy1, samples
    )
    X_dummy2, batch_effects_dummy2, Y_dummy2 = nm.hbr.generate(
        X_dummy2, batch_effects_dummy2, samples
    )

    # Shift on a copy so an array still referenced by the other model is not
    # renumbered as a side effect.
    batch_effects_dummy2 = np.array(batch_effects_dummy2, copy=True)
    batch_effects_dummy2[:, merge_batch_dim] = (
        batch_effects_dummy2[:, merge_batch_dim]
        + np.max(batch_effects_dummy1[:, merge_batch_dim])
        + 1
    )

    merged_batch_effects = np.concatenate(
        (batch_effects_dummy1, batch_effects_dummy2)
    )

    # Keep the {batch value -> index} maps in step with the data the model is
    # estimated on; without this they would still describe the old batches.
    self.batch_effects_maps = [
        {v: i for i, v in enumerate(np.unique(merged_batch_effects[:, j]))}
        for j in range(merged_batch_effects.shape[1])
    ]

    self.hbr.estimate(
        np.concatenate((X_dummy1, X_dummy2)),
        np.concatenate((Y_dummy1, Y_dummy2)),
        merged_batch_effects,
    )

    return self
512
+
513
def generate(self, X, batch_effects, samples=10):
    """
    Generate data from the HBR model.

    Thin wrapper around the HBR model's ``generate`` method.

    :param X: Data matrix to generate for.
    :param batch_effects: Batch effects belonging to X.
    :param samples: Number of samples passed on to the HBR model. Default is 10.
    :return: The three values produced by the HBR model's ``generate``, as a
        tuple (X, batch_effects, generated_samples).
    """
    generated = self.hbr.generate(X, batch_effects, samples)
    X_out, batch_effects_out, generated_samples = generated
    return X_out, batch_effects_out, generated_samples
518
+
519
+ def get_mcmc_quantiles(self, X, batch_effects=None, z_scores=None):
520
+ """
521
+ Computes quantiles of an estimated normative model.
522
+
523
+ Args:
524
+ X ([N*p]ndarray): covariates for which the quantiles are computed (must be scaled if scaler is set)
525
+ batch_effects (ndarray): the batch effects corresponding to X
526
+ z_scores (ndarray): Use this to determine which quantiles will be computed. The resulting quantiles will have the z-scores given in this list.
527
+ """
528
+ # Set batch effects to zero if none are provided
529
+ if batch_effects is None:
530
+ batch_effects = np.zeros([X.shape[0], 1])
531
+
532
+ # Set the z_scores for which the quantiles are computed
533
+ if z_scores is None:
534
+ z_scores = np.arange(-3, 4)
535
+ elif len(z_scores.shape) == 2:
536
+ if not z_scores.shape[0] == X.shape[0]:
537
+ raise ValueError("The number of columns in z_scores must match the number of columns in X")
538
+ z_scores = z_scores.T
539
+
540
+ # Determine the variables to predict
541
+ match self.configs["likelihood"]:
542
+ case "Normal":
543
+ var_names = ["mu_samples", "sigma_samples", "sigma_plus_samples"]
544
+ case "SHASHo" | "SHASHo2" | "SHASHb":
545
+ var_names = [
546
+ "mu_samples",
547
+ "sigma_samples",
548
+ "sigma_plus_samples",
549
+ "epsilon_samples",
550
+ "delta_samples",
551
+ "delta_plus_samples",
552
+ ]
553
+ case _:
554
+ exit("Unknown likelihood: " + self.configs["likelihood"])
555
+
556
+ # Delete the posterior predictive if it already exists
557
+ if "posterior_predictive" in self.hbr.idata.groups():
558
+ del self.hbr.idata.posterior_predictive
559
+
560
+ self.hbr.predict(
561
+ X=X,
562
+ batch_effects=batch_effects,
563
+ batch_effects_maps=self.batch_effects_maps,
564
+ pred="single",
565
+ var_names=var_names + ["y_like"],
566
+ )
567
+
568
+ # Extract the relevant samples from the idata
569
+ post_pred = az.extract(
570
+ self.hbr.idata, "posterior_predictive", var_names=var_names
571
+ )
572
+
573
+ # Remove superfluous var_nammes
574
+ var_names.remove("sigma_samples")
575
+ if "delta_samples" in var_names:
576
+ var_names.remove("delta_samples")
577
+
578
+ # Separate the samples into a list so that they can be unpacked
579
+ array_of_vars = list(map(lambda x: post_pred[x], var_names))
580
+
581
+ # Create an array to hold the quantiles
582
+ len_synth_data, n_mcmc_samples = post_pred["mu_samples"].shape
583
+ quantiles = np.zeros(
584
+ (z_scores.shape[0], len_synth_data, n_mcmc_samples))
585
+
586
+ # Compute the quantile iteratively for each z-score
587
+
588
+ for i, j in enumerate(z_scores):
589
+ if len(z_scores.shape) == 1:
590
+ zs = np.full((len_synth_data, n_mcmc_samples), j, dtype=float)
591
+ else:
592
+ zs = np.repeat(j[:,None], n_mcmc_samples, axis=1)
593
+ quantiles[i] = xarray.apply_ufunc(
594
+ quantile,
595
+ *array_of_vars,
596
+ kwargs={"zs": zs, "likelihood": self.configs["likelihood"]},
597
+ )
598
+ return quantiles.mean(axis=-1)
599
+
600
def get_mcmc_zscores(self, X, y, **kwargs):
    """
    Compute z-scores of data given an estimated model.

    The likelihood parameters are sampled through the HBR model's predict
    call, the z-score of 'y' is computed for every MCMC sample, and the
    result is averaged over the MCMC samples. The configured likelihood is
    printed, and an existing posterior predictive group in the HBR model's
    inference data is deleted first.

    :param X: [N×P] array of covariates.
    :param y: [N×1] array of response variables.
    :param kwargs: Keyword arguments which may include:
        - 'tsbefile': File containing the batch effects for the data. Optional;
          without it a single all-zero batch-effect column is used.
    :return: The z-scores averaged over the MCMC samples, as a plain array.
    """
    likelihood = self.configs["likelihood"]
    print(likelihood)

    batch_file = kwargs.get("tsbefile", None)
    if batch_file is None:
        # Set batch effects to zero if none are provided
        print("Could not find batch-effects file! Initializing all as zeros ...")
        batch_effects_test = np.zeros([X.shape[0], 1])
    else:
        batch_effects_test = fileio.load(batch_file)

    # Determine the variables to predict
    if likelihood == "Normal":
        var_names = ["mu_samples", "sigma_samples", "sigma_plus_samples"]
    elif likelihood.startswith("SHASH"):
        var_names = [
            "mu_samples",
            "sigma_samples",
            "sigma_plus_samples",
            "epsilon_samples",
            "delta_samples",
            "delta_plus_samples",
        ]
    else:
        exit("Unknown likelihood: " + likelihood)

    # Delete the posterior predictive if it already exists
    idata = self.hbr.idata
    if "posterior_predictive" in idata.groups():
        del idata.posterior_predictive

    # Do a forward pass to get the posterior predictive in the idata
    self.hbr.predict(
        X=X,
        batch_effects=batch_effects_test,
        batch_effects_maps=self.batch_effects_maps,
        pred="single",
        var_names=var_names + ["y_like"],
    )

    # Extract the relevant samples from the idata
    post_pred = az.extract(
        self.hbr.idata, "posterior_predictive", var_names=var_names
    )

    # Only the '_plus' versions of sigma and delta are handed to z_score()
    kept_names = [
        name
        for name in var_names
        if name not in ("sigma_samples", "delta_samples")
    ]
    array_of_vars = [post_pred[name] for name in kept_names]

    # Unpacking requires the samples to be two-dimensional
    len_data, n_mcmc_samples = post_pred["mu_samples"].shape

    # Compute the z-score for every MCMC sample
    z_scores = xarray.apply_ufunc(
        z_score,
        *array_of_vars,
        kwargs={"y": y, "likelihood": likelihood},
    )
    return z_scores.mean(axis=-1).values
669
+
670
+
671
def S_inv(x, e, d):
    """
    Apply sinh((arcsinh(x) + e) / d) element-wise.

    :param x: value(s) to transform
    :param e: value(s) added after the arcsinh
    :param d: value(s) the shifted result is divided by
    :return: the transformed value(s)
    """
    shifted = np.arcsinh(x) + e
    return np.sinh(shifted / d)
673
+
674
+
675
def K(p, x):
    """
    Computes the values of spp.kv(p,x) for only the unique values of p

    The result has the shape of 'p'. 'p' may be an array, a nested list or a
    scalar; it is converted to an array first.

    :param p: order(s) at which spp.kv is evaluated
    :param x: argument passed to spp.kv
    :return: array of spp.kv values with the shape of 'p'
    """
    p = np.asarray(p)
    unique_orders, inverse = np.unique(p, return_inverse=True)
    # The inverse index is flattened explicitly because its shape for
    # multi-dimensional input differs between NumPy versions.
    return spp.kv(unique_orders, x)[inverse.ravel()].reshape(p.shape)
682
+
683
+
684
def P(q):
    """
    The P function as given in Jones et al.

    Evaluates exp(1/4) / sqrt(8 * pi) * (K((q + 1) / 2, 1/4) + K((q - 1) / 2, 1/4)).

    :param q: value(s) at which P is evaluated
    :return: the value(s) of P at q
    """
    scale = np.exp(1 / 4) / np.sqrt(8 * np.pi)
    bessel_sum = K((q + 1) / 2, 1 / 4) + K((q - 1) / 2, 1 / 4)
    return bessel_sum * scale
695
+
696
+
697
def m(epsilon, delta, r):
    """
    The r'th uncentered moment. Given by Jones et al.

    :param epsilon: epsilon parameter value(s)
    :param delta: delta parameter value(s)
    :param r: order of the moment (non-negative integer)
    :return: the r'th uncentered moment
    """

    def term(i):
        # i'th term of the binomial sum, with alternating sign
        k = r - 2 * i
        return (
            spp.comb(r, i)
            * np.power(-1, i)
            * np.exp(k * epsilon / delta)
            * P(k / delta)
        )

    total = sum(term(i) for i in range(r + 1))
    return (1 / np.power(2, r)) * total
710
+
711
+
712
def quantile(mu, sigma, epsilon=None, delta=None, zs=0, likelihood="Normal"):
    """
    Get the zs'th quantiles given likelihood parameters

    :param mu: mu parameter value(s)
    :param sigma: sigma parameter value(s)
    :param epsilon: epsilon parameter value(s); used by the SHASH likelihoods only
    :param delta: delta parameter value(s); used by the SHASH likelihoods only
    :param zs: z-score(s) of the requested quantiles
    :param likelihood: one of 'Normal', 'SHASHo', 'SHASHo2' or 'SHASHb'
    :return: the quantile value(s)
    :raises SystemExit: for any other likelihood, including unknown names that
        start with 'SHASH'
    """
    if likelihood == "Normal":
        return zs * sigma + mu
    if likelihood == "SHASHo":
        return S_inv(zs, epsilon, delta) * sigma + mu
    if likelihood == "SHASHo2":
        sigma_d = sigma / delta
        return S_inv(zs, epsilon, delta) * sigma_d + mu
    if likelihood == "SHASHb":
        # Standardise with the first two moments before rescaling by sigma and mu
        true_mu = m(epsilon, delta, 1)
        true_sigma = np.sqrt((m(epsilon, delta, 2) - true_mu**2))
        SHASH_c = (S_inv(zs, epsilon, delta) - true_mu) / true_sigma
        return SHASH_c * sigma + mu
    # Every unknown name ends up here, so an unknown 'SHASH...' variant no
    # longer fails with an unbound local variable.
    exit("Unsupported likelihood")
730
+
731
+
732
def z_score(mu, sigma, epsilon=None, delta=None, y=None, likelihood="Normal"):
    """
    Get the z-scores of Y, given likelihood parameters

    :param mu: mu parameter value(s)
    :param sigma: sigma parameter value(s)
    :param epsilon: epsilon parameter value(s); used by the SHASH likelihoods only
    :param delta: delta parameter value(s); used by the SHASH likelihoods only
    :param y: observed value(s) to convert to z-scores
    :param likelihood: one of 'Normal', 'SHASHo', 'SHASHo2' or 'SHASHb'
    :return: the z-score(s) of y
    :raises SystemExit: for any other likelihood, including unknown names that
        start with 'SHASH'
    """
    if likelihood == "Normal":
        return (y - mu) / sigma
    if likelihood == "SHASHo":
        SHASH = (y - mu) / sigma
    elif likelihood == "SHASHo2":
        sigma_d = sigma / delta
        SHASH = (y - mu) / sigma_d
    elif likelihood == "SHASHb":
        # Undo the standardisation with the first two moments
        true_mu = m(epsilon, delta, 1)
        true_sigma = np.sqrt((m(epsilon, delta, 2) - true_mu**2))
        SHASH_c = (y - mu) / sigma
        SHASH = SHASH_c * true_sigma + true_mu
    else:
        # Every unknown name ends up here, so an unknown 'SHASH...' variant
        # no longer fails with an unbound local variable.
        exit("Unsupported likelihood")
    return np.sinh(np.arcsinh(SHASH) * delta - epsilon)