pcntoolkit 0.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1584 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on Thu Jul 25 13:23:15 2019
5
+
6
+ @author: seykia
7
+ @author: augub
8
+ """
9
+
10
+ from __future__ import print_function
11
+ from __future__ import division
12
+ from collections import OrderedDict
13
+
14
+ import numpy as np
15
+ import pymc as pm
16
+ import pytensor
17
+ import arviz as az
18
+ import xarray
19
+ from itertools import product
20
+ from functools import reduce
21
+ from scipy import stats
22
+
23
+ from pcntoolkit.util.utils import create_poly_basis
24
+ from pcntoolkit.util.utils import expand_all
25
+ from pcntoolkit.util.utils import cartesian_product
26
+ from pcntoolkit.util.bspline import BSplineBasis
27
+ from pcntoolkit.model.SHASH import *
28
+
29
+
30
+ def create_poly_basis(X, order):
31
+ """
32
+ Create a polynomial basis expansion of the specified order
33
+ :param X: [N×P] array of clinical covariates
34
+ :param order: order of the polynomial
35
+ :return: a [N×(P×order)] array of transformed data
36
+ """
37
+ if len(X.shape) == 1:
38
+ X = X[:, np.newaxis]
39
+ D = X.shape[1]
40
+ Phi = np.zeros((X.shape[0], D * order))
41
+ colid = np.arange(0, D)
42
+ for d in range(1, order + 1):
43
+ Phi[:, colid] = X**d
44
+ colid += D
45
+ return Phi
46
+
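+ # Illustrative example (hypothetical values, not executed at import time):
+ # for X of shape (5, 2), create_poly_basis(X, 2) stacks the columns
+ # [x1, x2, x1**2, x2**2]:
+ #
+ #     X = np.random.randn(5, 2)
+ #     Phi = create_poly_basis(X, 2)
+ #     assert Phi.shape == (5, 4)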
47
+
48
+ def from_posterior(param, samples, shape, distribution=None, dims=None, half=False, freedom=1):
49
+ """
50
+ Create a PyMC distribution from posterior samples
51
+
52
+ :param param: name of the parameter
53
+ :param samples: samples from the posterior
54
+ :param shape: shape of the parameter
55
+ :param distribution: distribution to use for the parameter
56
+ :param dims: dims of the parameter
57
+ :param half: if true, the distribution is assumed to be defined on the positive real line
58
+ :param freedom: freedom parameter for the distribution
59
+ :return: a PyMC distribution
60
+ """
61
+ if dims == []:
62
+ dims = None
63
+ if distribution is None:
64
+ smin, smax = np.min(samples), np.max(samples)
65
+ width = smax - smin
66
+ x = np.linspace(smin, smax, 1000)
67
+ y = stats.gaussian_kde(np.ravel(samples))(x)
68
+ if half:
69
+ x = np.concatenate([x, [x[-1] + 0.1 * width]])
70
+ y = np.concatenate([y, [0]])
71
+ else:
72
+ x = np.concatenate(
73
+ [[x[0] - 0.1 * width], x, [x[-1] + 0.1 * width]])
74
+ y = np.concatenate([[0], y, [0]])
75
+ if shape is None:
76
+ return pm.distributions.Interpolated(param, x, y)
77
+ else:
78
+ return pm.distributions.Interpolated(param, x, y, shape=shape, dims=dims)
79
+ elif distribution == "normal":
80
+ temp = stats.norm.fit(samples)
81
+ if shape is None:
82
+ return pm.Normal(param, mu=temp[0], sigma=freedom * temp[1])
83
+ else:
84
+ return pm.Normal(param, mu=temp[0], sigma=freedom * temp[1], shape=shape, dims=dims)
85
+ elif distribution == "hnormal":
86
+ temp = stats.halfnorm.fit(samples)
87
+ if shape is None:
88
+ return pm.HalfNormal(param, sigma=freedom * temp[1])
89
+ else:
90
+ return pm.HalfNormal(param, sigma=freedom * temp[1], shape=shape, dims=dims)
91
+ elif distribution == "hcauchy":
92
+ temp = stats.halfcauchy.fit(samples)
93
+ if shape is None:
94
+ return pm.HalfCauchy(param, freedom * temp[1])
95
+ else:
96
+ return pm.HalfCauchy(param, freedom * temp[1], shape=shape, dims=dims)
97
+ elif distribution == "uniform":
98
+ upper_bound = np.percentile(samples, 95)
99
+ lower_bound = np.percentile(samples, 5)
100
+ r = np.abs(upper_bound - lower_bound)
101
+ if shape is None:
102
+ return pm.Uniform(
103
+ param, lower=lower_bound - freedom * r, upper=upper_bound + freedom * r
104
+ )
105
+ else:
106
+ return pm.Uniform(
107
+ param,
108
+ lower=lower_bound - freedom * r,
109
+ upper=upper_bound + freedom * r,
110
+ shape=shape,
111
+ dims=dims,
112
+ )
113
+ elif distribution == "huniform":
114
+ upper_bound = np.percentile(samples, 95)
115
+ lower_bound = np.percentile(samples, 5)
116
+ r = np.abs(upper_bound - lower_bound)
117
+ if shape is None:
118
+ return pm.Uniform(param, lower=0, upper=upper_bound + freedom * r)
119
+ else:
120
+ return pm.Uniform(
121
+ param, lower=0, upper=upper_bound + freedom * r, shape=shape, dims=dims
122
+ )
123
+
124
+ elif distribution == "gamma":
125
+ alpha_fit, loc_fit, invbeta_fit = stats.gamma.fit(samples)
126
+ if shape is None:
127
+ return pm.Gamma(
128
+ param, alpha=freedom * alpha_fit, beta=freedom / invbeta_fit
129
+ )
130
+ else:
131
+ return pm.Gamma(
132
+ param,
133
+ alpha=freedom * alpha_fit,
134
+ beta=freedom / invbeta_fit,
135
+ shape=shape,
136
+ dims=dims,
137
+ )
138
+
139
+ elif distribution == "igamma":
140
+ alpha_fit, loc_fit, beta_fit = stats.invgamma.fit(samples)
141
+ if shape is None:
142
+ return pm.InverseGamma(
143
+ param, alpha=freedom * alpha_fit, beta=freedom * beta_fit
144
+ )
145
+ else:
146
+ return pm.InverseGamma(
147
+ param, alpha=freedom * alpha_fit, beta=freedom * beta_fit, shape=shape, dims=dims
148
+ )
149
+
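+ # Sketch of typical use (hypothetical variable names): carry the posterior of
+ # a previously fitted parameter over as the prior of a new model, optionally
+ # widened with `freedom`:
+ #
+ #     samples = az.extract(old_idata, var_names="theta").to_numpy()
+ #     with pm.Model():
+ #         theta = from_posterior("theta", samples, shape=None,
+ #                                distribution="normal", freedom=2)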
150
+
151
+ def hbr(X, y, batch_effects, configs, idata=None):
152
+ """
153
+ Create a Hierarchical Bayesian Regression model
154
+
155
+ :param X: [N×P] array of clinical covariates
156
+ :param y: [N×1] array of neuroimaging measures
157
+ :param batch_effects: [N×M] array of batch effects
158
+ :param configs: a dictionary of model configurations
159
+ :param idata: an InferenceData object from a previous fit, used to transfer its posterior as priors; None when fitting from scratch
160
+ :return: the PyMC model
162
+ """
163
+
164
+ # Make a param builder that contains all the data and configs
165
+ pb = ParamBuilder(X, y, batch_effects, idata, configs)
166
+
167
+ def get_sample_dims(var):
168
+ if configs[f'random_{var}']:
169
+ return 'datapoints'
170
+ elif configs[f'random_slope_{var}']:
171
+ return 'datapoints'
172
+ elif configs[f'random_intercept_{var}']:
173
+ return 'datapoints'
174
+ elif configs[f'linear_{var}']:
175
+ return 'datapoints'
176
+ return None
177
+
178
+ with pm.Model(coords=pb.coords) as model:
179
+ model.add_coord("datapoints", np.arange(X.shape[0]))
180
+ X = pm.Data("X", X, dims=("datapoints", "basis_functions"))
181
+ pb.X = X
182
+ y = pm.Data("y", np.squeeze(y), dims="datapoints")
183
+ pb.y = y
184
+ pb.model = model
185
+ pb.batch_effect_indices = tuple(
186
+ [
187
+ pm.Data(
188
+ pb.batch_effect_dim_names[i]+"_data",
189
+ pb.batch_effect_indices[i],
190
+ dims="datapoints",
191
+ )
192
+ for i in range(len(pb.batch_effect_indices))
193
+ ]
194
+ )
195
+
196
+ if configs["likelihood"] == "Normal":
197
+ mu = pm.Deterministic(
198
+ "mu_samples",
199
+ pb.make_param(
200
+ "mu",
201
+ intercept_mu_params=(0.0, 10.0),
202
+ slope_mu_params=(0.0, 10.0),
203
+ mu_slope_mu_params=(0.0, 10.0),
204
+ sigma_slope_mu_params=(10.0,),
205
+ mu_intercept_mu_params=(0.0, 10.0),
206
+ sigma_intercept_mu_params=(10.0,),
207
+ ).get_samples(pb),
208
+ dims=get_sample_dims('mu'),
209
+ )
210
+ sigma = pm.Deterministic(
211
+ "sigma_samples",
212
+ pb.make_param(
213
+ "sigma",
214
+ sigma_params=(10., 10.0),
215
+ sigma_dist="normal",
216
+ slope_sigma_params=(0.0, 10.0),
217
+ intercept_sigma_params=(10.0, 10.0),
218
+ ).get_samples(pb),
219
+ dims=get_sample_dims('sigma'),
220
+ )
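+ # Scaled softplus: 10*log(1+exp(sigma/10)) maps sigma onto the positive
+ # real line while remaining approximately the identity for large sigma.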
221
+ sigma_plus = pm.Deterministic(
222
+ "sigma_plus_samples", np.log(1+np.exp(sigma/10))*10, dims=get_sample_dims('sigma')
223
+ )
224
+ y_like = pm.Normal(
225
+ "y_like",
226
+ mu=mu,
227
+ sigma=sigma_plus,
228
+ observed=y,
229
+ dims="datapoints",
230
+ )
231
+
232
+ elif configs["likelihood"] in ["SHASHb", "SHASHo", "SHASHo2"]:
233
+ """
234
+ Comment 1
235
+ The current parameterizations are tuned towards standardized in- and output data.
236
+ The priors can be adjusted through the XXX_dist and XXX_params kwargs, as we do here with epsilon_params.
237
+ Supported distributions are listed in the Prior class.
238
+ Comment 2
239
+ Any mapping that is applied here after sampling must also be applied in util.hbr_utils.forward for the functions there to work properly.
240
+ For example, the softplus applied to sigma here is also applied in util.hbr_utils.forward.
241
+ """
242
+ SHASH_map = {"SHASHb": SHASHb,
243
+ "SHASHo": SHASHo, "SHASHo2": SHASHo2}
244
+
245
+ mu = pm.Deterministic(
246
+ "mu_samples",
247
+ pb.make_param(
248
+ "mu",
249
+ intercept_mu_params=(0.0, 10.0),
250
+ slope_mu_params=(0.0, 10.0),
251
+ mu_slope_mu_params=(0.0, 10.0),
252
+ sigma_slope_mu_params=(10.0,),
253
+ mu_intercept_mu_params=(0.0, 10.0),
254
+ sigma_intercept_mu_params=(10.0,),
255
+ ).get_samples(pb),
256
+ dims=get_sample_dims('mu'),
257
+ )
258
+ sigma = pm.Deterministic(
259
+ "sigma_samples",
260
+ pb.make_param(
261
+ "sigma",
262
+ sigma_params=(10., 10.0),
263
+ sigma_dist="normal",
264
+ slope_sigma_params=(0.0, 10.0),
265
+ intercept_sigma_params=(10.0, 10.0),
266
+ ).get_samples(pb),
267
+ dims=get_sample_dims('sigma'),
268
+ )
269
+ sigma_plus = pm.Deterministic(
270
+ "sigma_plus_samples", np.log(1+np.exp(sigma/10))*10, dims=get_sample_dims('sigma')
271
+ )
272
+ epsilon = pm.Deterministic(
273
+ "epsilon_samples",
274
+ pb.make_param(
275
+ "epsilon",
276
+ epsilon_params=(0.0, 2.0),
277
+ slope_epsilon_params=(0.0, 3.0),
278
+ intercept_epsilon_params=(0.0, 3.0),
279
+ ).get_samples(pb),
280
+ dims=get_sample_dims('epsilon'),
281
+ )
282
+ delta = pm.Deterministic(
283
+ "delta_samples",
284
+ pb.make_param(
285
+ "delta",
286
+ delta_params=(0., 2.0),
287
+ delta_dist="normal",
288
+ slope_delta_params=(0.0, 1.0),
289
+ intercept_delta_params=(0.0, 1.0),
290
+ ).get_samples(pb),
291
+ dims=get_sample_dims('delta'),
292
+ )
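+ # Scaled softplus plus an offset: keeps delta strictly above 0.3, since
+ # very small delta values make the SHASH likelihood numerically unstable.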
293
+ delta_plus = pm.Deterministic(
294
+ "delta_plus_samples",
295
+ np.log(1+np.exp(delta/3))*3 + 0.3,
296
+ dims=get_sample_dims('delta'),
297
+ )
298
+ y_like = SHASH_map[configs["likelihood"]](
299
+ "y_like",
300
+ mu=mu,
301
+ sigma=sigma_plus,
302
+ epsilon=epsilon,
303
+ delta=delta_plus,
304
+ observed=y,
305
+ dims="datapoints",
306
+ )
307
+ return model
308
+
309
+
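+ # Illustrative sketch of the configs keys read by hbr() above (values are
+ # hypothetical, not recommended defaults):
+ #
+ #     configs = {
+ #         "likelihood": "SHASHb",        # or "Normal", "SHASHo", "SHASHo2"
+ #         "linear_mu": True,             # linear_<param>: model the param as a function of X
+ #         "random_slope_mu": True,       # random_*/centered_* flags per (sub)parameter
+ #         "random_intercept_mu": True,
+ #         "centered_intercept_mu": False,
+ #         "random_mu": False,
+ #         ...
+ #     }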
310
+ class HBR:
311
+
312
+ """Hierarchical Bayesian Regression for normative modeling
313
+
314
+ Basic usage::
315
+
316
+ model = HBR(configs)
317
+ idata = model.estimate(X, y, batch_effects)
318
+ ys, s2 = model.predict(X, batch_effects, batch_effects_maps)
319
+
320
+ where the variables are
321
+
322
+ :param configs: a dictionary of model configurations.
323
+ :param X: N-by-P input matrix of P features for N subjects
324
+ :param y: N-by-1 vector of outputs.
325
+ :param batch_effects: N-by-B matrix of B batch ids for N subjects.
+ :param batch_effects_maps: mapping, per batch-effect column, from batch-effect values to model indices (see predict).
326
+
327
+ :returns: * ys - predictive mean
328
+ * s2 - predictive variance
329
+
330
+ Written by S.M. Kia
331
+ """
332
+
333
+ def __init__(self, configs):
334
+ self.bsp = None
335
+ self.model_type = configs["type"]
336
+ self.configs = configs
337
+
338
+ def get_modeler(self):
339
+ """
340
+ This used to return either hbr or nn_hbr, but now it always returns hbr.
341
+ It can be removed in a future release.
342
+ # TODO: remove this in a future release
343
+ """
344
+ return hbr
345
+
346
+ def transform_X(self, X, adapt=False):
347
+ """
348
+ Transform the covariates according to the model type
349
+
350
+ :param X: N-by-P input matrix of P features for N subjects
351
+ :param adapt: set to True when the fitted B-spline range must be adapted to new data (e.g. in the transfer scenario)
352
+ :return: transformed covariates
354
+ """
355
+ if self.model_type == "polynomial":
356
+ Phi = create_poly_basis(X, self.configs["order"])
357
+ elif self.model_type == "bspline":
358
+ if self.bsp is None:
359
+ self.bsp = BSplineBasis(order=self.configs["order"],
360
+ nknots=self.configs["nknots"])
361
+ self.bsp.fit(X)
362
+ #self.bsp = bspline_fit(
363
+ # X, self.configs["order"], self.configs["nknots"])
364
+ elif adapt:
365
+ self.bsp.adapt(X)
366
+
367
+ bspline = self.bsp.transform(X)
368
+ #bspline = bspline_transform(X, self.bsp)
369
+ Phi = np.concatenate((X, bspline), axis=1)
370
+ else:
371
+ Phi = X
372
+ return Phi
373
+
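+ # Example (hypothetical configs): with configs = {"type": "bspline",
+ # "order": 3, "nknots": 5}, transform_X returns [X, B(X)], i.e. the raw
+ # covariates concatenated with their B-spline basis expansion.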
374
+ def find_map(self, X, y, batch_effects, method="L-BFGS-B"):
375
+ """
376
+ Find the maximum a posteriori (MAP) estimate of the model parameters.
377
+
378
+ This function transforms the data according to the model type,
379
+ and then uses the modeler to find the MAP estimate of the model parameters.
380
+ The results are stored in the instance variable `MAP`.
381
+
382
+ :param X: N-by-P input matrix of P features for N subjects. This is the input data for the model.
383
+ :param y: N-by-1 vector of outputs. This is the target data for the model.
384
+ :param batch_effects: N-by-B matrix of B batch ids for N subjects. This represents the batch effects to be considered in the model.
385
+ :param method: String representing the optimization method to use. Default is "L-BFGS-B".
386
+ :return: A dictionary of MAP estimates.
387
+ """
388
+ X, y, batch_effects = expand_all(X, y, batch_effects)
389
+ X = self.transform_X(X)
390
+ modeler = self.get_modeler()
391
+ with modeler(X, y, batch_effects, self.configs) as m:
392
+ self.MAP = pm.find_MAP(method=method)
393
+ return self.MAP
394
+
395
+ def estimate(self, X, y, batch_effects, **kwargs):
396
+ """
397
+ Estimate the model parameters using the provided data.
398
+
399
+ This function transforms the data according to the model type,
400
+ and then samples from the posterior using pymc. The results are stored
401
+ in the instance variable `idata`.
402
+
403
+ :param X: N-by-P input matrix of P features for N subjects. This is the input data for the model.
404
+ :param y: N-by-1 vector of outputs. This is the target data for the model.
405
+ :param batch_effects: N-by-B matrix of B batch ids for N subjects. This represents the batch effects to be considered in the model.
406
+ :param kwargs: Additional keyword arguments to be passed to the modeler.
407
+ :return: idata. The results are also stored in the instance variable `self.idata`.
408
+ """
409
+ X, y, batch_effects = expand_all(X, y, batch_effects)
410
+
411
+ self.batch_effects_num = batch_effects.shape[1]
412
+ self.batch_effects_size = [len(np.unique(batch_effects[:,i])) for i in range(self.batch_effects_num)]
413
+
414
+ X = self.transform_X(X)
415
+ modeler = self.get_modeler()
416
+ if hasattr(self, 'idata'):
417
+ del self.idata
418
+ with modeler(X, y, batch_effects, self.configs) as m:
419
+ self.idata = pm.sample(
420
+ draws=self.configs["n_samples"],
421
+ tune=self.configs["n_tuning"],
422
+ chains=self.configs["n_chains"],
423
+ init=self.configs["init"],
424
+ n_init=500000,
425
+ cores=self.configs["cores"],
426
+ nuts_sampler=self.configs["nuts_sampler"],
427
+ )
428
+ self.vars_to_sample = ['y_like']
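+ # For data privacy (see also transfer()): replace the per-datapoint
+ # deterministics with dummy arrays and zero out the stored data below.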
429
+ if self.configs['remove_datapoints_from_posterior']:
430
+ chain = self.idata.posterior.coords['chain'].data
431
+ draw = self.idata.posterior.coords['draw'].data
432
+ for j in self.idata.posterior.variables.mapping.keys():
433
+ if j.endswith('_samples'):
434
+ dummy_array = xarray.DataArray(data=np.zeros((len(chain), len(draw), 1)), coords={
435
+ 'chain': chain, 'draw': draw, 'empty': np.array([0])}, name=j)
436
+ self.idata.posterior[j] = dummy_array
437
+ self.vars_to_sample.append(j)
438
+
439
+ # zero-out all data
440
+ for i in self.idata.constant_data.data_vars:
441
+ self.idata.constant_data[i] *= 0
442
+ for i in self.idata.observed_data.data_vars:
443
+ self.idata.observed_data[i] *= 0
444
+
445
+ return self.idata
446
+
447
+ def predict(
448
+ self, X, batch_effects, batch_effects_maps, pred="single", var_names=None, **kwargs
449
+ ):
450
+ """
451
+ Make predictions from the model.
452
+
453
+ This function expands the input data, transforms it according to the model type,
454
+ and then uses the modeler to make predictions. The results are stored in the instance variable `idata`.
455
+
456
+ :param X: Covariates. This is the input data for the model.
457
+ :param batch_effects: Batch effects corresponding to X. This represents the batch effects to be considered in the model.
458
+ :param batch_effects_maps: A map from batch_effect values to indices. This is used to map the batch effects to the indices used by the model.
459
+ :param pred: String representing the prediction method to use. Default is "single".
460
+ :param var_names: List of variable names to consider in the prediction. If None or ['y_like'], self.vars_to_sample is used.
461
+ :param kwargs: Additional keyword arguments to be passed to the modeler.
462
+ :return: A 2-tuple of xarray datasets with the mean and variance of the posterior predictive distribution. The results are also stored in the instance variable `self.idata`.
463
+ """
464
+ X, batch_effects = expand_all(X, batch_effects)
465
+
467
+ y = np.zeros([X.shape[0], 1])
468
+ X = self.transform_X(X)
469
+ modeler = self.get_modeler()
470
+
471
+ # Make an array with occurrences of all the values in be_train, but with the same size as be_test
472
+ truncated_batch_effects_train = np.stack(
473
+ [
474
+ np.resize(
475
+ np.array(list(batch_effects_maps[i].keys())), X.shape[0])
476
+ for i in range(batch_effects.shape[1])
477
+ ],
478
+ axis=1,
479
+ )
480
+
481
+ # See if a list of var_names is provided, set to self.vars_to_sample otherwise
482
+ if (var_names is None) or (var_names == ['y_like']):
483
+ var_names = self.vars_to_sample
484
+
487
+ # Need to delete self.idata.posterior_predictive, otherwise, if it exists, it will not be overwritten
488
+ if hasattr(self.idata, 'posterior_predictive'):
489
+ del self.idata.posterior_predictive
490
+
491
+ with modeler(X, y, truncated_batch_effects_train, self.configs) as model:
492
+ # For each batch effect dim
493
+ for i in range(batch_effects.shape[1]):
494
+ # Make a map that maps batch effect values to their index
495
+ valmap = batch_effects_maps[i]
496
+ # Compute those indices for the test data
497
+ indices = list(map(lambda x: valmap[x], batch_effects[:, i]))
498
+ # Those indices need to be used by the model
499
+ pm.set_data({f"batch_effect_{i}_data": indices})
500
+
501
+ self.idata = pm.sample_posterior_predictive(
502
+ trace=self.idata,
503
+ extend_inferencedata=True,
504
+ progressbar=True,
505
+ var_names=var_names
506
+ )
507
+ pred_mean = self.idata.posterior_predictive["y_like"].to_numpy().mean(
508
+ axis=(0, 1))
509
+ pred_var = self.idata.posterior_predictive["y_like"].to_numpy().var(
510
+ axis=(0, 1))
511
+
512
+ return pred_mean, pred_var
513
+
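+ # Sketch of how batch_effects_maps is typically built from the training
+ # batch effects (hypothetical helper; one dict per batch-effect column):
+ #
+ #     batch_effects_maps = [
+ #         {v: i for i, v in enumerate(np.unique(be_train[:, d]))}
+ #         for d in range(be_train.shape[1])
+ #     ]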
514
+ def transfer(self, X, y, batch_effects):
515
+
516
+ """
517
+ This function is used to transfer a reference model (i.e., a source model estimated on large source datasets)
518
+ to data from new sites (i.e. target data). It uses the posterior
519
+ of the reference model as a prior for the target model.
520
+
521
+ :param X: Covariates. This is the input data for the model.
522
+ :param y: Outputs. This is the target data for the model.
523
+ :param batch_effects: Batch effects corresponding to X. This represents the batch effects to be considered in the model.
524
+ :return: An inferencedata object containing samples from the posterior distribution.
525
+ """
526
+ X, y, batch_effects = expand_all(X, y, batch_effects)
527
+
528
+ self.batch_effects_num = batch_effects.shape[1]
529
+ self.batch_effects_size = [len(np.unique(batch_effects[:,i])) for i in range(self.batch_effects_num)]
530
+
531
+
532
+ X = self.transform_X(X, adapt=True)
533
+ modeler = self.get_modeler()
534
+ with modeler(X, y, batch_effects, self.configs, idata=self.idata) as m:
535
+ self.idata = pm.sample(
536
+ self.configs["n_samples"],
537
+ tune=self.configs["n_tuning"],
538
+ chains=self.configs["n_chains"],
539
+ target_accept=self.configs["target_accept"],
540
+ init=self.configs["init"],
541
+ n_init=500000,
542
+ cores=self.configs["cores"],
543
+ nuts_sampler=self.configs["nuts_sampler"],
544
+ )
545
+
546
+ self.vars_to_sample = ['y_like']
547
+
548
+ # This part is for data privacy
549
+ if self.configs['remove_datapoints_from_posterior']:
550
+ chain = self.idata.posterior.coords['chain'].data
551
+ draw = self.idata.posterior.coords['draw'].data
552
+ for j in self.idata.posterior.variables.mapping.keys():
553
+ if j.endswith('_samples'):
554
+ dummy_array = xarray.DataArray(data=np.zeros((len(chain), len(draw), 1)), coords={
555
+ 'chain': chain, 'draw': draw, 'empty': np.array([0])}, name=j)
556
+ self.idata.posterior[j] = dummy_array
557
+ self.vars_to_sample.append(j)
558
+
559
+ # zero-out all data
560
+ for i in self.idata.constant_data.data_vars:
561
+ self.idata.constant_data[i] *= 0
562
+ for i in self.idata.observed_data.data_vars:
563
+ self.idata.observed_data[i] *= 0
564
+
565
+ return self.idata
566
+
567
+
568
+ def generate(self, X, batch_effects, samples, batch_effects_maps, var_names=None):
569
+ """
570
+ Generate samples from the posterior predictive distribution.
571
+
572
+ This function expands and transforms the input data, then uses the modeler to generate samples from the posterior predictive distribution.
573
+
574
+ :param X: Covariates. This is the input data for the model.
575
+ :param batch_effects: Batch effects corresponding to X. This represents the batch effects to be considered in the model.
576
+ :param samples: Number of samples to generate. This number of samples is generated for each input sample.
577
+ :param batch_effects_maps: A map from batch_effect values to indices, used to map the batch effects to the indices used by the model.
+ :param var_names: List of variable names to consider. If None or ['y_like'], self.vars_to_sample is used.
+ :return: A tuple containing the expanded and repeated X, batch_effects, and the generated samples.
578
+ """
579
+ X, batch_effects = expand_all(X, batch_effects)
580
+
581
+ y = np.zeros([X.shape[0], 1])
582
+
583
+ X_transformed = self.transform_X(X)
584
+ modeler = self.get_modeler()
585
+
586
+ # See if a list of var_names is provided, set to self.vars_to_sample otherwise
587
+ if (var_names is None) or (var_names == ['y_like']):
588
+ var_names = self.vars_to_sample
589
+
590
+ # Need to delete self.idata.posterior_predictive, otherwise, if it exists, it will not be overwritten
591
+ if hasattr(self.idata, 'posterior_predictive'):
592
+ del self.idata.posterior_predictive
593
+
594
+ with modeler(X_transformed, y, batch_effects, self.configs):
595
+ # For each batch effect dim
596
+ for i in range(batch_effects.shape[1]):
597
+ # Make a map that maps batch effect values to their index
598
+ valmap = batch_effects_maps[i]
599
+ # Compute those indices for the test data
600
+ indices = list(map(lambda x: valmap[x], batch_effects[:, i]))
601
+ # Those indices need to be used by the model
602
+ pm.set_data({f"batch_effect_{i}_data": indices})
603
+
604
+ self.idata = pm.sample_posterior_predictive(
605
+ trace=self.idata,
606
+ extend_inferencedata=True,
607
+ progressbar=True,
608
+ var_names=var_names
609
+ )
610
+
611
+ generated_samples = np.reshape(self.idata.posterior_predictive["y_like"].to_numpy()[0,0:samples,:].T,
612
+ [X.shape[0] * samples, 1])
613
+
614
+ X = np.repeat(X, samples, axis=0)
615
+ if len(X.shape) == 1:
616
+ X = np.expand_dims(X, axis=1)
617
+ batch_effects = np.repeat(batch_effects, samples, axis=0)
618
+ if len(batch_effects.shape) == 1:
619
+ batch_effects = np.expand_dims(batch_effects, axis=1)
620
+ return X, batch_effects, generated_samples
621
+
622
+
623
+ def sample_prior_predictive(self, X, batch_effects, samples, y=None, idata=None):
624
+ """
625
+ Sample from the prior predictive distribution.
626
+
627
+ This function transforms the input data, then uses the modeler to sample from the prior predictive distribution.
628
+
629
+ :param X: Covariates. This is the input data for the model.
630
+ :param batch_effects: Batch effects corresponding to X. This represents the batch effects to be considered in the model.
631
+ :param samples: Number of samples to generate. This number of samples is generated for each input sample.
632
+ :param y: Outputs. If None, a zero array of appropriate shape is created.
633
+ :param idata: An xarray dataset with the posterior distribution. If None, self.idata is used if it exists.
634
+ :return: An xarray dataset with the prior predictive distribution. The results are also stored in the instance variable `self.idata`.
635
+ """
636
+ if y is None:
637
+ y = np.zeros([X.shape[0], 1])
638
+ X, y, batch_effects = expand_all(X, y, batch_effects)
639
+
640
+ X = self.transform_X(X)
641
+ modeler = self.get_modeler()
642
+ with modeler(X, y, batch_effects, self.configs, idata):
643
+ self.idata = pm.sample_prior_predictive(samples=samples)
644
+ return self.idata
645
+
646
+ def get_model(self, X, y, batch_effects):
647
+ """
648
+ Get the model for the given data.
649
+
650
+ This function expands and transforms the input data, then creates a pymc model using the hbr method
651
+
652
+ :param X: Covariates. This is the input data for the model.
653
+ :param y: Outputs. This is the target data for the model.
654
+ :param batch_effects: Batch effects corresponding to X. This represents the batch effects to be considered in the model.
655
+ :return: The model for the given data.
656
+ """
657
+ X, y, batch_effects = expand_all(X, y, batch_effects)
658
+ modeler = self.get_modeler()
659
+ X = self.transform_X(X)
660
+ idata = self.idata if hasattr(self, "idata") else None
661
+ return modeler(X, y, batch_effects, self.configs, idata=idata)
662
+
663
+ def create_dummy_inputs(self, X, step_size=0.05):
664
+ """
665
+ Create dummy inputs for the model based on the input covariates.
666
+
667
+ This function generates a Cartesian product of the covariate ranges determined from the input X
668
+ (min and max values of each covariate). It repeats this for each batch effect.
669
+ It also generates a Cartesian product of the batch effect indices and repeats it for each input sample.
670
+
671
+ :param X: 2D numpy array, where rows are samples and columns are covariates.
672
+ :param step_size: Step size for generating ranges for each covariate. Default is 0.05.
673
+ :return: A tuple containing the dummy input data and the dummy batch effects.
674
+ """
675
+ arrays = []
676
+ for i in range(X.shape[1]):
677
+ cov_min = np.min(X[:, i])
678
+ cov_max = np.max(X[:, i])
679
+ arrays.append(np.arange(cov_min, cov_max + step_size, step_size))
680
+
681
+ X_dummy = cartesian_product(arrays)
682
+ X_dummy = np.concatenate(
683
+ [X_dummy for _ in range(np.prod(self.batch_effects_size))]
684
+ )
685
+
686
+ arrays = []
687
+ for i in range(self.batch_effects_num):
688
+ arrays.append(np.arange(0, self.batch_effects_size[i]))
689
+
690
+ batch_effects = cartesian_product(arrays)
691
+ batch_effects_dummy = np.repeat(batch_effects, X_dummy.shape[0] // np.prod(self.batch_effects_size), axis=0)
692
+
693
+ return X_dummy, batch_effects_dummy
694
+
695
+
696
+ def Rhats(self, var_names=None, thin=1, resolution=100):
697
+ """
698
+ Get Rhat of posterior samples as function of sampling iteration.
699
+
700
+ This function extracts the posterior samples from the instance variable `idata`, computes the Rhat statistic for each variable and sampling iteration, and returns a dictionary of Rhat values.
701
+
702
+ :param var_names: List of variable names to consider. If None, all variables in `idata` are used.
703
+ :param thin: Integer representing the thinning factor for the samples. Default is 1.
704
+ :param resolution: Integer representing the number of points at which to compute the Rhat statistic. Default is 100.
705
+ :return: A dictionary where the keys are variable names and the values are arrays of Rhat values.
706
+ """
707
+ idata = self.idata
708
+ testvars = az.extract(idata, group='posterior',
709
+ var_names=var_names, combined=False)
710
+ testvar_names = [var for var in list(
711
+ testvars.data_vars.keys()) if '_samples' not in var]
712
+ rhat_dict = {}
713
+ for var_name in testvar_names:
714
+ var = np.stack(testvars[var_name].to_numpy())[:, ::thin]
715
+ var = var.reshape((var.shape[0], var.shape[1], -1))
716
+ vardim = var.shape[2]
717
+ interval_skip = var.shape[1]//resolution
718
+ rhats_var = np.zeros((resolution, vardim))
719
+ for v in range(vardim):
720
+ for j in range(resolution):
721
+ rhats_var[j, v] = az.rhat(var[:, :j*interval_skip, v])
722
+ rhat_dict[var_name] = rhats_var
723
+ return rhat_dict
724
+
725
+
726
+ class Prior:
727
+ """
728
+ A wrapper class for a PyMC distribution.
729
+ - creates a fitted distribution from the idata, if one is present
730
+ - overloads the __getitem__ function so that indexing is applied only when the prior models a random effect
731
+ """
732
+
733
+ def __init__(self, name, dist, params, pb, has_random_effect=False) -> None:
734
+ """
735
+ Initialize the Prior object.
736
+
737
+ This function initializes the Prior object with the given name, distribution, parameters, and model.
738
+ It also sets a flag indicating whether the prior has a random effect.
739
+
740
+ :param name: String representing the name of the prior.
741
+ :param dist: String representing the type of the distribution.
742
+ :param params: Tuple of parameters for the distribution.
743
+ :param pb: The model object.
744
+ :param has_random_effect: Boolean indicating whether the prior has a random effect. Default is False.
745
+ """
746
+ self.dist = None
747
+ self.name = name
748
+ self.has_random_effect = has_random_effect
749
+ self.distmap = {
750
+ "normal": pm.Normal,
751
+ "hnormal": pm.HalfNormal,
752
+ "gamma": pm.Gamma,
753
+ "uniform": pm.Uniform,
754
+ "igamma": pm.InverseGamma,
755
+ "hcauchy": pm.HalfCauchy,
756
+ "hstudt": pm.HalfStudentT,
757
+ "studt": pm.StudentT,
758
+ "lognormal": pm.LogNormal,
759
+ }
760
+ self.make_dist(dist, params, pb)
761
+
762
+ def make_dist(self, dist, params, pb):
763
+ """
764
+ Create a PyMC distribution.
765
+
766
+ This function creates a PyMC distribution. If there is an `idata` present, the distribution is fitted to the `idata`.
767
+ If there isn't an `idata`, the prior is parameterized by the values in `params`.
768
+
769
+ :param dist: String representing the type of the distribution.
770
+ :param params: Tuple of parameters for the distribution.
771
+ :param pb: The model object.
772
+ """
773
+ with pb.model as m:
774
+ if pb.idata is not None:
775
+ # Get samples
776
+ samples = az.extract(pb.idata, var_names=self.name)
777
+ # Define mapping to new shape
778
+
779
+ def get_new_dim_size(tup):
780
+ oldsize, name = tup
781
+ if name.startswith('batch_effect_'):
782
+ ind = pb.batch_effect_dim_names.index(name)
783
+ return len(np.unique(pb.batch_effect_indices[ind].container.data))
784
+ return oldsize
785
+
786
+ new_shape = list(
787
+ map(get_new_dim_size, zip(samples.shape, samples.dims)))
788
+ if len(new_shape) == 1:
789
+ new_shape = None
790
+ else:
791
+ new_shape = new_shape[:-1]
792
+
793
+ dims = []
794
+ if self.has_random_effect:
795
+ dims = dims + pb.batch_effect_dim_names
796
+ if self.name.startswith("slope") or self.name.startswith("offset_slope"):
797
+ dims = dims + ["basis_functions"]
798
+ self.dist = from_posterior(
799
+ param=self.name,
800
+ samples=samples.to_numpy(),
801
+ shape=new_shape,
802
+ distribution=dist,
803
+ dims=dims,
804
+ freedom=pb.configs["freedom"],
805
+ )
806
+
807
+ else:
808
+ dims = []
809
+ if self.has_random_effect:
810
+ dims = dims + pb.batch_effect_dim_names
811
+ if self.name.startswith("slope") or self.name.startswith("offset_slope"):
812
+ dims = dims + ["basis_functions"]
813
+ if dims == []:
814
+ self.dist = self.distmap[dist](self.name, *params)
815
+ else:
816
+ self.dist = self.distmap[dist](
817
+ self.name, *params, dims=dims)
818
+
819
+ def __getitem__(self, idx):
820
+ """
821
+ Retrieve the distribution for a specific batch effect.
822
+
823
+ This function retrieves the distribution for a specific batch effect.
824
+ If the prior does not model batch effects, this should return the same value for each index.
825
+
826
+ :param idx: Index of the batch effect.
827
+ :return: The distribution for the specified batch effect.
828
+ """
829
+ assert self.dist is not None, "Distribution not initialized"
830
+ if self.has_random_effect:
831
+ return self.dist[idx]
832
+ else:
833
+ return self.dist
834
+
835
+
836
+ class ParamBuilder:
837
+ """
838
+ A class that simplifies the construction of parameterizations.
839
+ It has a lot of attributes necessary for creating the model, including the data, but it is never saved with the model.
840
+ It also contains a lot of decision logic for creating the parameterizations.
841
+ """
842
+
843
+ def __init__(self, X, y, batch_effects, idata, configs):
844
+ """
845
+ :param X: Covariates
846
+ :param y: IDPs (imaging-derived phenotypes)
847
+ :param batch_effects: array of batch effects
848
+ :param idata: InferenceData from a previous fit, or None
849
+ :param configs: dictionary of model configurations
851
+ """
852
+ self.model = None # Needs to be set later, because coords need to be passed at construction of Model
853
+ self.X = X
854
+ self.n_basis_functions = X.shape[1]
855
+ self.y = y
856
+ self.batch_effects = batch_effects.astype(np.int16)
857
+ self.idata: az.InferenceData = idata
858
+ self.configs = configs
859
+
860
+ self.y_shape = y.shape
861
+ self.n_ys = y.shape[0]
862
+ self.batch_effects_num = batch_effects.shape[1]
863
+
864
+ self.batch_effect_dim_names = []
865
+ self.batch_effect_indices = []
866
+ self.coords = OrderedDict()
867
+ self.coords["basis_functions"] = np.arange(self.n_basis_functions)
868
+
869
+ for i in range(self.batch_effects_num):
870
+ batch_effect_dim_name = f"batch_effect_{i}"
871
+ self.batch_effect_dim_names.append(batch_effect_dim_name)
872
+ this_be_values, this_be_indices = np.unique(
873
+ self.batch_effects[:, i], return_inverse=True
874
+ )
875
+ self.coords[batch_effect_dim_name] = this_be_values
876
+ self.batch_effect_indices.append(this_be_indices)
877
+
878
+ def make_param(self, name, **kwargs):
879
+ """
880
+ Create a parameterization based on the configuration.
881
+
882
+ This function creates a parameterization based on the configuration.
883
+ If the configuration specifies a linear parameterization, it creates a slope and intercept and uses those to make a linear parameterization.
884
+ If the configuration specifies a random parameterization, it creates a random parameterization, either centered or non-centered.
885
+ Otherwise, it creates a fixed parameterization.
886
+
887
+ :param name: String representing the name of the parameter.
888
+ :param kwargs: Additional keyword arguments to be passed to the parameterization.
889
+ :return: The created parameterization.
890
+ """
891
+ if self.configs.get(f"linear_{name}", False):
892
+ # First make a slope and intercept, and use those to make a linear parameterization
893
+ slope_parameterization = self.make_param(f"slope_{name}", **kwargs)
894
+ intercept_parameterization = self.make_param(
895
+ f"intercept_{name}", **kwargs)
896
+ return LinearParameterization(
897
+ name=name,
898
+ slope_parameterization=slope_parameterization,
899
+ intercept_parameterization=intercept_parameterization,
900
+ **kwargs,
901
+ )
902
+
903
+ elif self.configs.get(f"random_{name}", False):
904
+ if self.configs.get(f"centered_{name}", True):
905
+ return CentralRandomFixedParameterization(name=name, pb=self, **kwargs)
906
+ else:
907
+ return NonCentralRandomFixedParameterization(
908
+ name=name, pb=self, **kwargs
909
+ )
910
+ else:
911
+ return FixedParameterization(name=name, pb=self, **kwargs)
912
+
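+ # Decision logic of make_param, e.g. for name == "mu":
+ #   configs["linear_mu"]   -> LinearParameterization built recursively from
+ #                             make_param("slope_mu") and make_param("intercept_mu")
+ #   configs["random_mu"]   -> Central- or NonCentralRandomFixedParameterization
+ #                             (selected by configs["centered_mu"])
+ #   otherwise              -> FixedParameterization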
913
+
914
+ class Parameterization:
915
+ """
916
+ This is the top-level parameterization class from which all the other parameterizations inherit.
917
+ """
918
+
919
+ def __init__(self, name):
920
+ """
921
+ Initialize the Parameterization object.
922
+
923
+ This function initializes the Parameterization object with the given name.
924
+
925
+ :param name: String representing the name of the parameterization.
926
+ """
927
+ self.name = name
929
+
930
+ def get_samples(self, pb):
931
+ """
932
+ Get samples from the parameterization.
933
+
934
+ This function should be overridden by subclasses to provide specific sampling methods.
935
+
936
+ :param pb: The ParamBuilder object.
937
+ :return: None. This method should be overridden by subclasses.
938
+ """
939
+ pass
940
+
941
+
942
+ class FixedParameterization(Parameterization):
943
+ """
944
+ A parameterization that takes a single value for all input.
945
+
946
+ It does not depend on anything except its hyperparameters. This class inherits from the Parameterization class.
947
+ """
948
+
949
+ def __init__(self, name, pb: ParamBuilder, **kwargs):
950
+ """
951
+ Initialize the FixedParameterization object.
952
+
953
+ This function initializes the FixedParameterization object with the given name, ParamBuilder object, and additional arguments.
954
+
955
+ :param name: String representing the name of the parameterization.
956
+ :param pb: The ParamBuilder object.
957
+ :param kwargs: Additional keyword arguments to be passed to the parameterization.
958
+ """
959
+ super().__init__(name)
960
+ dist = kwargs.get(f"{name}_dist", "normal")
961
+ params = kwargs.get(f"{name}_params", (0.0, 1.0))
962
+ self.dist = Prior(name, dist, params, pb)
963
+
964
+ def get_samples(self, pb):
965
+ """
966
+ Get samples from the parameterization.
967
+
968
+ This function gets samples from the parameterization using the ParamBuilder object.
969
+
970
+ :param pb: The ParamBuilder object.
971
+ :return: The samples from the parameterization.
972
+ """
973
+ with pb.model:
974
+ return self.dist[0]
975
+
976
+
977
+ class CentralRandomFixedParameterization(Parameterization):
978
+ """
979
+ A parameterization that is fixed for each batch effect.
980
+
981
+ This is sampled in a centered fashion; the values are sampled from a normal distribution with a group mean and group standard deviation.
982
+ """
983
+
984
+ def __init__(self, name, pb: ParamBuilder, **kwargs):
985
+ """
986
+ Initialize the CentralRandomFixedParameterization object.
987
+
988
+ This function initializes the CentralRandomFixedParameterization object with the given name, ParamBuilder object, and additional arguments.
989
+
990
+ :param name: String representing the name of the parameterization.
991
+ :param pb: The ParamBuilder object.
992
+ :param kwargs: Additional keyword arguments to be passed to the parameterization.
993
+ """
994
+ super().__init__(name)
995
+
996
+ # Normal distribution is default for mean
997
+ mu_dist = kwargs.get(f"mu_{name}_dist", "normal")
998
+ mu_params = kwargs.get(f"mu_{name}_params", (0.0, 1.0))
999
+ mu_prior = Prior(f"mu_{name}", mu_dist, mu_params, pb)
1000
+
1001
+ # HalfNormal is default for sigma
1002
+ sigma_dist = kwargs.get(f"sigma_{name}_dist", "hnormal")
1003
+ sigma_params = kwargs.get(f"sigma_{name}_params", (1.0,))
1004
+ sigma_prior = Prior(f"sigma_{name}", sigma_dist, sigma_params, pb)
1005
+
1006
+ dims = (
1007
+ [*pb.batch_effect_dim_names, "basis_functions"]
1008
+ if self.name.startswith("slope")
1009
+ else pb.batch_effect_dim_names
1010
+ )
1011
+ self.dist = pm.Normal(
1012
+ name=name,
1013
+ mu=mu_prior.dist,
1014
+ sigma=sigma_prior.dist,
1015
+ dims=dims,
1016
+ )
1017
+
1018
+ def get_samples(self, pb: ParamBuilder):
1019
+ """
1020
+ Get samples from the parameterization.
1021
+
1022
+ This function gets samples from the parameterization using the ParamBuilder object.
1023
+
1024
+ :param pb: The ParamBuilder object.
1025
+ :return: The samples from the parameterization.
1026
+ """
1027
+ with pb.model:
1028
+ return self.dist[pb.batch_effect_indices]
1029
+
1030
+
1031
+
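+ # Centered:      theta_b ~ Normal(mu, sigma)                  (class above)
+ # Non-centered:  theta_b = mu + sigma * offset_b, offset_b ~ Normal(0, 1)
+ #                                                             (class below)
+ # Both imply the same distribution over theta_b; the non-centered form often
+ # samples better when the group-level sigma is small.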
1032
+ class NonCentralRandomFixedParameterization(Parameterization):
1033
+ """
1034
+ A parameterization that is fixed for each batch effect. This is sampled in a non-central fashion;
1035
+ the values are a sum of a group mean and noise values scaled with a group scaling factor
1036
+ """
1037
+
1038
+ def __init__(self, name, pb: ParamBuilder, **kwargs):
1039
+ """
1040
+ Initialize the NonCentralRandomFixedParameterization object.
1041
+
1042
+ This function initializes the NonCentralRandomFixedParameterization object with the given name, ParamBuilder object, and additional arguments.
1043
+
1044
+ :param name: String representing the name of the parameterization.
1045
+ :param pb: The ParamBuilder object.
1046
+ :param kwargs: Additional keyword arguments to be passed to the parameterization.
1047
+ """
1048
+ super().__init__(name)
1049
+
1050
+ # Normal distribution is default for mean
1051
+ mu_dist = kwargs.get(f"mu_{name}_dist", "normal")
1052
+ mu_params = kwargs.get(f"mu_{name}_params", (0.0, 1.0))
1053
+ mu_prior = Prior(f"mu_{name}", mu_dist, mu_params, pb)
1054
+
1055
+ # HalfNormal is default for sigma
1056
+ sigma_dist = kwargs.get(f"sigma_{name}_dist", "hnormal")
1057
+ sigma_params = kwargs.get(f"sigma_{name}_params", (1.0,))
1058
+ sigma_prior = Prior(f"sigma_{name}", sigma_dist, sigma_params, pb)
1059
+
1060
+ # Normal is default for offset
1061
+ offset_dist = kwargs.get(f"offset_{name}_dist", "normal")
1062
+ offset_params = kwargs.get(f"offset_{name}_params", (0.0, 1.0))
1063
+ offset_prior = Prior(
1064
+ f"offset_{name}", offset_dist, offset_params, pb, has_random_effect=True
1065
+ )
1066
+ dims = (
1067
+ [*pb.batch_effect_dim_names, "basis_functions"]
1068
+ if self.name.startswith("slope")
1069
+ else pb.batch_effect_dim_names
1070
+ )
1071
+ self.dist = pm.Deterministic(
1072
+ name=name,
1073
+ var=mu_prior.dist + sigma_prior.dist * offset_prior.dist,
1074
+ dims=dims,
1075
+ )
1076
+
1077
+ def get_samples(self, pb: ParamBuilder):
1078
+ """
1079
+ Get samples from the parameterization.
1080
+
1081
+ This function gets samples from the parameterization using the ParamBuilder object.
1082
+
1083
+ :param pb: The ParamBuilder object.
1084
+ :return: The samples from the parameterization.
1085
+ """
1086
+ with pb.model:
1087
+ return self.dist[pb.batch_effect_indices]
1088
+
1089
+
1090
+ class LinearParameterization(Parameterization):
1091
+ """
1092
+ This class inherits from the Parameterization class and represents a parameterization that can model a linear dependence on X.
1093
+
1094
+ """
1095
+
1096
+ def __init__(
1097
+ self, name, slope_parameterization, intercept_parameterization, **kwargs
1098
+ ):
1099
+ """
1100
+ Initialize the LinearParameterization object.
1101
+
1102
+ This function initializes the LinearParameterization object with the given name, slope parameterization, intercept parameterization, and additional arguments.
1103
+
1104
+ :param name: String representing the name of the parameterization.
1105
+ :param slope_parameterization: An instance of a Parameterization subclass representing the slope.
1106
+ :param intercept_parameterization: An instance of a Parameterization subclass representing the intercept.
1107
+ :param kwargs: Additional keyword arguments to be passed to the parameterization.
1108
+ """
1109
+ super().__init__(name)
1110
+ self.slope_parameterization = slope_parameterization
1111
+ self.intercept_parameterization = intercept_parameterization
1112
+
1113
+ def get_samples(self, pb):
1114
+ """
1115
+ Get samples from the parameterization.
1116
+
1117
+ This function gets samples from the parameterization using the ParamBuilder object. It computes the samples as the sum of the intercept and the product of X and the slope.
1118
+
1119
+ :param pb: The ParamBuilder object.
1120
+ :return: The samples from the parameterization.
1121
+ """
1122
+ with pb.model:
1123
+ intercept_samples = self.intercept_parameterization.get_samples(pb)
1124
+ slope_samples = self.slope_parameterization.get_samples(pb)
1125
+
1126
+ if pb.configs[f"random_slope_{self.name}"]:
1127
+ if slope_samples.shape.eval()[1] > 1:
1128
+ slope = pm.math.sum(
1129
+ pb.X * slope_samples, axis=1)
1130
+ else:
1131
+ slope = pb.X * slope_samples
1132
+ else:
1133
+ slope = pb.X @ slope_samples
1134
+
1135
+ samples = pm.math.flatten(intercept_samples) + pm.math.flatten(slope)
1136
+ return samples
1137
+
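+ # In effect: samples = intercept + X @ slope for a fixed slope, and
+ # samples[n] = intercept[n] + sum_k X[n, k] * slope[batch(n), k] when the
+ # slope varies with the batch effects (the row-wise product-and-sum above).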
1138
+
1139
+ def get_design_matrix(X, nm, basis="linear"):
1140
+ """
1141
+ Get the design matrix for the given data.
1142
+
1143
+ This function gets the design matrix for the given data.
1144
+
1145
+ :param X: Covariates. This is the input data for the model.
1146
+ :param nm: A normative model.
1147
+ :param basis: String representing the basis to use. Default is "linear".
+ :return: The design matrix Phi
1148
+ """
1149
+ if basis == "bspline":
1150
+ Phi = nm.hbr.bsp.transform(X)
1151
+ #Phi = bspline_transform(X, nm.hbr.bsp)
1152
+ elif basis == "polynomial":
1153
+ Phi = create_poly_basis(X, 3)
1154
+ else:
1155
+ Phi = X
1156
+ return Phi
1157
+
1158
+
1159
+ def nn_hbr(X, y, batch_effects, batch_effects_size, configs, idata=None):
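+ """
+ Create a legacy neural-network hierarchical Bayesian regression model.
+
+ A mean network (and, if configs["hetero_noise"] is set, a second noise
+ network) with one or two hidden layers is built per batch-effect group,
+ with group-level priors shared across groups.
+
+ :param X: [N×P] array of covariates
+ :param y: [N×1] array of outputs
+ :param batch_effects: [N×M] array of batch effects
+ :param batch_effects_size: list with the number of levels per batch effect
+ :param configs: dictionary of model configurations
+ :param idata: InferenceData from a previous fit used to transfer priors, or None
+ :return: the PyMC model
+ """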
1160
+ n_hidden = configs["nn_hidden_neuron_num"]
1161
+ n_layers = configs["nn_hidden_layers_num"]
1162
+ feature_num = X.shape[1]
1163
+ batch_effects_num = batch_effects.shape[1]
1164
+ all_idx = []
1165
+ for i in range(batch_effects_num):
1166
+ all_idx.append(np.int16(np.unique(batch_effects[:, i])))
1167
+ be_idx = list(product(*all_idx))
1168
+
1169
+ # Initialize random weights between each layer for the mu:
1170
+ init_1 = pm.floatX(
1171
+ np.random.randn(feature_num, n_hidden) * np.sqrt(1 / feature_num)
1172
+ )
1173
+ init_out = pm.floatX(np.random.randn(n_hidden) * np.sqrt(1 / n_hidden))
1174
+
1175
+ std_init_1 = pm.floatX(np.random.rand(feature_num, n_hidden))
1176
+ std_init_out = pm.floatX(np.random.rand(n_hidden))
1177
+
1178
+ # And initialize random weights between each layer for sigma_noise:
1179
+ init_1_noise = pm.floatX(
1180
+ np.random.randn(feature_num, n_hidden) * np.sqrt(1 / feature_num)
1181
+ )
1182
+ init_out_noise = pm.floatX(np.random.randn(
1183
+ n_hidden) * np.sqrt(1 / n_hidden))
1184
+
1185
+ std_init_1_noise = pm.floatX(np.random.rand(feature_num, n_hidden))
1186
+ std_init_out_noise = pm.floatX(np.random.rand(n_hidden))
1187
+
1188
+ # If there are two hidden layers, then initialize weights for the second layer:
1189
+ if n_layers == 2:
1190
+ init_2 = pm.floatX(np.random.randn(
1191
+ n_hidden, n_hidden) * np.sqrt(1 / n_hidden))
1192
+ std_init_2 = pm.floatX(np.random.rand(n_hidden, n_hidden))
1193
+ init_2_noise = pm.floatX(
1194
+ np.random.randn(n_hidden, n_hidden) * np.sqrt(1 / n_hidden)
1195
+ )
1196
+ std_init_2_noise = pm.floatX(np.random.rand(n_hidden, n_hidden))
1197
+
1198
+ with pm.Model() as model:
1199
+ X = pm.Data("X", X)
1200
+ y = pm.Data("y", y)
1201
+
1202
+ if idata is not None: # Used when estimating/predicting on a new site
1203
+ weights_in_1_grp = from_posterior(
1204
+ "w_in_1_grp",
1205
+ idata["w_in_1_grp"],
1206
+ distribution="normal",
1207
+ freedom=configs["freedom"],
1208
+ )
1209
+
1210
+ weights_in_1_grp_sd = from_posterior(
1211
+ "w_in_1_grp_sd",
1212
+ idata["w_in_1_grp_sd"],
1213
+ distribution="hcauchy",
1214
+ freedom=configs["freedom"],
1215
+ )
1216
+
1217
+ if n_layers == 2:
1218
+ weights_1_2_grp = from_posterior(
1219
+ "w_1_2_grp",
1220
+ idata["w_1_2_grp"],
1221
+ distribution="normal",
1222
+ freedom=configs["freedom"],
1223
+ )
1224
+
1225
+ weights_1_2_grp_sd = from_posterior(
1226
+ "w_1_2_grp_sd",
1227
+ idata["w_1_2_grp_sd"],
1228
+ distribution="hcauchy",
1229
+ freedom=configs["freedom"],
1230
+ )
1231
+
1232
+ weights_2_out_grp = from_posterior(
1233
+ "w_2_out_grp",
1234
+ idata["w_2_out_grp"],
1235
+ distribution="normal",
1236
+ freedom=configs["freedom"],
1237
+ )
1238
+
1239
+ weights_2_out_grp_sd = from_posterior(
1240
+ "w_2_out_grp_sd",
1241
+ idata["w_2_out_grp_sd"],
1242
+ distribution="hcauchy",
1243
+ freedom=configs["freedom"],
1244
+ )
1245
+
1246
+ mu_prior_intercept = from_posterior(
1247
+ "mu_prior_intercept",
1248
+ idata["mu_prior_intercept"],
1249
+ distribution="normal",
1250
+ freedom=configs["freedom"],
1251
+ )
1252
+ sigma_prior_intercept = from_posterior(
1253
+ "sigma_prior_intercept",
1254
+ idata["sigma_prior_intercept"],
1255
+ distribution="hcauchy",
1256
+ freedom=configs["freedom"],
1257
+ )
1258
+
1259
+ else:
1260
+ # Group the mean distribution for input to the hidden layer:
1261
+ weights_in_1_grp = pm.Normal(
1262
+ "w_in_1_grp", 0, sd=1, shape=(feature_num, n_hidden), testval=init_1
1263
+ )
1264
+
1265
+ # Group standard deviation:
1266
+ weights_in_1_grp_sd = pm.HalfCauchy(
1267
+ "w_in_1_grp_sd", 1.0, shape=(feature_num, n_hidden), testval=std_init_1
1268
+ )
1269
+
1270
+ if n_layers == 2:
1271
+ # Group the mean distribution for hidden layer 1 to hidden layer 2:
1272
+ weights_1_2_grp = pm.Normal(
1273
+ "w_1_2_grp", 0, sd=1, shape=(n_hidden, n_hidden), testval=init_2
1274
+ )
1275
+
1276
+ # Group standard deviation:
1277
+ weights_1_2_grp_sd = pm.HalfCauchy(
1278
+ "w_1_2_grp_sd", 1.0, shape=(n_hidden, n_hidden), testval=std_init_2
1279
+ )
1280
+
1281
+ # Group the mean distribution for hidden to output:
1282
+ weights_2_out_grp = pm.Normal(
1283
+ "w_2_out_grp", 0, sd=1, shape=(n_hidden,), testval=init_out
1284
+ )
1285
+
1286
+ # Group standard deviation:
1287
+ weights_2_out_grp_sd = pm.HalfCauchy(
1288
+ "w_2_out_grp_sd", 1.0, shape=(n_hidden,), testval=std_init_out
1289
+ )
1290
+
1291
+ # mu_prior_intercept = pm.Uniform('mu_prior_intercept', lower=-100, upper=100)
1292
+ mu_prior_intercept = pm.Normal(
1293
+ "mu_prior_intercept", mu=0.0, sigma=1e3)
1294
+ sigma_prior_intercept = pm.HalfCauchy("sigma_prior_intercept", 5)
1295
+
1296
+ # Now create separate weights for each group, by doing
1297
+ # weights * group_sd + group_mean, we make sure the new weights are
1298
+ # coming from the (group_mean, group_sd) distribution.
1299
+ weights_in_1_raw = pm.Normal(
1300
+ "w_in_1", 0, sd=1, shape=(batch_effects_size + [feature_num, n_hidden])
1301
+ )
1302
+ weights_in_1 = weights_in_1_raw * weights_in_1_grp_sd + weights_in_1_grp
1303
+
1304
+ if n_layers == 2:
1305
+ weights_1_2_raw = pm.Normal(
1306
+ "w_1_2", 0, sd=1, shape=(batch_effects_size + [n_hidden, n_hidden])
1307
+ )
1308
+ weights_1_2 = weights_1_2_raw * weights_1_2_grp_sd + weights_1_2_grp
1309
+
1310
+ weights_2_out_raw = pm.Normal(
1311
+ "w_2_out", 0, sd=1, shape=(batch_effects_size + [n_hidden])
1312
+ )
1313
+ weights_2_out = weights_2_out_raw * weights_2_out_grp_sd + weights_2_out_grp
1314
+
1315
+ intercepts_offset = pm.Normal(
1316
+ "intercepts_offset", mu=0, sd=1, shape=(batch_effects_size)
1317
+ )
1318
+
1319
+ intercepts = pm.Deterministic(
1320
+ "intercepts", intercepts_offset + mu_prior_intercept * sigma_prior_intercept
1321
+ )
1322
+
1323
+ # Build the neural network and estimate y_hat:
1324
+ y_hat = pytensor.tensor.zeros(y.shape)
1325
+ for be in be_idx:
1326
+ # Find the indices corresponding to 'group be':
1327
+ a = []
1328
+ for i, b in enumerate(be):
1329
+ a.append(batch_effects[:, i] == b)
1330
+ idx = reduce(np.logical_and, a).nonzero()
1331
+ if idx[0].shape[0] != 0:
1332
+ act_1 = pm.math.tanh(pytensor.tensor.dot(
1333
+ X[idx, :], weights_in_1[be]))
1334
+ if n_layers == 2:
1335
+ act_2 = pm.math.tanh(
1336
+ pytensor.tensor.dot(act_1, weights_1_2[be]))
1337
+ y_hat = pytensor.tensor.set_subtensor(
1338
+ y_hat[idx, 0],
1339
+ intercepts[be] +
1340
+ pytensor.tensor.dot(act_2, weights_2_out[be]),
1341
+ )
1342
+ else:
1343
+ y_hat = pytensor.tensor.set_subtensor(
1344
+ y_hat[idx, 0],
1345
+ intercepts[be] +
1346
+ pytensor.tensor.dot(act_1, weights_2_out[be]),
1347
+ )
1348
+
1349
+ # If we want to estimate varying noise terms across groups:
1350
+ if configs["random_noise"]:
1351
+ if configs["hetero_noise"]:
1352
+ if idata is not None:  # Used when estimating/predicting on a new site
1353
+ weights_in_1_grp_noise = from_posterior(
1354
+ "w_in_1_grp_noise",
1355
+ idata["w_in_1_grp_noise"],
1356
+ distribution="normal",
1357
+ freedom=configs["freedom"],
1358
+ )
1359
+
1360
+ weights_in_1_grp_sd_noise = from_posterior(
1361
+ "w_in_1_grp_sd_noise",
1362
+ idata["w_in_1_grp_sd_noise"],
1363
+ distribution="hcauchy",
1364
+ freedom=configs["freedom"],
1365
+ )
1366
+
1367
+ if n_layers == 2:
1368
+ weights_1_2_grp_noise = from_posterior(
1369
+ "w_1_2_grp_noise",
1370
+ idata["w_1_2_grp_noise"],
1371
+ distribution="normal",
1372
+ freedom=configs["freedom"],
1373
+ )
1374
+
1375
+ weights_1_2_grp_sd_noise = from_posterior(
1376
+ "w_1_2_grp_sd_noise",
1377
+ idata["w_1_2_grp_sd_noise"],
1378
+ distribution="hcauchy",
1379
+ freedom=configs["freedom"],
1380
+ )
1381
+
1382
+ weights_2_out_grp_noise = from_posterior(
1383
+ "w_2_out_grp_noise",
1384
+ idata["w_2_out_grp_noise"],
1385
+ distribution="normal",
1386
+ freedom=configs["freedom"],
1387
+ )
1388
+
1389
+ weights_2_out_grp_sd_noise = from_posterior(
1390
+ "w_2_out_grp_sd_noise",
1391
+ idata["w_2_out_grp_sd_noise"],
1392
+ distribution="hcauchy",
1393
+ freedom=configs["freedom"],
1394
+ )
1395
+
1396
+ else:
1397
+ # The input layer to the first hidden layer:
1398
+ weights_in_1_grp_noise = pm.Normal(
1399
+ "w_in_1_grp_noise",
1400
+ 0,
1401
+ sigma=1,
1402
+ shape=(feature_num, n_hidden),
1403
+ initval=init_1_noise,
1404
+ )
1405
+ weights_in_1_grp_sd_noise = pm.HalfCauchy(
1406
+ "w_in_1_grp_sd_noise",
1407
+ 1,
1408
+ shape=(feature_num, n_hidden),
1409
+ initval=std_init_1_noise,
1410
+ )
1411
+
1412
+ # The first hidden layer to second hidden layer:
1413
+ if n_layers == 2:
1414
+ weights_1_2_grp_noise = pm.Normal(
1415
+ "w_1_2_grp_noise",
1416
+ 0,
1417
+ sigma=1,
1418
+ shape=(n_hidden, n_hidden),
1419
+ initval=init_2_noise,
1420
+ )
1421
+ weights_1_2_grp_sd_noise = pm.HalfCauchy(
1422
+ "w_1_2_grp_sd_noise",
1423
+ 1,
1424
+ shape=(n_hidden, n_hidden),
1425
+ initval=std_init_2_noise,
1426
+ )
1427
+
1428
+ # The second hidden layer to output layer:
1429
+ weights_2_out_grp_noise = pm.Normal(
1430
+ "w_2_out_grp_noise",
1431
+ 0,
1432
+ sigma=1,
1433
+ shape=(n_hidden,),
1434
+ initval=init_out_noise,
1435
+ )
1436
+ weights_2_out_grp_sd_noise = pm.HalfCauchy(
1437
+ "w_2_out_grp_sd_noise",
1438
+ 1,
1439
+ shape=(n_hidden,),
1440
+ initval=std_init_out_noise,
1441
+ )
1442
+
1443
+ # mu_prior_intercept_noise = pm.HalfNormal('mu_prior_intercept_noise', sigma=1e3)
1444
+ # sigma_prior_intercept_noise = pm.HalfCauchy('sigma_prior_intercept_noise', 5)
1445
+
1446
+ # Now create separate weights for each group:
1447
+ weights_in_1_raw_noise = pm.Normal(
1448
+ "w_in_1_noise",
1449
+ 0,
1450
+ sigma=1,
1451
+ shape=(batch_effects_size + [feature_num, n_hidden]),
1452
+ )
1453
+ weights_in_1_noise = (
1454
+ weights_in_1_raw_noise * weights_in_1_grp_sd_noise
1455
+ + weights_in_1_grp_noise
1456
+ )
1457
+
1458
+ if n_layers == 2:
1459
+ weights_1_2_raw_noise = pm.Normal(
1460
+ "w_1_2_noise",
1461
+ 0,
1462
+ sigma=1,
1463
+ shape=(batch_effects_size + [n_hidden, n_hidden]),
1464
+ )
1465
+ weights_1_2_noise = (
1466
+ weights_1_2_raw_noise * weights_1_2_grp_sd_noise
1467
+ + weights_1_2_grp_noise
1468
+ )
1469
+
1470
+ weights_2_out_raw_noise = pm.Normal(
1471
+ "w_2_out_noise", 0, sd=1, shape=(batch_effects_size + [n_hidden])
1472
+ )
1473
+ weights_2_out_noise = (
1474
+ weights_2_out_raw_noise * weights_2_out_grp_sd_noise
1475
+ + weights_2_out_grp_noise
1476
+ )
1477
+
1478
+ # intercepts_offset_noise = pm.Normal('intercepts_offset_noise', mu=0, sd=1,
1479
+ # shape=(batch_effects_size))
1480
+
1481
+ # intercepts_noise = pm.Deterministic('intercepts_noise', mu_prior_intercept_noise +
1482
+ # intercepts_offset_noise * sigma_prior_intercept_noise)
1483
+
1484
+ # Build the neural network and estimate the sigma_y:
1485
+ sigma_y = pytensor.tensor.zeros(y.shape)
1486
+ for be in be_idx:
1487
+ a = []
1488
+ for i, b in enumerate(be):
1489
+ a.append(batch_effects[:, i] == b)
1490
+ idx = reduce(np.logical_and, a).nonzero()
1491
+ if idx[0].shape[0] != 0:
1492
+ act_1_noise = pm.math.sigmoid(
1493
+ pytensor.tensor.dot(
1494
+ X[idx, :], weights_in_1_noise[be])
1495
+ )
1496
+ if n_layers == 2:
1497
+ act_2_noise = pm.math.sigmoid(
1498
+ pytensor.tensor.dot(
1499
+ act_1_noise, weights_1_2_noise[be])
1500
+ )
1501
+ temp = (
1502
+ pm.math.log1pexp(
1503
+ pytensor.tensor.dot(
1504
+ act_2_noise, weights_2_out_noise[be]
1505
+ )
1506
+ )
1507
+ + 1e-5
1508
+ )
1509
+ else:
1510
+ temp = (
1511
+ pm.math.log1pexp(
1512
+ pytensor.tensor.dot(
1513
+ act_1_noise, weights_2_out_noise[be]
1514
+ )
1515
+ )
1516
+ + 1e-5
1517
+ )
1518
+ sigma_y = pytensor.tensor.set_subtensor(
1519
+ sigma_y[idx, 0], temp)
1520
+
1521
+ else: # homoscedastic noise:
1522
+ if idata is not None: # Used for transferring the priors
1523
+ upper_bound = np.percentile(idata["sigma_noise"], 95)
1524
+ sigma_noise = pm.Uniform(
1525
+ "sigma_noise",
1526
+ lower=0,
1527
+ upper=2 * upper_bound,
1528
+ shape=(batch_effects_size),
1529
+ )
1530
+ else:
1531
+ sigma_noise = pm.Uniform(
1532
+ "sigma_noise", lower=0, upper=100, shape=(batch_effects_size)
1533
+ )
1534
+
1535
+ sigma_y = pytensor.tensor.zeros(y.shape)
1536
+ for be in be_idx:
1537
+ a = []
1538
+ for i, b in enumerate(be):
1539
+ a.append(batch_effects[:, i] == b)
1540
+ idx = reduce(np.logical_and, a).nonzero()
1541
+ if idx[0].shape[0] != 0:
1542
+ sigma_y = pytensor.tensor.set_subtensor(
1543
+ sigma_y[idx, 0], sigma_noise[be]
1544
+ )
1545
+
1546
+ else: # do not allow for random noise terms across groups:
1547
+ if idata is not None: # Used for transferring the priors
1548
+ upper_bound = np.percentile(idata["sigma_noise"], 95)
1549
+ sigma_noise = pm.Uniform(
1550
+ "sigma_noise", lower=0, upper=2 * upper_bound)
1551
+ else:
1552
+ sigma_noise = pm.Uniform("sigma_noise", lower=0, upper=100)
1553
+ sigma_y = pytensor.tensor.zeros(y.shape)
1554
+ for be in be_idx:
1555
+ a = []
1556
+ for i, b in enumerate(be):
1557
+ a.append(batch_effects[:, i] == b)
1558
+ idx = reduce(np.logical_and, a).nonzero()
1559
+ if idx[0].shape[0] != 0:
1560
+ sigma_y = pytensor.tensor.set_subtensor(
1561
+ sigma_y[idx, 0], sigma_noise
1562
+ )
1563
+
1564
+ if configs["skewed_likelihood"]:
1565
+ skewness = pm.Uniform(
1566
+ "skewness", lower=-10, upper=10, shape=(batch_effects_size)
1567
+ )
1568
+ alpha = pytensor.tensor.zeros(y.shape)
1569
+ for be in be_idx:
1570
+ a = []
1571
+ for i, b in enumerate(be):
1572
+ a.append(batch_effects[:, i] == b)
1573
+ idx = reduce(np.logical_and, a).nonzero()
1574
+ if idx[0].shape[0] != 0:
1575
+ alpha = pytensor.tensor.set_subtensor(
1576
+ alpha[idx, 0], skewness[be])
1577
+ else:
1578
+ alpha = 0 # symmetrical normal distribution
1579
+
1580
+ y_like = pm.SkewNormal(
1581
+ "y_like", mu=y_hat, sigma=sigma_y, alpha=alpha, observed=y
1582
+ )
1583
+
1584
+ return model