peak-performance 0.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,711 @@
1
+ """
2
+ PeakPerformance
3
+ Copyright (C) 2023 Forschungszentrum Jülich GmbH
4
+
5
+ This program is free software: you can redistribute it and/or modify
6
+ it under the terms of the GNU Affero General Public License as published
7
+ by the Free Software Foundation, either version 3 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU Affero General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Affero General Public License
16
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
17
+ """
18
+
19
+ from enum import Enum
20
+ from typing import Mapping, Union
21
+
22
+ import arviz as az
23
+ import numpy as np
24
+ import pandas
25
+ import pymc as pm
26
+ import pytensor.tensor as pt
27
+ import scipy.stats as st
28
+
29
+
30
class ModelType(str, Enum):
    """Class containing all implemented model types.

    Inherits from ``str`` so members compare equal to their plain string
    values (e.g. ``ModelType.Normal == "normal"``), which makes the enum
    safe to use anywhere a model-name string is expected.
    """

    # single peak, symmetric
    Normal = "normal"
    # single peak, skewed
    SkewNormal = "skew_normal"
    # two overlapping symmetric peaks
    DoubleNormal = "double_normal"
    # two overlapping skewed peaks
    DoubleSkewNormal = "double_skew_normal"
37
+
38
+
39
def initial_guesses(time: np.ndarray, intensity: np.ndarray):
    """
    Provide initial guesses for priors.

    A rough linear baseline is fitted through the first and last three data
    points; the intensity data corrected by that rough baseline is used only
    to decide which data points count as noise. A linear regression through
    the noise points then yields the baseline prior parameters.

    Parameters
    ----------
    time
        NumPy array with the time values of the relevant timeframe.
    intensity
        NumPy array with the intensity values of the relevant timeframe.

    Returns
    -------
    baseline_fit.slope : float or int
        Guess for the slope of the linear baseline prior.
    baseline_fit.intercept : float or int
        Guess for the intercept of the linear baseline prior.
    noise_width_guess : float or int
        Guess for the width of the noise.
    """
    # rough baseline through the edges of the window (first/last 3 points)
    edge_start_mean = np.mean(intensity[:3])
    edge_end_mean = np.mean(intensity[-3:])
    rough_slope = (edge_end_mean - edge_start_mean) / (time[-1] - time[0])
    # intercept follows from the linear equation at the first time point
    rough_intercept = edge_start_mean - rough_slope * time[0]
    # baseline-corrected intensities; used solely for noise selection
    corrected = intensity - (rough_slope * time + rough_intercept)

    # the lowest 35 % of corrected data points are declared noise
    n_noise = int(np.round(0.35 * len(corrected), decimals=0))
    # stable sort keeps the original tie order, then restore time order
    noise_idx = np.sort(np.argsort(corrected, kind="stable")[:n_noise])
    noise_time = time[noise_idx]
    noise_intensity = intensity[noise_idx]
    # the peak-to-peak spread of the noise points estimates the noise width
    noise_width_guess = np.max(noise_intensity) - np.min(noise_intensity)

    # linear regression through the noise points -> baseline prior
    baseline_fit = st.linregress(noise_time, noise_intensity)

    return baseline_fit.slope, baseline_fit.intercept, noise_width_guess
89
+
90
+
91
def baseline_intercept_prior_params(intercept_guess: Union[float, int]) -> Mapping[str, float]:
    """
    Centralized function for supplying parameters for the baseline intercept prior.

    Returns
    -------
    parameter_dict
        Dictionary containing mu and sigma for the normally distributed prior.
    """
    # sigma scales with the guess but never drops below 0.05, so the prior
    # stays sampleable even for an intercept guess of (nearly) zero
    sigma = np.clip(abs(intercept_guess / 6), 0.05, np.inf)
    return {"mu": intercept_guess, "sigma": sigma}
104
+
105
+
106
def baseline_slope_prior_params(slope_guess: Union[float, int]) -> Mapping[str, float]:
    """
    Centralized function for supplying parameters for the baseline slope prior.

    Returns
    -------
    parameter_dict
        Dictionary containing mu and sigma for the normally distributed prior.
    """
    # sigma scales with the guess but is floored at 0.5 so a flat baseline
    # guess does not collapse the prior
    sigma = np.clip(abs(slope_guess / 5), 0.5, np.inf)
    return {"mu": slope_guess, "sigma": sigma}
119
+
120
+
121
def normal_posterior(baseline, time: np.ndarray, mean, std, *, height):
    """
    Model a peak shaped like the PDF of a normal distribution.

    Parameters
    ----------
    baseline
        Baseline of the data.
    time
        NumPy array with the time values of the relevant timeframe.
    mean
        Arithmetic mean of the normal distribution.
    std
        Standard deviation of the normal distribution.
    height
        Height of the normal distribution (starting from the baseline, thus not the total height).

    Returns
    -------
    Probability density function (PDF) of the normally distributed posterior.
    """
    # standardized distance of each time point from the peak center
    z = (time - mean) / std
    # Gaussian bump of the given height on top of the baseline
    peak = height * pt.exp(-0.5 * z**2)
    return baseline + peak
143
+
144
+
145
def define_model_normal(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
    """
    Define a model for fitting a normal distribution to the peak data.

    Parameters
    ----------
    time
        NumPy array with the time values of the relevant timeframe.
    intensity
        NumPy array with the intensity values of the relevant timeframe.

    Returns
    -------
    pmodel
        PyMC model.
    """
    slope_guess, intercept_guess, noise_width_guess = initial_guesses(time, intensity)
    with pm.Model() as pmodel:
        # add observations to the pmodel as ConstantData
        pm.ConstantData("time", time)
        pm.ConstantData("intensity", intensity)
        # add guesses to the pmodel as ConstantData (kept for traceability in the trace)
        pm.ConstantData("intercept_guess", intercept_guess)
        pm.ConstantData("slope_guess", slope_guess)
        pm.ConstantData("noise_width_guess", noise_width_guess)

        # priors plus error handling in case of mathematically impermissible values
        baseline_intercept = pm.Normal(
            "baseline_intercept", **baseline_intercept_prior_params(intercept_guess)
        )
        baseline_slope = pm.Normal("baseline_slope", **baseline_slope_prior_params(slope_guess))
        baseline = pm.Deterministic("baseline", baseline_intercept + baseline_slope * time)
        # log of the noise-width guess is floored at log(10) so the LogNormal
        # location stays finite/sane for very small (or zero) width guesses
        noise = pm.LogNormal("noise", np.clip(np.log(noise_width_guess), np.log(10), np.inf), 1)
        # define priors for parameters of a normally distributed posterior
        # peak center prior: midpoint of the window, sigma = half the window span
        mean = pm.Normal("mean", np.mean(time[[0, -1]]), np.ptp(time) / 2)
        std = pm.HalfNormal("std", np.ptp(time) / 3)
        height = pm.HalfNormal("height", 0.95 * np.max(intensity))
        # area of a Gaussian = height / (peak density) = height * std * sqrt(2*pi)
        pm.Deterministic("area", height / (1 / (std * np.sqrt(2 * np.pi))))
        # signal-to-noise ratio
        pm.Deterministic("sn", height / noise)
        # posterior
        y = normal_posterior(baseline, time, mean, std, height=height)
        y = pm.Deterministic("y", y)

        # likelihood
        pm.Normal("L", mu=y, sigma=noise, observed=intensity)

    return pmodel
192
+
193
+
194
def double_model_mean_prior(time):
    """
    Function creating prior probability distributions for double peaks using a ZeroSumNormal distribution.

    Parameters
    ----------
    time
        NumPy array with the time values of the relevant timeframe.

    Returns
    -------
    mean
        Normally distributed prior for the ordered means of the double peak model.
    diff
        Difference between meanmean and mean.
    meanmean
        Normally distributed prior for the mean of the double peak means.
    """
    # center of the double peak: midpoint of the time window
    meanmean = pm.Normal("meanmean", mu=np.min(time) + np.ptp(time) / 2, sigma=np.ptp(time) / 6)
    # symmetric offsets of the two sub-peak means around meanmean
    diff = pm.ZeroSumNormal(
        "diff",
        sigma=1,
        shape=(2,),  # currently no dims due to bug with ordered transformation
    )
    # ordered transform guarantees mean[0] <= mean[1], preventing label switching
    mean = pm.Normal(
        "mean",
        mu=meanmean + diff,
        sigma=1,
        transform=pm.distributions.transforms.ordered,
        dims=("subpeak",),
    )
    return mean, diff, meanmean
226
+
227
+
228
def double_normal_posterior(baseline, time: np.ndarray, mean, std, *, height):
    """
    Define a univariate ordered normal distribution as the posterior.

    Parameters
    ----------
    baseline
        Baseline of the data.
    time
        NumPy array with the time values of the relevant timeframe.
    mean
        Arithmetic mean of the normal distribution.
    std
        Standard deviation of the first and second peak.
    height
        Height of the first and second peak.

    Returns
    -------
    y
        Probability density function (PDF) of a univariate ordered normal distribution as the posterior.
    """
    # standardized distances from each sub-peak center
    z_first = (time - mean[0]) / std[0]
    z_second = (time - mean[1]) / std[1]
    # two Gaussian bumps stacked on the shared baseline
    first_peak = height[0] * pt.exp(-0.5 * z_first**2)
    second_peak = height[1] * pt.exp(-0.5 * z_second**2)
    return baseline + first_peak + second_peak
256
+
257
+
258
def define_model_double_normal(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
    """
    Define a model for fitting two ordered normal distributions to the peak data
    (for when data contains two peaks or a double peak without baseline separation).

    Parameters
    ----------
    time
        NumPy array with the time values of the relevant timeframe.
    intensity
        NumPy array with the intensity values of the relevant timeframe.

    Returns
    -------
    pmodel
        PyMC model.
    """
    slope_guess, intercept_guess, noise_width_guess = initial_guesses(time, intensity)
    # "subpeak" dimension indexes the two sub-peaks of the double peak
    coords = {"subpeak": [0, 1]}
    with pm.Model(coords=coords) as pmodel:
        # add observations to the pmodel as ConstantData
        pm.ConstantData("time", time)
        pm.ConstantData("intensity", intensity)
        # add guesses to the pmodel as ConstantData
        pm.ConstantData("intercept_guess", intercept_guess)
        pm.ConstantData("slope_guess", slope_guess)
        pm.ConstantData("noise_width_guess", noise_width_guess)

        # priors
        baseline_intercept = pm.Normal(
            "baseline_intercept", **baseline_intercept_prior_params(intercept_guess)
        )
        baseline_slope = pm.Normal("baseline_slope", **baseline_slope_prior_params(slope_guess))
        baseline = pm.Deterministic("baseline", baseline_intercept + baseline_slope * time)
        # log of the noise-width guess is floored at log(10) to keep the
        # LogNormal location finite for very small width guesses
        noise = pm.LogNormal("noise", np.clip(np.log(noise_width_guess), np.log(10), np.inf), 1)
        # one std/height per sub-peak (identical priors for both)
        std = pm.HalfNormal("std", sigma=[np.ptp(time) / 3, np.ptp(time) / 3], dims=("subpeak",))
        height = pm.HalfNormal(
            "height", sigma=[0.95 * np.max(intensity), 0.95 * np.max(intensity)], dims=("subpeak",)
        )
        # area of a Gaussian = height * std * sqrt(2*pi), per sub-peak
        pm.Deterministic("area", height / (1 / (std * np.sqrt(2 * np.pi))), dims=("subpeak",))
        pm.Deterministic("sn", height / noise, dims=("subpeak",))
        # use univariate ordered normal distribution for the mean values
        # use a zero sum normal distribution to describe the distance of the mean values
        # from the mean of the mean values ("meanmean")
        mean, diff, meanmean = double_model_mean_prior(time)

        # posterior
        y = double_normal_posterior(baseline, time, mean, std, height=height)
        y = pm.Deterministic("y", y)

        # likelihood
        pm.Normal("L", mu=y, sigma=noise, observed=intensity)

    return pmodel
312
+
313
+
314
def std_skew_calculation(scale, alpha):
    """
    Calculate the standard deviation of a skew normal distribution with f(x | loc, scale, alpha).

    Parameters
    ----------
    scale
        Scale parameter of the skew normal distribution.
    alpha
        Skewness parameter of the skew normal distribution.

    Returns
    -------
    std
        Standard deviation of a skew normal distribution.
    """
    # variance shrink factor relative to scale**2; equals 1 for alpha = 0
    shrink = 1 - (2 * alpha**2) / ((alpha**2 + 1) * np.pi)
    return np.sqrt(scale**2 * shrink)
332
+
333
+
334
def mean_skew_calculation(loc, scale, alpha):
    """
    Calculate the arithmetic mean of a skew normal distribution with f(x | loc, scale, alpha).

    Parameters
    ----------
    loc
        Location parameter of the skew normal distribution.
    scale
        Scale parameter of the skew normal distribution.
    alpha
        Skewness parameter of the skew normal distribution.

    Returns
    -------
    mean
        Arithmetic mean of a skew normal distribution.
    """
    # mean = loc + scale * sqrt(2/pi) * delta, with delta = alpha / sqrt(1 + alpha**2)
    factor = np.sqrt(2 / np.pi)
    return loc + scale * factor * alpha / (np.sqrt(1 + alpha**2))
353
+
354
+
355
def delta_calculation(alpha):
    """
    Calculate the delta term included in several subsequent formulae.

    Parameters
    ----------
    alpha
        Skewness parameter of the skew normal distribution.

    Returns
    -------
    delta
        Delta term alpha / sqrt(1 + alpha**2), bounded in (-1, 1).
    """
    denominator = np.sqrt(1 + alpha**2)
    return alpha / denominator
365
+
366
+
367
def mue_z_calculation(delta):
    """Calculate the mue_z variable which is needed to compute a numerical approximation of the mode of a skew normal distribution."""
    # mue_z = sqrt(2/pi) * delta
    scaling = np.sqrt(2 / np.pi)
    return scaling * delta
370
+
371
+
372
def sigma_z_calculation(mue_z):
    """Calculate the sigma_z variable which is needed to compute a numerical approximation of the mode of a skew normal distribution."""
    # sigma_z = sqrt(1 - mue_z**2)
    return np.sqrt(1 - mue_z**2)
375
+
376
+
377
def skewness_calculation(delta):
    """Calculate the skewness of a skew normal distribution."""
    # standard skew-normal skewness formula expressed via delta
    numerator = (delta * np.sqrt(2 / np.pi)) ** 3
    denominator = (1 - 2 * delta**2 / np.pi) ** 1.5
    return (4 - np.pi) / 2 * numerator / denominator
385
+
386
+
387
def mode_offset_calculation(mue_z, skewness, sigma_z, alpha):
    """Calculate the offset between arithmetic mean and mode of a skew normal distribution."""
    # this formula originally contained the sign() function which led to an error due to usage of pytensor variables
    # -> use alpha/abs(alpha) instead for the same effect
    sign_of_alpha = alpha / abs(alpha)
    tail_correction = sign_of_alpha / 2 * pt.exp(-(2 * np.pi) / abs(alpha))
    return mue_z - (skewness * sigma_z) / 2 - tail_correction
396
+
397
+
398
def mode_skew_calculation(loc, scale, mode_offset):
    """Calculate a numerical approximation of the mode of a skew normal distribution."""
    # shift the location by the (scale-normalized) mode offset
    shift = scale * mode_offset
    return loc + shift
401
+
402
+
403
def height_calculation(area, loc, scale, alpha, mode_skew):
    """
    Calculate the height of a skew normal distribution.
    The formula is the result of inserting time = mode_skew into the posterior.

    Parameters
    ----------
    area
        Area of the peak described by the skew normal distribution (area between baseline and skew normal distribution).
    loc
        Location parameter of the skew normal distribution.
    scale
        Scale parameter of the skew normal distribution.
    alpha
        Skewness parameter of the skew normal distribution.
    mode_skew
        Mode of the skew normal distribution.

    Returns
    -------
    height
        Height of the skew normal distribution at its mode.
    """
    # skew normal PDF = 2 * phi(z) * Phi(alpha * z), evaluated at the mode
    z_mode = (mode_skew - loc) / scale
    normal_pdf = 1 / (scale * np.sqrt(2 * np.pi)) * pt.exp(-0.5 * z_mode**2)
    normal_cdf = 0.5 * (1 + pt.erf(((alpha * (mode_skew - loc) / scale)) / np.sqrt(2)))
    return area * (2 * normal_pdf * normal_cdf)
431
+
432
+
433
def skew_normal_posterior(baseline, time, mean, std, alpha, *, area):
    """
    Define a skew normally distributed posterior.

    Parameters
    ----------
    baseline
        Baseline of the data.
    time
        NumPy array with the time values of the relevant timeframe.
    mean
        Location parameter, i.e. arithmetic mean.
    std
        Scale parameter, i.e. standard deviation.
    alpha
        Skewness parameter.
    area
        Peak area.

    Returns
    -------
    y
        Probability density function (PDF) of the skew normally distributed posterior.
    """
    # skew normal PDF = 2 * phi(z) * Phi(alpha * z), scaled by the peak area
    normal_pdf = 1 / (std * np.sqrt(2 * np.pi)) * pt.exp(-0.5 * ((time - mean) / std) ** 2)
    normal_cdf = 0.5 * (1 + pt.erf(((alpha * (time - mean) / std)) / np.sqrt(2)))
    return baseline + area * (2 * normal_pdf * normal_cdf)
466
+
467
+
468
def define_model_skew(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
    """
    Define a model for fitting a skew normal distribution to the peak data.

    Parameters
    ----------
    time
        NumPy array with the time values of the relevant timeframe.
    intensity
        NumPy array with the intensity values of the relevant timeframe.

    Returns
    -------
    pmodel
        PyMC model.
    """
    slope_guess, intercept_guess, noise_width_guess = initial_guesses(time, intensity)
    with pm.Model() as pmodel:
        # add observations to the pmodel as ConstantData
        pm.ConstantData("time", time)
        pm.ConstantData("intensity", intensity)
        # add guesses to the pmodel as ConstantData
        pm.ConstantData("intercept_guess", intercept_guess)
        pm.ConstantData("slope_guess", slope_guess)
        pm.ConstantData("noise_width_guess", noise_width_guess)

        # priors plus error handling in case of mathematically impermissible values
        baseline_intercept = pm.Normal(
            "baseline_intercept", **baseline_intercept_prior_params(intercept_guess)
        )
        baseline_slope = pm.Normal("baseline_slope", **baseline_slope_prior_params(slope_guess))
        baseline = pm.Deterministic("baseline", baseline_intercept + baseline_slope * time)
        # log of the noise-width guess is floored at log(10) so the LogNormal
        # location stays finite for very small width guesses
        noise = pm.LogNormal("noise", np.clip(np.log(noise_width_guess), np.log(10), np.inf), 1)
        # location prior: midpoint of the window, sigma = half the window span
        mean = pm.Normal("mean", np.mean(time[[0, -1]]), np.ptp(time) / 2)
        std = pm.HalfNormal("std", np.ptp(time) / 3)
        alpha = pm.Normal("alpha", 0, 3.5)
        area = pm.HalfNormal("area", np.max(intensity) * 0.9)

        # calculate standard deviation and arithmetic mean of a skew normal distribution
        std_skew_formula = std_skew_calculation(std, alpha)
        pm.Deterministic("std_skew", std_skew_formula)
        mean_skew_formula = mean_skew_calculation(mean, std, alpha)
        pm.Deterministic("mean_skew", mean_skew_formula)

        # height is defined as the posterior with x = mode
        # (mode of a skew normal has no closed form -> numerical approximation
        # via delta, mue_z, sigma_z, skewness, and mode_offset)
        delta_formula = delta_calculation(alpha)
        delta = pm.Deterministic("delta", delta_formula)
        mue_z_formula = mue_z_calculation(delta)
        mue_z = pm.Deterministic("mue_z", mue_z_formula)
        sigma_z_formula = sigma_z_calculation(mue_z)
        sigma_z = pm.Deterministic("sigma_z", sigma_z_formula)
        skewness = skewness_calculation(delta)
        mode_offset_formula = mode_offset_calculation(mue_z, skewness, sigma_z, alpha)
        mode_offset = pm.Deterministic("mode_offset", mode_offset_formula)
        mode_skew_formula = mode_skew_calculation(mean, std, mode_offset)
        mode_skew = pm.Deterministic("mode_skew", mode_skew_formula)
        # then calculate the height based on the mode
        height_formula = height_calculation(area, mean, std, alpha, mode_skew)
        height = pm.Deterministic(
            "height",
            height_formula,
        )
        # signal-to-noise ratio
        pm.Deterministic("sn", height / noise)
        y = skew_normal_posterior(baseline, time, mean, std, alpha, area=area)
        y = pm.Deterministic("y", y)

        # likelihood
        pm.Normal("L", mu=y, sigma=noise, observed=intensity)

    return pmodel
538
+
539
+
540
def double_skew_normal_posterior(baseline, time: np.ndarray, mean, std, alpha, *, area):
    """
    Define a univariate ordered skew normal distribution as the posterior.

    Parameters
    ----------
    baseline
        Baseline of the data.
    time
        NumPy array with the time values of the relevant timeframe.
    mean
        Location parameter.
    std
        Scale parameter of the first and second peak.
    alpha
        Skewness parameter of the first and second peak.
    area
        Area of the first and second peak.

    Returns
    -------
    y
        Probability density function (PDF) of a univariate ordered normal distribution as the posterior.
    """
    # sum the two skew normal sub-peaks (2 * phi(z) * Phi(alpha*z), area-scaled)
    # on top of the shared baseline
    y = baseline
    for i in (0, 1):
        normal_pdf = 1 / (std[i] * np.sqrt(2 * np.pi)) * pt.exp(-0.5 * ((time - mean[i]) / std[i]) ** 2)
        normal_cdf = 0.5 * (1 + pt.erf(((alpha[i] * (time - mean[i]) / std[i])) / np.sqrt(2)))
        y = y + area[i] * (2 * normal_pdf * normal_cdf)
    return y
580
+
581
+
582
def define_model_double_skew_normal(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
    """
    Define a model for fitting two ordered skew normal distributions to the peak data
    (for when data contains two peaks or a double peak without baseline separation).

    Parameters
    ----------
    time
        NumPy array with the time values of the relevant timeframe.
    intensity
        NumPy array with the intensity values of the relevant timeframe.

    Returns
    -------
    pmodel
        PyMC model.
    """
    slope_guess, intercept_guess, noise_width_guess = initial_guesses(time, intensity)
    # "subpeak" dimension indexes the two sub-peaks of the double peak
    coords = {"subpeak": [0, 1]}
    with pm.Model(coords=coords) as pmodel:
        # add observations to the pmodel as ConstantData
        pm.ConstantData("time", time)
        pm.ConstantData("intensity", intensity)
        # add guesses to the pmodel as ConstantData
        pm.ConstantData("intercept_guess", intercept_guess)
        pm.ConstantData("slope_guess", slope_guess)
        pm.ConstantData("noise_width_guess", noise_width_guess)

        # priors plus error handling in case of mathematically impermissible values
        baseline_intercept = pm.Normal(
            "baseline_intercept", **baseline_intercept_prior_params(intercept_guess)
        )
        baseline_slope = pm.Normal("baseline_slope", **baseline_slope_prior_params(slope_guess))
        baseline = pm.Deterministic("baseline", baseline_intercept + baseline_slope * time)
        # log of the noise-width guess is floored at log(10) to keep the
        # LogNormal location finite for very small width guesses
        noise = pm.LogNormal("noise", np.clip(np.log(noise_width_guess), np.log(10), np.inf), 1)
        # use univariate ordered normal distribution for the mean values
        # use a zero sum normal distribution to describe the distance of the mean values
        # from the mean of the mean values ("meanmean")
        mean, diff, meanmean = double_model_mean_prior(time)
        # one std/area/alpha per sub-peak (identical priors for both)
        std = pm.HalfNormal(
            "std",
            sigma=[np.ptp(time) / 3, np.ptp(time) / 3],
            dims=("subpeak",),
        )
        area = pm.HalfNormal(
            "area",
            sigma=[np.max(intensity) * 0.9, np.max(intensity) * 0.9],
            dims=("subpeak",),
        )
        alpha = pm.Normal(
            "alpha",
            mu=[0, 0],
            sigma=3.5,
            dims=("subpeak",),
        )

        # height is defined as the posterior with x = mode
        # (mode has no closed form -> numerical approximation, elementwise per sub-peak)
        delta_formula = delta_calculation(alpha)
        delta = pm.Deterministic("delta", delta_formula)
        mue_z_formula = mue_z_calculation(delta)
        mue_z = pm.Deterministic("mue_z", mue_z_formula)
        sigma_z_formula = sigma_z_calculation(mue_z)
        sigma_z = pm.Deterministic("sigma_z", sigma_z_formula)
        skewness = skewness_calculation(delta)
        mode_offset_formula = mode_offset_calculation(mue_z, skewness, sigma_z, alpha)
        mode_offset = pm.Deterministic("mode_offset", mode_offset_formula)
        mode_skew_formula = mode_skew_calculation(mean, std, mode_offset)
        mode_skew = pm.Deterministic("mode_skew", mode_skew_formula)
        # then calculate the height based on the mode
        height_formula = height_calculation(area, mean, std, alpha, mode_skew)
        height = pm.Deterministic(
            "height",
            height_formula,
        )
        # signal-to-noise ratio per sub-peak
        pm.Deterministic("sn", height / noise, dims=("subpeak",))

        # posterior
        y = double_skew_normal_posterior(baseline, time, mean, std, alpha, area=area)
        y = pm.Deterministic("y", y)

        # likelihood
        pm.Normal("L", mu=y, sigma=noise, observed=intensity)

    return pmodel
666
+
667
+
668
def compute_log_likelihood(pmodel: pm.Model, idata: az.InferenceData):
    """
    Method to compute the element-wise loglikelihood of every posterior sample and add it to a given inference data object.

    Parameters
    ----------
    pmodel
        PyMC model.
    idata
        Inference data object resulting from sampling.

    Returns
    -------
    idata
        Inference data object updated with element-wise loglikelihood of every posterior sample.
    """
    # pm.compute_log_likelihood mutates idata in place; it is returned as well
    # for caller convenience
    with pmodel:
        pm.compute_log_likelihood(idata)
    return idata
687
+
688
+
689
def model_comparison(
    compare_dict: Mapping[str, az.InferenceData],
    ic: str = "loo",
) -> pandas.DataFrame:
    """
    Method to compare the models detailed in compare_dict based on the leave-one-out cross-validation (loo)
    or the widely-applicable information criterion (waic).

    Parameters
    ----------
    compare_dict
        Dictionary with the model denominations as keys and their respective inference data objects as values.
    ic
        Choice of the information criterion with which models are ranked ("loo" or "waic").
        Default is "loo".

    Returns
    -------
    df_comp
        DataFrame containing the ranking of the given models.
    """
    # thin wrapper around ArviZ; the ranking logic lives entirely in az.compare
    ranking = az.compare(compare_dict=compare_dict, ic=ic)
    return ranking