peak-performance 0.6.3 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- peak_performance/__init__.py +13 -0
- peak_performance/models.py +711 -0
- peak_performance/pipeline.py +1596 -0
- peak_performance/plots.py +289 -0
- peak_performance/test_main.py +4 -0
- peak_performance/test_models.py +196 -0
- peak_performance/test_pipeline.py +662 -0
- peak_performance/test_plots.py +122 -0
- peak_performance-0.6.3.dist-info/LICENSE.md +619 -0
- peak_performance-0.6.3.dist-info/METADATA +63 -0
- peak_performance-0.6.3.dist-info/RECORD +13 -0
- peak_performance-0.6.3.dist-info/WHEEL +5 -0
- peak_performance-0.6.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,711 @@
"""
PeakPerformance
Copyright (C) 2023 Forschungszentrum Jülich GmbH

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
"""

from enum import Enum
from typing import Mapping, Union

import arviz as az
import numpy as np
import pandas
import pymc as pm
import pytensor.tensor as pt
import scipy.stats as st


class ModelType(str, Enum):
    """Class containing all implemented model types."""

    Normal = "normal"
    SkewNormal = "skew_normal"
    DoubleNormal = "double_normal"
    DoubleSkewNormal = "double_skew_normal"

def initial_guesses(time: np.ndarray, intensity: np.ndarray):
    """
    Provide initial guesses for priors.

    Parameters
    ----------
    time
        NumPy array with the time values of the relevant timeframe.
    intensity
        NumPy array with the intensity values of the relevant timeframe.

    Returns
    -------
    baseline_fit.slope : float or int
        Guess for the slope of the linear baseline prior.
    baseline_fit.intercept : float or int
        Guess for the intercept of the linear baseline prior.
    noise_width_guess : float or int
        Guess for the width of the noise.
    """
    # first create a simple baseline guess only to be able to "correct" the intensity data (y_corrected = y - y_baseline)
    # then, use the corrected data to determine which data points are going to be defined as noise
    # this is the only use of the corrected data
    average_initial_intensity = np.mean([intensity[n] for n in range(3)])
    average_final_intensity = np.mean(
        [intensity[n] for n in range(len(intensity) - 3, len(intensity))]
    )
    slope_guess = (average_final_intensity - average_initial_intensity) / (time[-1] - time[0])
    # calculate intercept_guess based on the slope_guess and the formula for a linear equation
    first_intercept_guess = average_initial_intensity - slope_guess * time[0]
    intensity_corrected = [
        intensity[n] - (slope_guess * time[n] + first_intercept_guess) for n in range(len(time))
    ]

    # select lowest 35 % of all data points as noise -> noise_tuple
    intensity_tuple = list(enumerate(intensity_corrected))
    intensity_tuple.sort(key=lambda x: x[1])
    noise_range = int(np.round(0.35 * len(intensity_corrected), decimals=0))
    noise_tuple = intensity_tuple[:noise_range]
    noise_index = sorted([x[0] for x in noise_tuple])
    # use the indices in noise_index to get the time and intensity of all noise data points
    noise_time = [time[n] for n in noise_index]
    noise_intensity = [intensity[n] for n in noise_index]
    # calculate the width of the noise
    noise_width_guess = max(noise_intensity) - min(noise_intensity)

    # use scipy to fit a linear regression through the noise as a prior for the eventual baseline
    baseline_fit = st.linregress(noise_time, noise_intensity)

    return baseline_fit.slope, baseline_fit.intercept, noise_width_guess

def baseline_intercept_prior_params(intercept_guess: Union[float, int]) -> Mapping[str, float]:
    """
    Centralized function for supplying parameters for the baseline intercept prior.

    Returns
    -------
    parameter_dict
        Dictionary containing mu and sigma for the normally distributed prior.
    """
    return {
        "mu": intercept_guess,
        "sigma": np.clip(abs(intercept_guess / 6), 0.05, np.inf),
    }


def baseline_slope_prior_params(slope_guess: Union[float, int]) -> Mapping[str, float]:
    """
    Centralized function for supplying parameters for the baseline slope prior.

    Returns
    -------
    parameter_dict
        Dictionary containing mu and sigma for the normally distributed prior.
    """
    return {
        "mu": slope_guess,
        "sigma": np.clip(abs(slope_guess / 5), 0.5, np.inf),
    }

def normal_posterior(baseline, time: np.ndarray, mean, std, *, height):
    """
    Model a peak shaped like the PDF of a normal distribution.

    Parameters
    ----------
    baseline
        Baseline of the data.
    time
        NumPy array with the time values of the relevant timeframe.
    mean
        Arithmetic mean of the normal distribution.
    std
        Standard deviation of the normal distribution.
    height
        Height of the normal distribution (starting from the baseline, thus not the total height).

    Returns
    -------
    Probability density function (PDF) of the normally distributed posterior.
    """
    return baseline + height * pt.exp(-0.5 * ((time - mean) / std) ** 2)


def define_model_normal(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
    """
    Define a model for fitting a normal distribution to the peak data.

    Parameters
    ----------
    time
        NumPy array with the time values of the relevant timeframe.
    intensity
        NumPy array with the intensity values of the relevant timeframe.

    Returns
    -------
    pmodel
        PyMC model.
    """
    slope_guess, intercept_guess, noise_width_guess = initial_guesses(time, intensity)
    with pm.Model() as pmodel:
        # add observations to the pmodel as ConstantData
        pm.ConstantData("time", time)
        pm.ConstantData("intensity", intensity)
        # add guesses to the pmodel as ConstantData
        pm.ConstantData("intercept_guess", intercept_guess)
        pm.ConstantData("slope_guess", slope_guess)
        pm.ConstantData("noise_width_guess", noise_width_guess)

        # priors plus error handling in case of mathematically impermissible values
        baseline_intercept = pm.Normal(
            "baseline_intercept", **baseline_intercept_prior_params(intercept_guess)
        )
        baseline_slope = pm.Normal("baseline_slope", **baseline_slope_prior_params(slope_guess))
        baseline = pm.Deterministic("baseline", baseline_intercept + baseline_slope * time)
        noise = pm.LogNormal("noise", np.clip(np.log(noise_width_guess), np.log(10), np.inf), 1)
        # define priors for parameters of a normally distributed posterior
        mean = pm.Normal("mean", np.mean(time[[0, -1]]), np.ptp(time) / 2)
        std = pm.HalfNormal("std", np.ptp(time) / 3)
        height = pm.HalfNormal("height", 0.95 * np.max(intensity))
        pm.Deterministic("area", height / (1 / (std * np.sqrt(2 * np.pi))))
        pm.Deterministic("sn", height / noise)
        # posterior
        y = normal_posterior(baseline, time, mean, std, height=height)
        y = pm.Deterministic("y", y)

        # likelihood
        pm.Normal("L", mu=y, sigma=noise, observed=intensity)

    return pmodel

def double_model_mean_prior(time):
    """
    Function creating prior probability distributions for double peaks using a ZeroSumNormal distribution.

    Parameters
    ----------
    time
        NumPy array with the time values of the relevant timeframe.

    Returns
    -------
    mean
        Normally distributed prior for the ordered means of the double peak model.
    diff
        Difference between meanmean and mean.
    meanmean
        Normally distributed prior for the mean of the double peak means.
    """
    meanmean = pm.Normal("meanmean", mu=np.min(time) + np.ptp(time) / 2, sigma=np.ptp(time) / 6)
    diff = pm.ZeroSumNormal(
        "diff",
        sigma=1,
        shape=(2,),  # currently no dims due to bug with ordered transformation
    )
    mean = pm.Normal(
        "mean",
        mu=meanmean + diff,
        sigma=1,
        transform=pm.distributions.transforms.ordered,
        dims=("subpeak",),
    )
    return mean, diff, meanmean


def double_normal_posterior(baseline, time: np.ndarray, mean, std, *, height):
    """
    Define a univariate ordered normal distribution as the posterior.

    Parameters
    ----------
    baseline
        Baseline of the data.
    time
        NumPy array with the time values of the relevant timeframe.
    mean
        Arithmetic mean of the normal distribution.
    std
        Standard deviation of the first and second peak.
    height
        Height of the first and second peak.

    Returns
    -------
    y
        Probability density function (PDF) of a univariate ordered normal distribution as the posterior.
    """
    y = (
        baseline
        + height[0] * pt.exp(-0.5 * ((time - mean[0]) / std[0]) ** 2)
        + height[1] * pt.exp(-0.5 * ((time - mean[1]) / std[1]) ** 2)
    )
    return y

def define_model_double_normal(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
    """
    Define a model for fitting two ordered normal distributions to the peak data
    (for when data contains two peaks or a double peak without baseline separation).

    Parameters
    ----------
    time
        NumPy array with the time values of the relevant timeframe.
    intensity
        NumPy array with the intensity values of the relevant timeframe.

    Returns
    -------
    pmodel
        PyMC model.
    """
    slope_guess, intercept_guess, noise_width_guess = initial_guesses(time, intensity)
    coords = {"subpeak": [0, 1]}
    with pm.Model(coords=coords) as pmodel:
        # add observations to the pmodel as ConstantData
        pm.ConstantData("time", time)
        pm.ConstantData("intensity", intensity)
        # add guesses to the pmodel as ConstantData
        pm.ConstantData("intercept_guess", intercept_guess)
        pm.ConstantData("slope_guess", slope_guess)
        pm.ConstantData("noise_width_guess", noise_width_guess)

        # priors
        baseline_intercept = pm.Normal(
            "baseline_intercept", **baseline_intercept_prior_params(intercept_guess)
        )
        baseline_slope = pm.Normal("baseline_slope", **baseline_slope_prior_params(slope_guess))
        baseline = pm.Deterministic("baseline", baseline_intercept + baseline_slope * time)
        noise = pm.LogNormal("noise", np.clip(np.log(noise_width_guess), np.log(10), np.inf), 1)
        std = pm.HalfNormal("std", sigma=[np.ptp(time) / 3, np.ptp(time) / 3], dims=("subpeak",))
        height = pm.HalfNormal(
            "height", sigma=[0.95 * np.max(intensity), 0.95 * np.max(intensity)], dims=("subpeak",)
        )
        pm.Deterministic("area", height / (1 / (std * np.sqrt(2 * np.pi))), dims=("subpeak",))
        pm.Deterministic("sn", height / noise, dims=("subpeak",))
        # use univariate ordered normal distribution for the mean values
        # use a zero sum normal distribution to describe the distance of the mean values
        # from the mean of the mean values ("meanmean")
        mean, diff, meanmean = double_model_mean_prior(time)

        # posterior
        y = double_normal_posterior(baseline, time, mean, std, height=height)
        y = pm.Deterministic("y", y)

        # likelihood
        pm.Normal("L", mu=y, sigma=noise, observed=intensity)

    return pmodel

def std_skew_calculation(scale, alpha):
    """
    Calculate the standard deviation of a skew normal distribution with f(x | loc, scale, alpha).

    Parameters
    ----------
    scale
        Scale parameter of the skew normal distribution.
    alpha
        Skewness parameter of the skew normal distribution.

    Returns
    -------
    std
        Standard deviation of a skew normal distribution.
    """
    return np.sqrt(scale**2 * (1 - (2 * alpha**2) / ((alpha**2 + 1) * np.pi)))


def mean_skew_calculation(loc, scale, alpha):
    """
    Calculate the arithmetic mean of a skew normal distribution with f(x | loc, scale, alpha).

    Parameters
    ----------
    loc
        Location parameter of the skew normal distribution.
    scale
        Scale parameter of the skew normal distribution.
    alpha
        Skewness parameter of the skew normal distribution.

    Returns
    -------
    mean
        Arithmetic mean of a skew normal distribution.
    """
    return loc + scale * np.sqrt(2 / np.pi) * alpha / (np.sqrt(1 + alpha**2))


def delta_calculation(alpha):
    """
    Calculate the delta term included in several subsequent formulae.

    Parameters
    ----------
    alpha
        Skewness parameter of the skew normal distribution.
    """
    return alpha / (np.sqrt(1 + alpha**2))


def mue_z_calculation(delta):
    """Calculate the mue_z variable which is needed to compute a numerical approximation of the mode of a skew normal distribution."""
    return np.sqrt(2 / np.pi) * delta


def sigma_z_calculation(mue_z):
    """Calculate the sigma_z variable which is needed to compute a numerical approximation of the mode of a skew normal distribution."""
    return np.sqrt(1 - mue_z**2)


def skewness_calculation(delta):
    """Calculate the skewness of a skew normal distribution."""
    return (
        (4 - np.pi)
        / 2
        * ((delta * np.sqrt(2 / np.pi)) ** 3)
        / ((1 - 2 * delta**2 / np.pi) ** 1.5)
    )


def mode_offset_calculation(mue_z, skewness, sigma_z, alpha):
    """Calculate the offset between arithmetic mean and mode of a skew normal distribution."""
    # this formula originally contained the sign() function which led to an error due to usage of pytensor variables
    # -> use alpha/abs(alpha) instead for the same effect
    return (
        mue_z
        - (skewness * sigma_z) / 2
        - (alpha / abs(alpha)) / 2 * pt.exp(-(2 * np.pi) / abs(alpha))
    )


def mode_skew_calculation(loc, scale, mode_offset):
    """Calculate a numerical approximation of the mode of a skew normal distribution."""
    return loc + scale * mode_offset

def height_calculation(area, loc, scale, alpha, mode_skew):
    """
    Calculate the height of a skew normal distribution.
    The formula is the result of inserting time = mode_skew into the posterior.

    Parameters
    ----------
    area
        Area of the peak described by the skew normal distribution (area between baseline and skew normal distribution).
    loc
        Location parameter of the skew normal distribution.
    scale
        Scale parameter of the skew normal distribution.
    alpha
        Skewness parameter of the skew normal distribution.
    mode_skew
        Mode of the skew normal distribution.

    Returns
    -------
    height
        Height of the skew normal distribution.
    """
    return area * (
        2
        * (1 / (scale * np.sqrt(2 * np.pi)) * pt.exp(-0.5 * ((mode_skew - loc) / scale) ** 2))
        * (0.5 * (1 + pt.erf(((alpha * (mode_skew - loc) / scale)) / np.sqrt(2))))
    )


def skew_normal_posterior(baseline, time, mean, std, alpha, *, area):
    """
    Define a skew normally distributed posterior.

    Parameters
    ----------
    baseline
        Baseline of the data.
    time
        NumPy array with the time values of the relevant timeframe.
    mean
        Location parameter, i.e. arithmetic mean.
    std
        Scale parameter, i.e. standard deviation.
    alpha
        Skewness parameter.
    area
        Peak area.

    Returns
    -------
    y
        Probability density function (PDF) of a skew normal distribution as the posterior.
    """
    # posterior
    y = baseline + area * (
        2
        * (1 / (std * np.sqrt(2 * np.pi)) * pt.exp(-0.5 * ((time - mean) / std) ** 2))
        * (0.5 * (1 + pt.erf(((alpha * (time - mean) / std)) / np.sqrt(2))))
    )
    return y

def define_model_skew(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
    """
    Define a model for fitting a skew normal distribution to the peak data.

    Parameters
    ----------
    time
        NumPy array with the time values of the relevant timeframe.
    intensity
        NumPy array with the intensity values of the relevant timeframe.

    Returns
    -------
    pmodel
        PyMC model.
    """
    slope_guess, intercept_guess, noise_width_guess = initial_guesses(time, intensity)
    with pm.Model() as pmodel:
        # add observations to the pmodel as ConstantData
        pm.ConstantData("time", time)
        pm.ConstantData("intensity", intensity)
        # add guesses to the pmodel as ConstantData
        pm.ConstantData("intercept_guess", intercept_guess)
        pm.ConstantData("slope_guess", slope_guess)
        pm.ConstantData("noise_width_guess", noise_width_guess)

        # priors plus error handling in case of mathematically impermissible values
        baseline_intercept = pm.Normal(
            "baseline_intercept", **baseline_intercept_prior_params(intercept_guess)
        )
        baseline_slope = pm.Normal("baseline_slope", **baseline_slope_prior_params(slope_guess))
        baseline = pm.Deterministic("baseline", baseline_intercept + baseline_slope * time)
        noise = pm.LogNormal("noise", np.clip(np.log(noise_width_guess), np.log(10), np.inf), 1)
        mean = pm.Normal("mean", np.mean(time[[0, -1]]), np.ptp(time) / 2)
        std = pm.HalfNormal("std", np.ptp(time) / 3)
        alpha = pm.Normal("alpha", 0, 3.5)
        area = pm.HalfNormal("area", np.max(intensity) * 0.9)

        # calculate standard deviation and arithmetic mean of a skew normal distribution
        std_skew_formula = std_skew_calculation(std, alpha)
        pm.Deterministic("std_skew", std_skew_formula)
        mean_skew_formula = mean_skew_calculation(mean, std, alpha)
        pm.Deterministic("mean_skew", mean_skew_formula)

        # height is defined as the posterior with x = mode
        delta_formula = delta_calculation(alpha)
        delta = pm.Deterministic("delta", delta_formula)
        mue_z_formula = mue_z_calculation(delta)
        mue_z = pm.Deterministic("mue_z", mue_z_formula)
        sigma_z_formula = sigma_z_calculation(mue_z)
        sigma_z = pm.Deterministic("sigma_z", sigma_z_formula)
        skewness = skewness_calculation(delta)
        mode_offset_formula = mode_offset_calculation(mue_z, skewness, sigma_z, alpha)
        mode_offset = pm.Deterministic("mode_offset", mode_offset_formula)
        mode_skew_formula = mode_skew_calculation(mean, std, mode_offset)
        mode_skew = pm.Deterministic("mode_skew", mode_skew_formula)
        # then calculate the height based on the mode
        height_formula = height_calculation(area, mean, std, alpha, mode_skew)
        height = pm.Deterministic(
            "height",
            height_formula,
        )
        pm.Deterministic("sn", height / noise)
        y = skew_normal_posterior(baseline, time, mean, std, alpha, area=area)
        y = pm.Deterministic("y", y)

        # likelihood
        pm.Normal("L", mu=y, sigma=noise, observed=intensity)

    return pmodel

def double_skew_normal_posterior(baseline, time: np.ndarray, mean, std, alpha, *, area):
    """
    Define a univariate ordered skew normal distribution as the posterior.

    Parameters
    ----------
    baseline
        Baseline of the data.
    time
        NumPy array with the time values of the relevant timeframe.
    mean
        Location parameter.
    std
        Scale parameter of the first and second peak.
    alpha
        Skewness parameter of the first and second peak.
    area
        Area of the first and second peak.

    Returns
    -------
    y
        Probability density function (PDF) of a univariate ordered skew normal distribution as the posterior.
    """
    y = (
        baseline
        + area[0]
        * (
            2
            * (1 / (std[0] * np.sqrt(2 * np.pi)) * pt.exp(-0.5 * ((time - mean[0]) / std[0]) ** 2))
            * (0.5 * (1 + pt.erf(((alpha[0] * (time - mean[0]) / std[0])) / np.sqrt(2))))
        )
        + area[1]
        * (
            2
            * (1 / (std[1] * np.sqrt(2 * np.pi)) * pt.exp(-0.5 * ((time - mean[1]) / std[1]) ** 2))
            * (0.5 * (1 + pt.erf(((alpha[1] * (time - mean[1]) / std[1])) / np.sqrt(2))))
        )
    )
    return y

def define_model_double_skew_normal(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
    """
    Define a model for fitting two ordered skew normal distributions to the peak data
    (for when data contains two peaks or a double peak without baseline separation).

    Parameters
    ----------
    time
        NumPy array with the time values of the relevant timeframe.
    intensity
        NumPy array with the intensity values of the relevant timeframe.

    Returns
    -------
    pmodel
        PyMC model.
    """
    slope_guess, intercept_guess, noise_width_guess = initial_guesses(time, intensity)
    coords = {"subpeak": [0, 1]}
    with pm.Model(coords=coords) as pmodel:
        # add observations to the pmodel as ConstantData
        pm.ConstantData("time", time)
        pm.ConstantData("intensity", intensity)
        # add guesses to the pmodel as ConstantData
        pm.ConstantData("intercept_guess", intercept_guess)
        pm.ConstantData("slope_guess", slope_guess)
        pm.ConstantData("noise_width_guess", noise_width_guess)

        # priors plus error handling in case of mathematically impermissible values
        baseline_intercept = pm.Normal(
            "baseline_intercept", **baseline_intercept_prior_params(intercept_guess)
        )
        baseline_slope = pm.Normal("baseline_slope", **baseline_slope_prior_params(slope_guess))
        baseline = pm.Deterministic("baseline", baseline_intercept + baseline_slope * time)
        noise = pm.LogNormal("noise", np.clip(np.log(noise_width_guess), np.log(10), np.inf), 1)
        # use univariate ordered normal distribution for the mean values
        # use a zero sum normal distribution to describe the distance of the mean values
        # from the mean of the mean values ("meanmean")
        mean, diff, meanmean = double_model_mean_prior(time)
        std = pm.HalfNormal(
            "std",
            sigma=[np.ptp(time) / 3, np.ptp(time) / 3],
            dims=("subpeak",),
        )
        area = pm.HalfNormal(
            "area",
            sigma=[np.max(intensity) * 0.9, np.max(intensity) * 0.9],
            dims=("subpeak",),
        )
        alpha = pm.Normal(
            "alpha",
            mu=[0, 0],
            sigma=3.5,
            dims=("subpeak",),
        )

        # height is defined as the posterior with x = mode
        delta_formula = delta_calculation(alpha)
        delta = pm.Deterministic("delta", delta_formula)
        mue_z_formula = mue_z_calculation(delta)
        mue_z = pm.Deterministic("mue_z", mue_z_formula)
        sigma_z_formula = sigma_z_calculation(mue_z)
        sigma_z = pm.Deterministic("sigma_z", sigma_z_formula)
        skewness = skewness_calculation(delta)
        mode_offset_formula = mode_offset_calculation(mue_z, skewness, sigma_z, alpha)
        mode_offset = pm.Deterministic("mode_offset", mode_offset_formula)
        mode_skew_formula = mode_skew_calculation(mean, std, mode_offset)
        mode_skew = pm.Deterministic("mode_skew", mode_skew_formula)
        # then calculate the height based on the mode
        height_formula = height_calculation(area, mean, std, alpha, mode_skew)
        height = pm.Deterministic(
            "height",
            height_formula,
        )
        pm.Deterministic("sn", height / noise, dims=("subpeak",))

        # posterior
        y = double_skew_normal_posterior(baseline, time, mean, std, alpha, area=area)
        y = pm.Deterministic("y", y)

        # likelihood
        pm.Normal("L", mu=y, sigma=noise, observed=intensity)

    return pmodel

def compute_log_likelihood(pmodel: pm.Model, idata: az.InferenceData):
    """
    Compute the element-wise log-likelihood of every posterior sample and add it to a given inference data object.

    Parameters
    ----------
    pmodel
        PyMC model.
    idata
        Inference data object resulting from sampling.

    Returns
    -------
    idata
        Inference data object updated with the element-wise log-likelihood of every posterior sample.
    """
    with pmodel:
        pm.compute_log_likelihood(idata)
    return idata


def model_comparison(
    compare_dict: Mapping[str, az.InferenceData],
    ic: str = "loo",
) -> pandas.DataFrame:
    """
    Compare the models detailed in compare_dict based on leave-one-out cross-validation (loo)
    or the widely applicable information criterion (waic).

    Parameters
    ----------
    compare_dict
        Dictionary with the model denominations as keys and their respective inference data objects as values.
    ic
        Choice of the information criterion with which models are ranked ("loo" or "waic").
        Default is "loo".

    Returns
    -------
    df_comp
        DataFrame containing the ranking of the given models.
    """
    df_comp = az.compare(compare_dict=compare_dict, ic=ic)
    return df_comp