peak_performance-0.6.4-py3-none-any.whl → peak_performance-0.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
peak_performance/models.py CHANGED
@@ -36,6 +36,26 @@ class ModelType(str, Enum):
     DoubleSkewNormal = "double_skew_normal"
 
 
+def guess_noise(intensity):
+    """
+    Function for providing a guess for the noise width of a given signal
+    based on the first and last 15 % of data points in a time series.
+
+    Parameters
+    ----------
+    intensity
+        NumPy array with the intensity values of the relevant timeframe.
+    """
+    n = len(intensity)
+    ifrom = int(np.ceil(0.15 * n))
+    ito = int(np.floor(0.85 * n))
+    start_ints = intensity[:ifrom]
+    end_ints = intensity[ito:]
+    return np.std([*(start_ints - np.mean(start_ints)), *(end_ints - np.mean(end_ints))])
+
+
 def initial_guesses(time: np.ndarray, intensity: np.ndarray):
     """
     Provide initial guesses for priors.
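The new `guess_noise` helper pools the de-meaned leading and trailing 15 % of the trace and returns their standard deviation, so a baseline offset between the two ends does not inflate the estimate. A minimal sketch against synthetic data (values are illustrative, mirroring the shipped `test_noise_guessing`):

```python
import numpy as np

from peak_performance import models

rng = np.random.default_rng(42)
intensity = np.concatenate([
    rng.normal(10, 0.7, size=200),    # leading baseline, noise sd 0.7
    rng.normal(1000, 50, size=600),   # peak region, ignored by the guess
    rng.normal(40, 0.7, size=200),    # trailing baseline, noise sd 0.7
])
print(models.guess_noise(intensity))  # ≈ 0.7 despite the baseline step
```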
@@ -79,12 +99,16 @@ def initial_guesses(time: np.ndarray, intensity: np.ndarray):
     # use the indices in noise_index to get the time and intensity of all noise data points
     noise_time = [time[n] for n in noise_index]
     noise_intensity = [intensity[n] for n in noise_index]
-    # calculate the width of the noise
-    noise_width_guess = max(noise_intensity) - min(noise_intensity)
 
     # use scipy to fit a linear regression through the noise as a prior for the eventual baseline
     baseline_fit = st.linregress(noise_time, noise_intensity)
 
+    # calculate the width of the noise
+    noise_width_guess = guess_noise(intensity)
+
+    # clip the noise to at least 10
+    noise_width_guess = np.clip(noise_width_guess, 10, np.inf)
+
     return baseline_fit.slope, baseline_fit.intercept, noise_width_guess
 
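Clipping the guess to at least 10 protects the `LogNormal` noise prior further down, whose location parameter is the logarithm of this guess; a near-zero guess would otherwise push it towards `-inf`. A quick sketch of the failure mode being avoided (values are illustrative):

```python
import numpy as np

noise_width_guess = 1e-9  # an almost noise-free trace
# log(1e-9) ≈ -20.7 would center the LogNormal prior absurdly low,
# so the guess is floored at 10 before it enters the model:
noise_width_guess = np.clip(noise_width_guess, 10, np.inf)
print(np.log(noise_width_guess))  # 2.302..., i.e. log(10)
```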
 
@@ -118,9 +142,9 @@ def baseline_slope_prior_params(slope_guess: Union[float, int]) -> Mapping[str,
     }
 
 
-def normal_posterior(baseline, time: np.ndarray, mean, std, *, height):
+def normal_peak_shape(baseline, time: np.ndarray, mean, std, *, height):
     """
-    Model a peak shaped like the PDF of a normal distribution.
+    Model a peak shaped like a normal distribution.
 
     Parameters
     ----------
@@ -166,7 +190,7 @@ def define_model_normal(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
         # add guesses to the pmodel as ConstantData
        pm.ConstantData("intercept_guess", intercept_guess)
        pm.ConstantData("slope_guess", slope_guess)
-        pm.ConstantData("noise_width_guess", noise_width_guess)
+        noise_guess = pm.ConstantData("noise_width_guess", noise_width_guess)
 
        # priors plus error handling in case of mathematically impermissible values
        baseline_intercept = pm.Normal(
@@ -174,7 +198,7 @@ def define_model_normal(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
         )
         baseline_slope = pm.Normal("baseline_slope", **baseline_slope_prior_params(slope_guess))
         baseline = pm.Deterministic("baseline", baseline_intercept + baseline_slope * time)
-        noise = pm.LogNormal("noise", np.clip(np.log(noise_width_guess), np.log(10), np.inf), 1)
+        noise = pm.LogNormal("noise", pt.log(noise_guess))
         # define priors for parameters of a normally distributed posterior
         mean = pm.Normal("mean", np.mean(time[[0, -1]]), np.ptp(time) / 2)
         std = pm.HalfNormal("std", np.ptp(time) / 3)
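With the clipping moved into `initial_guesses`, the noise prior simplifies to `LogNormal(log(noise_guess))`, whose median sits exactly at the guess (this assumes PyMC's `LogNormal` default of `sigma=1`). An illustrative check:

```python
import numpy as np
import pymc as pm
import pytensor.tensor as pt

noise_guess = 25.0  # hypothetical clipped guess from initial_guesses()
draws = pm.draw(pm.LogNormal.dist(pt.log(noise_guess)), draws=10_000, random_seed=1)
print(np.median(draws))  # ≈ 25: the prior median equals the guess
```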
@@ -182,7 +206,7 @@ def define_model_normal(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
         pm.Deterministic("area", height / (1 / (std * np.sqrt(2 * np.pi))))
         pm.Deterministic("sn", height / noise)
         # posterior
-        y = normal_posterior(baseline, time, mean, std, height=height)
+        y = normal_peak_shape(baseline, time, mean, std, height=height)
         y = pm.Deterministic("y", y)
 
         # likelihood
@@ -193,7 +217,7 @@ def define_model_normal(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
 
 def double_model_mean_prior(time):
     """
-    Function creating prior probability distributions for double peaks using a ZeroSumNormal distribution.
+    Function creating prior probability distributions for the mean retention times of a dual-peak.
 
     Parameters
     ----------
@@ -203,31 +227,75 @@ def double_model_mean_prior(time):
     Returns
     -------
     mean
-        Normally distributed prior for the ordered means of the double peak model.
+        Normally distributed prior for the ordered means of the multi-peak model.
     diff
-        Difference between meanmean and mean.
+        Difference between the group mean and peak-wise mean.
+    meanmean
+        Normally distributed prior for the group mean of the peak means.
+    """
+    tmin = np.min(time)
+    tdelta = np.ptp(time)
+    meanmean = pm.Normal("meanmean", mu=tmin + tdelta / 2, sigma=tdelta / 6)
+    separation = pm.Gamma(
+        "separation",
+        mu=tdelta / 6,
+        sigma=tdelta / 12,
+    )
+    offset = pm.Deterministic("offset", pt.stack([-separation / 2, separation / 2]), dims="subpeak")
+    mean = pm.Deterministic(
+        "mean",
+        meanmean + offset,
+        dims=("subpeak",),
+    )
+    return mean, offset, meanmean
+
+
+def multi_peak_means_prior(time):
+    """
+    Function creating prior probability distributions for multi-peaks using a ZeroSumNormal distribution.
+
+    The number of peaks is determined from the `"subpeak"` model coordinates.
+
+    Parameters
+    ----------
+    time
+        NumPy array with the time values of the relevant timeframe.
+
+    Returns
+    -------
+    mean
+        Normally distributed prior for the ordered means of the multi-peak model.
+    offset
+        Time offset between the group mean and peak-wise mean.
     meanmean
-        Normally distributed prior for the mean of the double peak means.
+        Normally distributed prior for the group mean of the peak means.
     """
+    pmodel = pm.modelcontext(None)
     meanmean = pm.Normal("meanmean", mu=np.min(time) + np.ptp(time) / 2, sigma=np.ptp(time) / 6)
-    diff = pm.ZeroSumNormal(
-        "diff",
-        sigma=1,
-        shape=(2,),  # currently no dims due to bug with ordered transformation
+    offset_unsorted = pm.ZeroSumNormal(
+        "offset_unsorted",
+        sigma=2,
+        # Support arbitrary number of subpeaks
+        shape=len(pmodel.coords["subpeak"]),
+        # NOTE: As of PyMC v5.14, the OrderedTransform and ZeroSumTransform are incompatible.
+        # See https://github.com/pymc-devs/pymc/issues/6975.
+        # As a workaround we'll call pt.sort a few lines below.
     )
-    mean = pm.Normal(
+    offset = pm.Deterministic("offset", pt.sort(offset_unsorted), dims="subpeak")
+    mean = pm.Deterministic(
         "mean",
-        mu=meanmean + diff,
-        sigma=1,
-        transform=pm.distributions.transforms.ordered,
+        meanmean + offset,
+        # Introduce a small jitter to the subpeak means to decouple them
+        # from the strictly asymmetric ZeroSumNormal entries.
+        # This reduces the chances of unwanted bimodality.
         dims=("subpeak",),
     )
-    return mean, diff, meanmean
+    return mean, offset, meanmean
 
 
-def double_normal_posterior(baseline, time: np.ndarray, mean, std, *, height):
+def double_normal_peak_shape(baseline, time: np.ndarray, mean, std, *, height):
     """
-    Define a univariate ordered normal distribution as the posterior.
+    Model a peak shaped like a univariate ordered normal distribution.
 
     Parameters
     ----------
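The `pt.sort` workaround above replaces the `ordered` transform, which cannot be combined with `ZeroSumNormal` (see pymc#6975). Its effect can be checked in isolation; a minimal sketch mirroring the shipped `test_zsn_sorting`:

```python
import numpy as np
import pymc as pm
import pytensor.tensor as pt

with pm.Model(coords={"subpeak": [0, 1]}):
    meanmean = pm.Normal("meanmean", mu=5, sigma=1)
    offset_unsorted = pm.ZeroSumNormal("offset_unsorted", sigma=2, shape=2)
    # Sorting inside a Deterministic sidesteps the transform incompatibility
    offset = pm.Deterministic("offset", pt.sort(offset_unsorted), dims="subpeak")
    mean = pm.Deterministic("mean", meanmean + offset, dims="subpeak")

draws = pm.draw(offset, draws=100)
np.testing.assert_array_less(draws[:, 0], draws[:, 1])  # always ordered
```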
@@ -281,7 +349,7 @@ def define_model_double_normal(time: np.ndarray, intensity: np.ndarray) -> pm.Mo
         # add guesses to the pmodel as ConstantData
        pm.ConstantData("intercept_guess", intercept_guess)
        pm.ConstantData("slope_guess", slope_guess)
-        pm.ConstantData("noise_width_guess", noise_width_guess)
+        noise_guess = pm.ConstantData("noise_width_guess", noise_width_guess)
 
        # priors
        baseline_intercept = pm.Normal(
@@ -289,8 +357,9 @@ def define_model_double_normal(time: np.ndarray, intensity: np.ndarray) -> pm.Mo
         )
         baseline_slope = pm.Normal("baseline_slope", **baseline_slope_prior_params(slope_guess))
         baseline = pm.Deterministic("baseline", baseline_intercept + baseline_slope * time)
-        noise = pm.LogNormal("noise", np.clip(np.log(noise_width_guess), np.log(10), np.inf), 1)
-        std = pm.HalfNormal("std", sigma=[np.ptp(time) / 3, np.ptp(time) / 3], dims=("subpeak",))
+        noise = pm.LogNormal("noise", pt.log(noise_guess))
+        # NOTE: We expect double peaks to be narrower w.r.t. the time frame, compared to single peaks.
+        std = pm.HalfNormal("std", sigma=[np.ptp(time) / 6, np.ptp(time) / 6], dims=("subpeak",))
         height = pm.HalfNormal(
             "height", sigma=[0.95 * np.max(intensity), 0.95 * np.max(intensity)], dims=("subpeak",)
         )
@@ -302,7 +371,7 @@ def define_model_double_normal(time: np.ndarray, intensity: np.ndarray) -> pm.Mo
         mean, diff, meanmean = double_model_mean_prior(time)
 
         # posterior
-        y = double_normal_posterior(baseline, time, mean, std, height=height)
+        y = double_normal_peak_shape(baseline, time, mean, std, height=height)
         y = pm.Deterministic("y", y)
 
         # likelihood
@@ -430,9 +499,9 @@ def height_calculation(area, loc, scale, alpha, mode_skew):
     )
 
 
-def skew_normal_posterior(baseline, time, mean, std, alpha, *, area):
+def skew_normal_peak_shape(baseline, time, mean, std, alpha, *, area):
     """
-    Define a skew normally distributed posterior.
+    Model a peak shaped like a skew normal distribution.
 
     Parameters
     ----------
@@ -489,7 +558,7 @@ def define_model_skew(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
         # add guesses to the pmodel as ConstantData
        pm.ConstantData("intercept_guess", intercept_guess)
        pm.ConstantData("slope_guess", slope_guess)
-        pm.ConstantData("noise_width_guess", noise_width_guess)
+        noise_guess = pm.ConstantData("noise_width_guess", noise_width_guess)
 
        # priors plus error handling in case of mathematically impermissible values
        baseline_intercept = pm.Normal(
@@ -497,7 +566,7 @@ def define_model_skew(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
         )
         baseline_slope = pm.Normal("baseline_slope", **baseline_slope_prior_params(slope_guess))
         baseline = pm.Deterministic("baseline", baseline_intercept + baseline_slope * time)
-        noise = pm.LogNormal("noise", np.clip(np.log(noise_width_guess), np.log(10), np.inf), 1)
+        noise = pm.LogNormal("noise", pt.log(noise_guess))
         mean = pm.Normal("mean", np.mean(time[[0, -1]]), np.ptp(time) / 2)
         std = pm.HalfNormal("std", np.ptp(time) / 3)
         alpha = pm.Normal("alpha", 0, 3.5)
@@ -528,7 +597,7 @@ def define_model_skew(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
             height_formula,
         )
         pm.Deterministic("sn", height / noise)
-        y = skew_normal_posterior(baseline, time, mean, std, alpha, area=area)
+        y = skew_normal_peak_shape(baseline, time, mean, std, alpha, area=area)
         y = pm.Deterministic("y", y)
 
         # likelihood
@@ -537,9 +606,9 @@ def define_model_skew(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
     return pmodel
 
 
-def double_skew_normal_posterior(baseline, time: np.ndarray, mean, std, alpha, *, area):
+def double_skew_normal_peak_shape(baseline, time: np.ndarray, mean, std, alpha, *, area):
     """
-    Define a univariate ordered skew normal distribution as the posterior.
+    Model a peak shaped like a univariate ordered skew normal distribution.
 
     Parameters
     ----------
@@ -605,7 +674,7 @@ def define_model_double_skew_normal(time: np.ndarray, intensity: np.ndarray) ->
         # add guesses to the pmodel as ConstantData
        pm.ConstantData("intercept_guess", intercept_guess)
        pm.ConstantData("slope_guess", slope_guess)
-        pm.ConstantData("noise_width_guess", noise_width_guess)
+        noise_guess = pm.ConstantData("noise_width_guess", noise_width_guess)
 
        # priors plus error handling in case of mathematically impermissible values
        baseline_intercept = pm.Normal(
@@ -613,7 +682,7 @@ def define_model_double_skew_normal(time: np.ndarray, intensity: np.ndarray) ->
         )
         baseline_slope = pm.Normal("baseline_slope", **baseline_slope_prior_params(slope_guess))
         baseline = pm.Deterministic("baseline", baseline_intercept + baseline_slope * time)
-        noise = pm.LogNormal("noise", np.clip(np.log(noise_width_guess), np.log(10), np.inf), 1)
+        noise = pm.LogNormal("noise", pt.log(noise_guess))
         # use univariate ordered normal distribution for the mean values
         # use a zero sum normal distribution to describe the distance of the mean values
         # from the mean of the mean values ("meanmean")
@@ -656,7 +725,7 @@ def define_model_double_skew_normal(time: np.ndarray, intensity: np.ndarray) ->
         pm.Deterministic("sn", height / noise, dims=("subpeak",))
 
         # posterior
-        y = double_skew_normal_posterior(baseline, time, mean, std, alpha, area=area)
+        y = double_skew_normal_peak_shape(baseline, time, mean, std, alpha, area=area)
         y = pm.Deterministic("y", y)
 
         # likelihood
peak_performance/pipeline.py CHANGED
@@ -489,6 +489,7 @@ def sampling(pmodel, **sample_kwargs):
     idata
         Inference data object.
     """
+    sample_kwargs.setdefault("chains", 4)
     sample_kwargs.setdefault("tune", 2000)
     sample_kwargs.setdefault("draws", 2000)
     # check if nutpie is available; if so, use it to enhance performance
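`sampling` now defaults to four chains in addition to the existing tune/draw defaults; because `setdefault` is used, caller-supplied keyword arguments still take precedence. A usage sketch (assuming `sampling` lives in `peak_performance.pipeline`, as the test suite's `pl.` prefix suggests):

```python
import numpy as np

from peak_performance import models, pipeline

time = np.linspace(0, 10, 100)
intensity = np.random.normal(5, 1, size=100)  # hypothetical trace
pmodel = models.define_model_normal(time, intensity)

idata = pipeline.sampling(pmodel)                       # chains=4, tune=2000, draws=2000
idata = pipeline.sampling(pmodel, chains=2, draws=500)  # explicit kwargs still win
```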
@@ -647,7 +648,7 @@ def posterior_predictive_sampling(pmodel, idata):
        Inference data object updated with the posterior predictive samples.
    """
    with pmodel:
-        idata.extend(pm.sample_posterior_predictive(idata, var_names=["y"]))
+        idata.extend(pm.sample_posterior_predictive(idata))
    return idata
 
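Dropping `var_names=["y"]` means `pm.sample_posterior_predictive` falls back to sampling the observed likelihood variable — named `"L"` in these models — rather than the deterministic `"y"`. A sketch of the resulting workflow (module layout assumed as above):

```python
import numpy as np

from peak_performance import models, pipeline

time = np.linspace(0, 10, 100)
intensity = np.random.normal(5, 1, size=100)  # hypothetical flat trace

pmodel = models.define_model_normal(time, intensity)
idata = pipeline.sampling(pmodel, tune=10, draws=10)
idata = pipeline.posterior_predictive_sampling(pmodel, idata)

# The group is keyed by the likelihood's name, not the deterministic "y":
samples = idata.posterior_predictive["L"].stack(sample=("chain", "draw")).T.values
```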
 
@@ -1409,7 +1410,7 @@ def selected_models_to_template(
 
 
 def model_selection_check(
-    result_df: pandas.DataFrame, ic: str, elpd_threshold: Union[str, float] = 25
+    result_df: pandas.DataFrame, ic: str, elpd_threshold: Union[str, float] = 35
 ) -> str:
     """
     During model selection, double peak models are sometimes incorrectly preferred due to their increased complexity.
@@ -1435,10 +1436,11 @@ def model_selection_check(
     selected_model = str(result_df.index[0])
     if "double" in selected_model:
         df_single_peak_models = result_df[~result_df.index.str.contains("double")]
-        elpd_single = max(list(df_single_peak_models[f"elpd_{ic}"]))
-        elpd_double = max(list(result_df[f"elpd_{ic}"]))
-        if not elpd_double > elpd_single + elpd_threshold:
-            selected_model = str(df_single_peak_models.index[0])
+        if len(df_single_peak_models) > 0:
+            elpd_single = max(list(df_single_peak_models[f"elpd_{ic}"]))
+            elpd_double = max(list(result_df[f"elpd_{ic}"]))
+            if not elpd_double > elpd_single + elpd_threshold:
+                selected_model = str(df_single_peak_models.index[0])
     return selected_model
 
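With the default threshold raised from 25 to 35, a double-peak model must now beat the best single-peak model by more than 35 elpd points to be accepted, and the new guard handles result tables that contain no single-peak models at all. An illustrative ranking with hypothetical scores (sorted best-first, as e.g. `az.compare` returns them; same `pipeline` module assumed as above):

```python
import pandas

from peak_performance import pipeline

result_df = pandas.DataFrame(
    {"elpd_loo": [50, 35, 10]},
    index=["double_normal", "normal", "skew_normal"],
)
# 50 does not exceed 35 + 35, so the double peak is rejected:
assert pipeline.model_selection_check(result_df, "loo") == "normal"
```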
 
peak_performance/plots.py CHANGED
@@ -18,7 +18,7 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
 
 import os
 from pathlib import Path
-from typing import Sequence, Union
+from typing import Optional, Sequence, Union
 
 import arviz as az
 import numpy as np
@@ -31,7 +31,7 @@ def plot_raw_data(
     identifier: str,
     time: np.ndarray,
     intensity: np.ndarray,
-    path: Union[str, os.PathLike],
+    path: Optional[Union[str, os.PathLike]],
     save_formats: Sequence[str] = ("png", "svg"),
 ):
     """
@@ -62,9 +62,10 @@ def plot_raw_data(
     plt.xticks(size=11.5)
     plt.yticks(size=11.5)
     fig.tight_layout()
-    for format in save_formats:
-        fig.savefig(Path(path) / f"{identifier}_NoPeak.{format}", format=format)
-    plt.close(fig)
+    if path is not None:
+        for format in save_formats:
+            fig.savefig(Path(path) / f"{identifier}_NoPeak.{format}", format=format)
+        plt.close(fig)
 
     return
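All plotting functions now take an `Optional` path; with `path=None` the figure is built but neither saved nor closed, which is convenient for interactive sessions. A sketch:

```python
import numpy as np

from peak_performance import plots

time = np.linspace(0, 10, 100)
intensity = np.random.normal(100, 5, size=100)
# No files are written; the figure stays open for notebook display:
plots.plot_raw_data("demo", time, intensity, path=None)
```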
 
@@ -74,7 +75,6 @@ def plot_density(
 ):
     """
     Method to plot the original data points alongside the posterior predictive plot (percentiles marked with a black, dashed line).
-    Serves as a more accurate comparison between data and model than comparing data and posterior distribution.
 
     Parameters
     ----------
@@ -135,7 +135,7 @@ def plot_posterior_predictive(
     identifier: str,
     time: np.ndarray,
     intensity: np.ndarray,
-    path: Union[str, os.PathLike],
+    path: Optional[Union[str, os.PathLike]],
     idata: az.InferenceData,
     discarded: bool,
     save_formats: Sequence[str] = ("png", "svg"),
@@ -168,7 +168,7 @@ def plot_posterior_predictive(
     plot_density(
         ax=ax,
         x=time,
-        samples=idata.posterior_predictive.y.stack(sample=("chain", "draw")).T.values,
+        samples=idata.posterior_predictive["L"].stack(sample=("chain", "draw")).T.values,
         percentiles=(2.5, 97.5),
     )
     # plot the raw data points
@@ -179,16 +179,19 @@ def plot_posterior_predictive(
     plt.yticks(size=11.5)
     plt.legend()
     fig.tight_layout()
-    # if signal was discarded, add a "_NoPeak" to the file name
-    if discarded:
-        for format in save_formats:
-            fig.savefig(
-                Path(path) / f"{identifier}_predictive_posterior_NoPeak.{format}", format=format
-            )
-    else:
-        for format in save_formats:
-            fig.savefig(Path(path) / f"{identifier}_predictive_posterior.{format}", format=format)
-    plt.close(fig)
+    if path is not None:
+        # if signal was discarded, add a "_NoPeak" to the file name
+        if discarded:
+            for format in save_formats:
+                fig.savefig(
+                    Path(path) / f"{identifier}_predictive_posterior_NoPeak.{format}", format=format
+                )
+        else:
+            for format in save_formats:
+                fig.savefig(
+                    Path(path) / f"{identifier}_predictive_posterior.{format}", format=format
+                )
+        plt.close(fig)
 
     return
 
@@ -197,7 +200,7 @@ def plot_posterior(
     identifier: str,
     time: np.ndarray,
     intensity: np.ndarray,
-    path: Union[str, os.PathLike],
+    path: Optional[Union[str, os.PathLike]],
     idata: az.InferenceData,
     discarded: bool,
     save_formats: Sequence[str] = ("png", "svg"),
@@ -246,14 +249,15 @@ def plot_posterior(
     plt.xticks(size=11.5)
     plt.yticks(size=11.5)
     fig.tight_layout()
-    # if signal was discarded, add a "_NoPeak" to the file name
-    if discarded:
-        for format in save_formats:
-            fig.savefig(Path(path) / f"{identifier}_posterior_NoPeak.{format}", format=format)
-    else:
-        for format in save_formats:
-            fig.savefig(Path(path) / f"{identifier}_posterior.{format}", format=format)
-    plt.close(fig)
+    if path is not None:
+        # if signal was discarded, add a "_NoPeak" to the file name
+        if discarded:
+            for format in save_formats:
+                fig.savefig(Path(path) / f"{identifier}_posterior_NoPeak.{format}", format=format)
+        else:
+            for format in save_formats:
+                fig.savefig(Path(path) / f"{identifier}_posterior.{format}", format=format)
+        plt.close(fig)
 
     return
 
@@ -261,7 +265,7 @@ def plot_model_comparison(
 def plot_model_comparison(
     df_comp: pandas.DataFrame,
     identifier: str,
-    path: Union[str, os.PathLike],
+    path: Optional[Union[str, os.PathLike]],
     save_formats: Sequence[str] = ("png", "svg"),
 ):
     """
@@ -282,8 +286,9 @@ def plot_model_comparison(
     axes = az.plot_compare(df_comp, insample_dev=False)
     fig = axes.figure
     plt.tight_layout()
-    for format in save_formats:
-        fig.savefig(Path(path) / f"model_comparison_{identifier}.{format}", format=format)
-    plt.close(fig)
+    if path is not None:
+        for format in save_formats:
+            fig.savefig(Path(path) / f"model_comparison_{identifier}.{format}", format=format)
+        plt.close(fig)
 
     return
peak_performance/test_models.py CHANGED
@@ -3,34 +3,110 @@ from pathlib import Path
 
 import arviz as az
 import numpy as np
 import pymc as pm
+import pytensor.tensor as pt
 import pytest
 import scipy.integrate
 import scipy.stats as st
 
 from peak_performance import models
 
+_DP_ROOT = Path(__file__).absolute().parent.parent
+_REQUIRED_VARIABLES = {
+    "baseline_slope",
+    "baseline_intercept",
+    "baseline",
+    "std",
+    "height",
+    "area",
+    "sn",
+    "mean",
+    "y",
+    "noise",
+}
+_REQUIRED_DATA = {
+    "slope_guess",
+    "intercept_guess",
+    "noise_width_guess",
+}
+
+
+def test_noise_guessing():
+    expected = 0.7
+    intensities = [
+        *np.random.normal(10, expected, size=200),
+        *np.random.normal(0, 6, size=600),
+        *np.random.normal(40, expected, size=200),
+    ]
+    actual = models.guess_noise(intensities)
+    assert 0.6 < actual < 0.8
+    pass
+
 
 def test_initial_guesses():
     # define time and intensity for example with known result
     time = 2 + 0.1 * np.arange(17)
     intensity = [1, 5, 3] + 11 * [1000] + [7, 9, 11]
     # define expected results
-    expected_noise_width = np.ptp([1, 5, 3, 7, 9, 11])
     expected_baseline_fit = st.linregress([2, 2.1, 2.2, 3.4, 3.5, 3.6], [1, 5, 3, 7, 9, 11])
     # get the values from the initial guesses function
     slope, intercept, noise_width = models.initial_guesses(time, intensity)
     # compare the outcome with the expected values
     assert expected_baseline_fit.slope == slope
     assert expected_baseline_fit.intercept == intercept
-    assert expected_noise_width == noise_width
+    # With this example the noise is clipped to at least 10
+    assert noise_width == 10
+    pass
+
+
+def test_zsn_sorting():
+    """This tests a workaround that we rely on for multi-peak models."""
+    coords = {
+        "thing": ["left", "center", "right"],
+    }
+    with pm.Model(coords=coords) as pmodel:
+        hyper = pm.Normal("hyper", mu=0, sigma=3)
+        offset_unsorted = pm.ZeroSumNormal(
+            "offset_unsorted",
+            sigma=1,
+            shape=3,
+        )
+        # Create a sorted deterministic without using transforms
+        offset = pm.Deterministic("offset", pt.sort(offset_unsorted), dims="thing")
+        pos = pm.Deterministic(
+            "pos",
+            hyper + offset,
+            dims="thing",
+        )
+        # Observe the things in deliberately unsorted order to provoke the model 😈
+        dat = pm.Data("dat", [0.2, 0.05, -0.3], dims="thing")
+        pm.Normal("L", pos, observed=dat, dims="thing")
+
+    # Check draws from the prior
+    drawn = pm.draw(offset, draws=69)
+    np.testing.assert_array_less(drawn[:, 0], drawn[:, 1])
+
+    # And check MCMC draws too
+    with pmodel:
+        idata = pm.sample(
+            chains=1, tune=10, draws=69, step=pm.Metropolis(), compute_convergence_checks=False
+        )
+    for vname in ["offset", "pos"]:
+        np.testing.assert_array_less(
+            idata.posterior[vname].sel(thing="left"),
+            idata.posterior[vname].sel(thing="center"),
+        )
+        np.testing.assert_array_less(
+            idata.posterior[vname].sel(thing="center"),
+            idata.posterior[vname].sel(thing="right"),
+        )
     pass
 
 
 class TestDistributions:
-    def test_normal_posterior(self):
+    def test_normal_peak_shape(self):
         x = np.linspace(-5, 10, 10000)
         expected = st.norm.pdf(x, 3, 2)
-        actual_pt = models.normal_posterior(0, x, 3, 2, height=np.max(expected))
+        actual_pt = models.normal_peak_shape(0, x, 3, 2, height=np.max(expected))
         # cast arrays to float data type in order to avoid error of np.testing.assert_allclose() due to using np.isfinite under the hood
         actual = actual_pt.eval().astype(float)
         expected = expected.astype(float)
@@ -38,11 +114,11 @@ class TestDistributions:
         np.testing.assert_allclose(expected, actual, atol=0.0000001)
         pass
 
-    def test_double_normal_posterior(self):
+    def test_double_normal_peak_shape(self):
         x = np.linspace(5, 12, 10000)
         y1 = st.norm.pdf(x, loc=7.5, scale=0.6)
         y2 = st.norm.pdf(x, loc=9, scale=0.4) * 2
-        y_double_pt = models.double_normal_posterior(
+        y_double_pt = models.double_normal_peak_shape(
             0, x, (7.5, 9), (0.6, 0.4), height=(np.max(y1), np.max(y2))
         )
         y_double = y_double_pt.eval().astype(float)
@@ -105,11 +181,11 @@ class TestDistributions:
         np.testing.assert_allclose(expected_mode_skew, actual_mode, atol=5e-3)
         pass
 
-    def test_skew_normal_posterior(self):
+    def test_skew_normal_peak_shape(self):
         x = np.linspace(-1, 5.5, 10000)
         # test first with positive alpha
         expected = st.skewnorm.pdf(x, 3, loc=1.2, scale=1.1)
-        actual_pt = models.skew_normal_posterior(0, x, 1.2, 1.1, 3, area=1)
+        actual_pt = models.skew_normal_peak_shape(0, x, 1.2, 1.1, 3, area=1)
         # cast arrays to float data type in order to avoid error of np.testing.assert_allclose() due to using np.isfinite under the hood
         actual = actual_pt.eval().astype(float)
         expected = expected.astype(float)
@@ -118,7 +194,7 @@ class TestDistributions:
 
         # test again with negative alpha
         expected = st.skewnorm.pdf(x, -3, loc=1.2, scale=1.1)
-        actual_pt = models.skew_normal_posterior(0, x, 1.2, 1.1, -3, area=1)
+        actual_pt = models.skew_normal_peak_shape(0, x, 1.2, 1.1, -3, area=1)
         # cast arrays to float data type in order to avoid error of np.testing.assert_allclose() due to using np.isfinite under the hood
         actual = actual_pt.eval().astype(float)
         expected = expected.astype(float)
@@ -133,8 +209,8 @@ class TestDistributions:
         height = np.max(y)
         area = scipy.integrate.quad(lambda x: st.norm.pdf(x, loc=1, scale=1), -10, 10)[0]
         x = np.linspace(-10, 10, 10000)
-        y_actual_pt = models.normal_posterior(0, x, 1, 1, height=height)
-        y_skew_actual_pt = models.skew_normal_posterior(0, x, 1, 1, 0, area=area)
+        y_actual_pt = models.normal_peak_shape(0, x, 1, 1, height=height)
+        y_skew_actual_pt = models.skew_normal_peak_shape(0, x, 1, 1, 0, area=area)
         y_actual = y_actual_pt.eval().astype(float)
         y_skew_actual = y_skew_actual_pt.eval().astype(float)
         # many values are extremely close to zero so rtol was increased.
@@ -142,7 +218,7 @@ class TestDistributions:
         np.testing.assert_allclose(y_skew_actual, y_actual, atol=1e-20, rtol=0.9)
         pass
 
-    def test_double_skew_normal_posterior(self):
+    def test_double_skew_normal_peak_shape(self):
         x1 = np.arange(4, 6, 0.1)
         x2 = np.arange(6, 8, 0.1)
         alpha = 5
@@ -150,7 +226,7 @@ class TestDistributions:
         y2 = st.skewnorm.pdf(x2, alpha, loc=6.3, scale=0.2)
         time = np.array(list(x1) + list(x2))
         intensity = np.array(list(y1) + list(y2))
-        y_double_pt = models.double_skew_normal_posterior(
+        y_double_pt = models.double_skew_normal_peak_shape(
             0, time, (5, 6.3), (0.2, 0.2), (5, 5), area=(1, 1)
         )
         y_double = y_double_pt.eval().astype(float)
@@ -158,33 +234,52 @@
 
 
 @pytest.mark.parametrize(
-    "model_type", ["normal", "skew_normal", "double_normal", "double_skew_normal"]
+    "define_func",
+    [
+        models.define_model_normal,
+        models.define_model_skew,
+    ],
 )
-def test_pymc_sampling(model_type):
-    timeseries = np.load(
-        Path(__file__).absolute().parent.parent / "example" / "A2t2R1Part1_132_85.9_86.1.npy"
-    )
+def test_singlepeak_sampling(define_func):
+    timeseries = np.load(_DP_ROOT / "example" / "A2t2R1Part1_132_85.9_86.1.npy")
 
-    if model_type == models.ModelType.Normal:
-        pmodel = models.define_model_normal(timeseries[0], timeseries[1])
-    elif model_type == models.ModelType.SkewNormal:
-        pmodel = models.define_model_skew(timeseries[0], timeseries[1])
-    elif model_type == models.ModelType.DoubleNormal:
-        pmodel = models.define_model_double_normal(timeseries[0], timeseries[1])
-    elif model_type == models.ModelType.DoubleSkewNormal:
-        pmodel = models.define_model_double_skew_normal(timeseries[0], timeseries[1])
+    pmodel = define_func(timeseries[0], timeseries[1])
     with pmodel:
         idata = pm.sample(cores=2, chains=2, tune=3, draws=5)
-    if model_type in [models.ModelType.DoubleNormal, models.ModelType.DoubleSkewNormal]:
-        summary = az.summary(idata)
-        # test whether the ordered transformation and the subpeak dimension work as intended
-        assert summary.loc["mean[0]", "mean"] < summary.loc["mean[1]", "mean"]
-        # assert summary.loc["area[0]", "mean"] < summary.loc["area[1]", "mean"]
+    assert set(idata.posterior.keys()) >= _REQUIRED_VARIABLES
+    assert set(idata.constant_data.keys()) >= _REQUIRED_DATA
+    pass
+
+
+@pytest.mark.parametrize(
+    "define_func",
+    [
+        models.define_model_double_normal,
+        models.define_model_double_skew_normal,
+    ],
+)
+def test_doublepeak_sampling(define_func):
+    timeseries = np.load(_DP_ROOT / "example" / "A2t2R1Part1_132_85.9_86.1.npy")
+
+    pmodel = define_func(timeseries[0], timeseries[1])
+    with pmodel:
+        idata = pm.sample(cores=2, chains=2, tune=3, draws=5)
+    assert set(idata.posterior.keys()) >= _REQUIRED_VARIABLES
+    assert set(idata.constant_data.keys()) >= _REQUIRED_DATA
+    # Confirm the order of peaks is as intended
+    np.testing.assert_array_less(
+        idata.posterior["offset"].sel(subpeak=0),
+        idata.posterior["offset"].sel(subpeak=1),
+    )
+    np.testing.assert_array_less(
+        idata.posterior["mean"].sel(subpeak=0),
+        idata.posterior["mean"].sel(subpeak=1),
+    )
     pass
 
 
 def test_model_comparison():
-    path = Path(__file__).absolute().parent.parent / "test_data/test_model_comparison"
+    path = _DP_ROOT / "test_data/test_model_comparison"
     idata_normal = az.from_netcdf(path / "idata_normal.nc")
     idata_skew = az.from_netcdf(path / "idata_skew.nc")
     compare_dict = {
peak_performance/test_pipeline.py CHANGED
@@ -636,11 +636,18 @@ def test_model_selection_check():
     assert selected_model == "normal"
     # case 2: double peak exceeds elpd score difference threshold and is thus accepted
     result_df = pandas.DataFrame(
-        {"elpd_loo": [50, 30, 10, -5], "ic": ["loo", "loo", "loo", "loo"]},
+        {"elpd_loo": [50, 30, 20, -5], "ic": ["loo", "loo", "loo", "loo"]},
         index=["double_normal", "double_skew_normal", "normal", "skew_normal"],
     )
     selected_model = pl.model_selection_check(result_df, "loo", 25)
     assert selected_model == "double_normal"
+    # case 3: single peak models were excluded
+    result_df = pandas.DataFrame(
+        {"elpd_loo": [50, 30], "ic": ["loo", "loo"]},
+        index=["double_normal", "double_skew_normal"],
+    )
+    selected_model = pl.model_selection_check(result_df, "loo", 25)
+    assert selected_model == "double_normal"
     pass
 
peak_performance-0.6.4.dist-info/METADATA → peak_performance-0.7.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: peak-performance
-Version: 0.6.4
+Version: 0.7.0
 Summary: A Python toolbox to fit chromatography peaks with uncertainty.
 Author-email: Jochen Nießer <j.niesser@fz-juelich.de>, Michael Osthege <m.osthege@fz-juelich.de>
 License: AGPLv3
@@ -24,14 +24,12 @@ Requires-Dist: pytensor
 Requires-Dist: scipy
 Requires-Dist: openpyxl
 Requires-Dist: numpy <1.26.0
-Provides-Extra: test
-Requires-Dist: pytest ; extra == 'test'
-Requires-Dist: pytest-cov ; extra == 'test'
-Requires-Dist: twine ; extra == 'test'
 
-[![PyPI version](https://img.shields.io/pypi/v/bletl)](https://pypi.org/project/peak-performance/)
-[![pipeline](https://github.com/jubiotech/bletl/workflows/pipeline/badge.svg)](https://github.com/JuBiotech/peak-performance/actions)
-[![coverage](https://codecov.io/gh/jubiotech/bletl/branch/main/graph/badge.svg)](https://app.codecov.io/gh/JuBiotech/peak-performance)
+[![PyPI version](https://img.shields.io/pypi/v/peak-performance)](https://pypi.org/project/peak-performance/)
+[![pipeline](https://github.com/jubiotech/peak-performance/workflows/pipeline/badge.svg)](https://github.com/JuBiotech/peak-performance/actions)
+[![coverage](https://codecov.io/gh/jubiotech/peak-performance/branch/main/graph/badge.svg)](https://app.codecov.io/gh/JuBiotech/peak-performance)
+[![documentation](https://readthedocs.org/projects/peak-performance/badge/?version=latest)](https://peak-performance.readthedocs.io/en/latest)
+[![DOI](https://zenodo.org/badge/713469041.svg)](https://zenodo.org/doi/10.5281/zenodo.10255543)
 
 # How to use PeakPerformance
 For installation instructions, see `Installation.md`.
@@ -39,13 +37,15 @@ For instructions regarding the use of PeakPerformance, check out the example not
 
 ## Preparing raw data
 This step is crucial when using PeakPerformance. Raw data has to be supplied as a time series, meaning that for each signal you want to analyze, you save a NumPy array consisting of time in the first dimension and intensity in the second dimension (compare the example data). Both time and intensity should also be NumPy arrays. If you e.g. have the time and intensity of a signal as lists, you can use the following code to convert, format, and save them in the correct manner:
-```
+
+```python
 import numpy as np
 from pathlib import Path
 
 time_series = np.array([np.array(time), np.array(intensity)])
 np.save(Path(r"example_path/time_series.npy"), time_series)
 ```
+
 The naming convention of raw data files is `<acquisition name>_<precursor ion m/z or experiment number>_<product ion m/z start>_<product ion m/z end>.npy`. There should be no underscores within the named sections such as `acquisition name`. Essentially, the raw data names include the acquisition and mass trace, thus yielding a recognizable and unique name for each isotopomer/fragment/metabolite/sample.
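For example, the raw data file shipped with the repository, `A2t2R1Part1_132_85.9_86.1.npy`, splits into its four sections like so (a hypothetical parsing snippet for illustration):

```python
filename = "A2t2R1Part1_132_85.9_86.1.npy"
acquisition, precursor, mz_start, mz_end = filename.removesuffix(".npy").split("_")
# acquisition="A2t2R1Part1", precursor="132", mz_start="85.9", mz_end="86.1"
```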
 
 ## Model selection
@@ -57,11 +57,12 @@ Since model selection is a computationally demanding and time consuming process,
 If an error occurred in the middle of a batch run, then you can use the `pipeline_restart` function in the `pipeline` module to create a new batch which will analyze only those samples which have not been analyzed previously.
 
 ### The model parameters don't converge and/or the fit does not describe the raw data well.
-Check the separate file `How to adapt PeakPerformance to you data`.
+Check the separate file `How to adapt PeakPerformance to your data`.
 
 # How to contribute
 If you encounter bugs while using PeakPerformance, please bring them to our attention by opening an issue. When doing so, describe the problem in detail and add screenshots/code snippets and whatever other helpful material you can provide.
 When contributing code, create a local clone of PeakPerformance, create a new branch, and open a pull request (PR).
 
 # How to cite
-Will be updated once the paper has been released and a zenodo DOI has been created.
+Head over to Zenodo to [generate a BibTeX citation](https://doi.org/10.5281/zenodo.10255543) for the latest release.
+A publication has just been submitted to a scientific journal. Once published, this section will be updated.
peak_performance-0.7.0.dist-info/RECORD ADDED
@@ -0,0 +1,13 @@
+peak_performance/__init__.py,sha256=yTq4THYewbWRnrs2Qkv4nCd-7MyvDlu_t0fPeWeKxQc,261
+peak_performance/models.py,sha256=HpJzjf9Eq1ZXUxKlX6GZDj21icL4s_G0naJrCgWntcM,27457
+peak_performance/pipeline.py,sha256=8yy2-hTNozBJeLE_dulQJCzCBWv2CoRRdXSHDN2UwD8,64395
+peak_performance/plots.py,sha256=5F-s7ZcFgZuN5xGIWRSJ5-_Pl99-vqbcr3F8dYnBFQc,9455
+peak_performance/test_main.py,sha256=xQiLDjhldxZzY5sp3RyIJUTtXxX46auWY9Qy7nuifxw,97
+peak_performance/test_models.py,sha256=r6kqAVBtAbycf4IoRaXcSCZp6Lras3afK6o9qcLZbH8,11592
+peak_performance/test_pipeline.py,sha256=gTZAxcJEVwJ0XW4IewmIWGLmx1n7KaK8egrovKHsCFI,22961
+peak_performance/test_plots.py,sha256=lGwPWzezAhzEnyu_NMx2lFtyzzb1wxy-jnRMtOaaniY,4100
+peak_performance-0.7.0.dist-info/LICENSE.md,sha256=zj-4LZ7oChyw5Uj5sFYOrVI3juK06Cb9lFm0rPcHXYk,32387
+peak_performance-0.7.0.dist-info/METADATA,sha256=uAz1t9qggYqguLgLPJ611PjUOYKM8CWJQLXbH3u8RsY,5076
+peak_performance-0.7.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+peak_performance-0.7.0.dist-info/top_level.txt,sha256=-lZSmgn2fZA-xPVmddLwaRt2hQeeWj7TYVefOk7_T58,17
+peak_performance-0.7.0.dist-info/RECORD,,
peak_performance-0.6.4.dist-info/WHEEL → peak_performance-0.7.0.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.42.0)
+Generator: bdist_wheel (0.43.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
@@ -1,13 +0,0 @@
1
- peak_performance/__init__.py,sha256=yTq4THYewbWRnrs2Qkv4nCd-7MyvDlu_t0fPeWeKxQc,261
2
- peak_performance/models.py,sha256=L47mNU1HItYv5cB-cs2H0ooswhdcLfBdg8X1MHeiTUY,25130
3
- peak_performance/pipeline.py,sha256=A-eIwhbn9hCIvWgrG5ksfQLn--ISBKVBjq09nVDwFO8,64311
4
- peak_performance/plots.py,sha256=OO5rSC-kTCzH8-Fh0diz0Cq86fyrZ_FSOiDjcboZRAU,9280
5
- peak_performance/test_main.py,sha256=xQiLDjhldxZzY5sp3RyIJUTtXxX46auWY9Qy7nuifxw,97
6
- peak_performance/test_models.py,sha256=X3fy-kNih7TNrr4jKzgcx8qRnmh6cA27hSr2b6Tmf18,9334
7
- peak_performance/test_pipeline.py,sha256=wyzVgVYT0pK_Lnh5VZEgL8Rxn8sjiCa1dRp1tF79foM,22652
8
- peak_performance/test_plots.py,sha256=lGwPWzezAhzEnyu_NMx2lFtyzzb1wxy-jnRMtOaaniY,4100
9
- peak_performance-0.6.4.dist-info/LICENSE.md,sha256=zj-4LZ7oChyw5Uj5sFYOrVI3juK06Cb9lFm0rPcHXYk,32387
10
- peak_performance-0.6.4.dist-info/METADATA,sha256=X7qWgjCWDwi9KseQnDasaijG0k9u-L6CbGTH0qj8Zd4,4796
11
- peak_performance-0.6.4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
12
- peak_performance-0.6.4.dist-info/top_level.txt,sha256=-lZSmgn2fZA-xPVmddLwaRt2hQeeWj7TYVefOk7_T58,17
13
- peak_performance-0.6.4.dist-info/RECORD,,