peak-performance 0.6.4__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
peak_performance/models.py CHANGED
@@ -1,19 +1,20 @@
- """
- PeakPerformance
- Copyright (C) 2023 Forschungszentrum Jülich GmbH
+ # PeakPerformance
+ # Copyright (C) 2023 Forschungszentrum Jülich GmbH

- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU Affero General Public License as published
+ # by the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.

- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU Affero General Public License for more details.

- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <https://www.gnu.org/licenses/>.
+ # You should have received a copy of the GNU Affero General Public License
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
+ """
+ This module contains functions for creating various kinds of peak models and for making initial guesses for their parameters.
  """

  from enum import Enum
@@ -28,12 +29,39 @@ import scipy.stats as st


  class ModelType(str, Enum):
-     """Class containing all implemented model types."""
+     """Enum of default model types."""

      Normal = "normal"
+     """Shape of a Gaussian Normal PDF."""
+
      SkewNormal = "skew_normal"
+     """Shape of a skewed Normal PDF."""
+
      DoubleNormal = "double_normal"
+     """Superposition of two ``Normal`` peaks."""
+
      DoubleSkewNormal = "double_skew_normal"
+     """Superposition of two ``SkewNormal`` peaks."""
+
+
+ def guess_noise(intensity):
+     """
+     Function for providing a guess for the noise width of a given signal
+     based on the first and last 15 % of data points in a time series.
+
+     Parameters
+     ----------
+     intensity
+         NumPy array with the intensity values of the relevant timeframe.
+     """
+     n = len(intensity)
+     ifrom = int(np.ceil(0.15 * n))
+     ito = int(np.floor(0.85 * n))
+     start_ints = intensity[:ifrom]
+     end_ints = intensity[ito:]
+     return np.std([*(start_ints - np.mean(start_ints)), *(end_ints - np.mean(end_ints))])


  def initial_guesses(time: np.ndarray, intensity: np.ndarray):
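The new `guess_noise` helper estimates the noise standard deviation from the (presumably peak-free) first and last 15 % of the time series, instead of the peak-to-peak range used before. A minimal usage sketch on made-up synthetic data:

```python
import numpy as np
from peak_performance import models

rng = np.random.default_rng(42)
time = np.linspace(0, 10, 300)
# Flat baseline with sd=5 noise plus one peak in the center of the timeframe.
intensity = rng.normal(0, 5, size=300) + 800 * np.exp(-((time - 5) ** 2) / 0.5)

# Only the outer 15 % of points enter the estimate, so the peak does not inflate it.
print(models.guess_noise(intensity))  # ≈ 5
```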
@@ -79,12 +107,16 @@ def initial_guesses(time: np.ndarray, intensity: np.ndarray):
      # use the indices in noise_index to get the time and intensity of all noise data points
      noise_time = [time[n] for n in noise_index]
      noise_intensity = [intensity[n] for n in noise_index]
-     # calculate the width of the noise
-     noise_width_guess = max(noise_intensity) - min(noise_intensity)

      # use scipy to fit a linear regression through the noise as a prior for the eventual baseline
      baseline_fit = st.linregress(noise_time, noise_intensity)

+     # calculate the width of the noise
+     noise_width_guess = guess_noise(intensity)
+
+     # clip the noise to at least 10
+     noise_width_guess = np.clip(noise_width_guess, 10, np.inf)
+
      return baseline_fit.slope, baseline_fit.intercept, noise_width_guess


@@ -118,9 +150,9 @@ def baseline_slope_prior_params(slope_guess: Union[float, int]) -> Mapping[str,
      }


- def normal_posterior(baseline, time: np.ndarray, mean, std, *, height):
+ def normal_peak_shape(baseline, time: np.ndarray, mean, std, *, height):
      """
-     Model a peak shaped like the PDF of a normal distribution.
+     Model a peak shaped like a normal distribution.

      Parameters
      ----------
@@ -166,7 +198,7 @@ def define_model_normal(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
      # add guesses to the pmodel as ConstantData
      pm.ConstantData("intercept_guess", intercept_guess)
      pm.ConstantData("slope_guess", slope_guess)
-     pm.ConstantData("noise_width_guess", noise_width_guess)
+     noise_guess = pm.ConstantData("noise_width_guess", noise_width_guess)

      # priors plus error handling in case of mathematically impermissible values
      baseline_intercept = pm.Normal(
@@ -174,7 +206,7 @@ def define_model_normal(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
      )
      baseline_slope = pm.Normal("baseline_slope", **baseline_slope_prior_params(slope_guess))
      baseline = pm.Deterministic("baseline", baseline_intercept + baseline_slope * time)
-     noise = pm.LogNormal("noise", np.clip(np.log(noise_width_guess), np.log(10), np.inf), 1)
+     noise = pm.LogNormal("noise", pt.log(noise_guess))
      # define priors for parameters of a normally distributed posterior
      mean = pm.Normal("mean", np.mean(time[[0, -1]]), np.ptp(time) / 2)
      std = pm.HalfNormal("std", np.ptp(time) / 3)
@@ -182,7 +214,7 @@ def define_model_normal(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
      pm.Deterministic("area", height / (1 / (std * np.sqrt(2 * np.pi))))
      pm.Deterministic("sn", height / noise)
      # posterior
-     y = normal_posterior(baseline, time, mean, std, height=height)
+     y = normal_peak_shape(baseline, time, mean, std, height=height)
      y = pm.Deterministic("y", y)

      # likelihood
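The noise prior now reads the guess from the registered `ConstantData` node. Since `initial_guesses` already clips the guess to at least 10, the inline `np.clip` became redundant; and assuming `pm.LogNormal` falls back to `sigma=1` when no scale is given, the prior is effectively unchanged. A sketch of the equivalence:

```python
import numpy as np
import pymc as pm
import pytensor.tensor as pt

noise_width_guess = 25.0  # hypothetical guess, already clipped to >= 10
with pm.Model():
    noise_guess = pm.ConstantData("noise_width_guess", noise_width_guess)
    # 0.7.1: prior median sits at the guess; sigma falls back to the default of 1
    noise_new = pm.LogNormal("noise_new", pt.log(noise_guess))
    # 0.6.4: same prior, with the clipping done inline
    noise_old = pm.LogNormal("noise_old", np.clip(np.log(noise_width_guess), np.log(10), np.inf), 1)
```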
@@ -193,7 +225,7 @@ def define_model_normal(time: np.ndarray, intensity: np.ndarray) -> pm.Model:

  def double_model_mean_prior(time):
      """
-     Function creating prior probability distributions for double peaks using a ZeroSumNormal distribution.
+     Function creating prior probability distributions for the mean retention times of a dual-peak.

      Parameters
      ----------
@@ -203,31 +235,75 @@ def double_model_mean_prior(time):
      Returns
      -------
      mean
-         Normally distributed prior for the ordered means of the double peak model.
+         Normally distributed prior for the ordered means of the multi-peak model.
      diff
-         Difference between meanmean and mean.
+         Difference between the group mean and peak-wise mean.
      meanmean
-         Normally distributed prior for the mean of the double peak means.
+         Normally distributed prior for the group mean of the peak means.
+     """
+     tmin = np.min(time)
+     tdelta = np.ptp(time)
+     meanmean = pm.Normal("meanmean", mu=tmin + tdelta / 2, sigma=tdelta / 6)
+     separation = pm.Gamma(
+         "separation",
+         mu=tdelta / 6,
+         sigma=tdelta / 12,
+     )
+     offset = pm.Deterministic("offset", pt.stack([-separation / 2, separation / 2]), dims="subpeak")
+     mean = pm.Deterministic(
+         "mean",
+         meanmean + offset,
+         dims=("subpeak",),
+     )
+     return mean, offset, meanmean
+
+
+ def multi_peak_means_prior(time):
+     """
+     Function creating prior probability distributions for multi-peaks using a ZeroSumNormal distribution.
+
+     The number of peaks is determined from the `"subpeak"` model coordinates.
+
+     Parameters
+     ----------
+     time
+         NumPy array with the time values of the relevant timeframe.
+
+     Returns
+     -------
+     mean
+         Normally distributed prior for the ordered means of the multi-peak model.
+     offset
+         Time offset between the group mean and peak-wise mean.
+     meanmean
+         Normally distributed prior for the group mean of the peak means.
      """
+     pmodel = pm.modelcontext(None)
      meanmean = pm.Normal("meanmean", mu=np.min(time) + np.ptp(time) / 2, sigma=np.ptp(time) / 6)
-     diff = pm.ZeroSumNormal(
-         "diff",
-         sigma=1,
-         shape=(2,),  # currently no dims due to bug with ordered transformation
+     offset_unsorted = pm.ZeroSumNormal(
+         "offset_unsorted",
+         sigma=2,
+         # Support arbitrary number of subpeaks
+         shape=len(pmodel.coords["subpeak"]),
+         # NOTE: As of PyMC v5.14, the OrderedTransform and ZeroSumTransform are incompatible.
+         # See https://github.com/pymc-devs/pymc/issues/6975.
+         # As a workaround we'll call pt.sort a few lines below.
      )
-     mean = pm.Normal(
+     offset = pm.Deterministic("offset", pt.sort(offset_unsorted), dims="subpeak")
+     mean = pm.Deterministic(
          "mean",
-         mu=meanmean + diff,
-         sigma=1,
-         transform=pm.distributions.transforms.ordered,
+         meanmean + offset,
+         # Introduce a small jitter to the subpeak means to decouple them
+         # from the strictly asymmetric ZeroSumNormal entries.
+         # This reduces the chances of unwanted bimodality.
          dims=("subpeak",),
      )
-     return mean, diff, meanmean
+     return mean, offset, meanmean


- def double_normal_posterior(baseline, time: np.ndarray, mean, std, *, height):
+ def double_normal_peak_shape(baseline, time: np.ndarray, mean, std, *, height):
      """
-     Define a univariate ordered normal distribution as the posterior.
+     Model a peak shaped like a univariate ordered normal distribution.

      Parameters
      ----------
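The new `multi_peak_means_prior` generalizes the dual-peak prior: it reads the number of subpeaks from the model's `"subpeak"` coordinate and orders a `ZeroSumNormal` via `pt.sort` instead of an ordered transform. A minimal sketch of how it is presumably meant to be called:

```python
import numpy as np
import pymc as pm
from peak_performance import models

time = np.linspace(0, 10, 100)
# The length of the "subpeak" coordinate sets the number of peak means.
with pm.Model(coords={"subpeak": [0, 1, 2]}) as pmodel:
    mean, offset, meanmean = models.multi_peak_means_prior(time)

# The sorted offsets guarantee mean[0] <= mean[1] <= mean[2] in every draw.
print(pm.draw(mean))
```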
@@ -281,7 +357,7 @@ def define_model_double_normal(time: np.ndarray, intensity: np.ndarray) -> pm.Mo
      # add guesses to the pmodel as ConstantData
      pm.ConstantData("intercept_guess", intercept_guess)
      pm.ConstantData("slope_guess", slope_guess)
-     pm.ConstantData("noise_width_guess", noise_width_guess)
+     noise_guess = pm.ConstantData("noise_width_guess", noise_width_guess)

      # priors
      baseline_intercept = pm.Normal(
@@ -289,8 +365,9 @@ def define_model_double_normal(time: np.ndarray, intensity: np.ndarray) -> pm.Mo
      )
      baseline_slope = pm.Normal("baseline_slope", **baseline_slope_prior_params(slope_guess))
      baseline = pm.Deterministic("baseline", baseline_intercept + baseline_slope * time)
-     noise = pm.LogNormal("noise", np.clip(np.log(noise_width_guess), np.log(10), np.inf), 1)
-     std = pm.HalfNormal("std", sigma=[np.ptp(time) / 3, np.ptp(time) / 3], dims=("subpeak",))
+     noise = pm.LogNormal("noise", pt.log(noise_guess))
+     # NOTE: We expect double peaks to be narrower w.r.t. the time frame, compared to single peaks.
+     std = pm.HalfNormal("std", sigma=[np.ptp(time) / 6, np.ptp(time) / 6], dims=("subpeak",))
      height = pm.HalfNormal(
          "height", sigma=[0.95 * np.max(intensity), 0.95 * np.max(intensity)], dims=("subpeak",)
      )
@@ -302,7 +379,7 @@ def define_model_double_normal(time: np.ndarray, intensity: np.ndarray) -> pm.Mo
      mean, diff, meanmean = double_model_mean_prior(time)

      # posterior
-     y = double_normal_posterior(baseline, time, mean, std, height=height)
+     y = double_normal_peak_shape(baseline, time, mean, std, height=height)
      y = pm.Deterministic("y", y)

      # likelihood
@@ -323,10 +400,9 @@ def std_skew_calculation(scale, alpha):
          Skewness parameter of the skew normal distribution.

      Returns
-     ----------
+     -------
      std
          Standard deviation of a skew normal distribution.
-     -------
      """
      return np.sqrt(scale**2 * (1 - (2 * alpha**2) / ((alpha**2 + 1) * np.pi)))

@@ -345,7 +421,7 @@ def mean_skew_calculation(loc, scale, alpha):
          Skewness parameter of the skew normal distribution.

      Returns
-     ----------
+     -------
      mean
          Arithmetic mean of a skew normal distribution.
      """
@@ -419,7 +495,7 @@ def height_calculation(area, loc, scale, alpha, mode_skew):
          Mode of the skew normal distribution.

      Returns
-     ----------
+     -------
      mean
          Arithmetic mean of a skew normal distribution.
      """
@@ -430,9 +506,9 @@ def height_calculation(area, loc, scale, alpha, mode_skew):
      )


- def skew_normal_posterior(baseline, time, mean, std, alpha, *, area):
+ def skew_normal_peak_shape(baseline, time, mean, std, alpha, *, area):
      """
-     Define a skew normally distributed posterior.
+     Model a peak shaped like a skew normal distribution.

      Parameters
      ----------
@@ -489,7 +565,7 @@ def define_model_skew(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
      # add guesses to the pmodel as ConstantData
      pm.ConstantData("intercept_guess", intercept_guess)
      pm.ConstantData("slope_guess", slope_guess)
-     pm.ConstantData("noise_width_guess", noise_width_guess)
+     noise_guess = pm.ConstantData("noise_width_guess", noise_width_guess)

      # priors plus error handling in case of mathematically impermissible values
      baseline_intercept = pm.Normal(
@@ -497,7 +573,7 @@ def define_model_skew(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
      )
      baseline_slope = pm.Normal("baseline_slope", **baseline_slope_prior_params(slope_guess))
      baseline = pm.Deterministic("baseline", baseline_intercept + baseline_slope * time)
-     noise = pm.LogNormal("noise", np.clip(np.log(noise_width_guess), np.log(10), np.inf), 1)
+     noise = pm.LogNormal("noise", pt.log(noise_guess))
      mean = pm.Normal("mean", np.mean(time[[0, -1]]), np.ptp(time) / 2)
      std = pm.HalfNormal("std", np.ptp(time) / 3)
      alpha = pm.Normal("alpha", 0, 3.5)
@@ -528,7 +604,7 @@ def define_model_skew(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
          height_formula,
      )
      pm.Deterministic("sn", height / noise)
-     y = skew_normal_posterior(baseline, time, mean, std, alpha, area=area)
+     y = skew_normal_peak_shape(baseline, time, mean, std, alpha, area=area)
      y = pm.Deterministic("y", y)

      # likelihood
@@ -537,9 +613,9 @@ def define_model_skew(time: np.ndarray, intensity: np.ndarray) -> pm.Model:
      return pmodel


- def double_skew_normal_posterior(baseline, time: np.ndarray, mean, std, alpha, *, area):
+ def double_skew_normal_peak_shape(baseline, time: np.ndarray, mean, std, alpha, *, area):
      """
-     Define a univariate ordered skew normal distribution as the posterior.
+     Model a peak shaped like a univariate ordered skew normal distribution.

      Parameters
      ----------
@@ -605,7 +681,7 @@ def define_model_double_skew_normal(time: np.ndarray, intensity: np.ndarray) ->
      # add guesses to the pmodel as ConstantData
      pm.ConstantData("intercept_guess", intercept_guess)
      pm.ConstantData("slope_guess", slope_guess)
-     pm.ConstantData("noise_width_guess", noise_width_guess)
+     noise_guess = pm.ConstantData("noise_width_guess", noise_width_guess)

      # priors plus error handling in case of mathematically impermissible values
      baseline_intercept = pm.Normal(
@@ -613,7 +689,7 @@ def define_model_double_skew_normal(time: np.ndarray, intensity: np.ndarray) ->
      )
      baseline_slope = pm.Normal("baseline_slope", **baseline_slope_prior_params(slope_guess))
      baseline = pm.Deterministic("baseline", baseline_intercept + baseline_slope * time)
-     noise = pm.LogNormal("noise", np.clip(np.log(noise_width_guess), np.log(10), np.inf), 1)
+     noise = pm.LogNormal("noise", pt.log(noise_guess))
      # use univariate ordered normal distribution for the mean values
      # use a zero sum normal distribution to describe the distance of the mean values
      # from the mean of the mean values ("meanmean")
@@ -656,7 +732,7 @@ def define_model_double_skew_normal(time: np.ndarray, intensity: np.ndarray) ->
      pm.Deterministic("sn", height / noise, dims=("subpeak",))

      # posterior
-     y = double_skew_normal_posterior(baseline, time, mean, std, alpha, area=area)
+     y = double_skew_normal_peak_shape(baseline, time, mean, std, alpha, area=area)
      y = pm.Deterministic("y", y)

      # likelihood
peak_performance/pipeline.py CHANGED
@@ -1,19 +1,20 @@
- """
- PeakPerformance
- Copyright (C) 2023 Forschungszentrum Jülich GmbH
+ # PeakPerformance
+ # Copyright (C) 2023 Forschungszentrum Jülich GmbH

- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU Affero General Public License as published
+ # by the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.

- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU Affero General Public License for more details.

- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <https://www.gnu.org/licenses/>.
+ # You should have received a copy of the GNU Affero General Public License
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
+ """
+ Defines steps for a pipeline to process LC-MS-MS data.
  """

  import importlib
@@ -489,6 +490,7 @@ def sampling(pmodel, **sample_kwargs):
      idata
          Inference data object.
      """
+     sample_kwargs.setdefault("chains", 4)
      sample_kwargs.setdefault("tune", 2000)
      sample_kwargs.setdefault("draws", 2000)
      # check if nutpie is available; if so, use it to enhance performance
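Because the defaults are applied with `setdefault`, explicitly passed kwargs still take precedence. A sketch, using the example time series shipped with the repository:

```python
import numpy as np
from peak_performance import models, pipeline

time, intensity = np.load("example/A2t2R1Part1_132_85.9_86.1.npy")
pmodel = models.define_model_normal(time, intensity)
# chains=4, tune=2000 and draws=2000 are only fallbacks; explicit values win.
idata = pipeline.sampling(pmodel, chains=2, tune=500)
```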
@@ -647,7 +649,7 @@ def posterior_predictive_sampling(pmodel, idata):
          Inference data object updated with the posterior predictive samples.
      """
      with pmodel:
-         idata.extend(pm.sample_posterior_predictive(idata, var_names=["y"]))
+         idata.extend(pm.sample_posterior_predictive(idata))
      return idata
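Without `var_names`, `pm.sample_posterior_predictive` samples only the observed variables, i.e. the likelihood `"L"` rather than the `Deterministic` `"y"`. This matches the change in `plots.py` below, which now reads `idata.posterior_predictive["L"]`. Continuing the sketch above:

```python
idata = pipeline.posterior_predictive_sampling(pmodel, idata)
# The predictive group now holds the noisy likelihood samples under "L" ...
samples = idata.posterior_predictive["L"].stack(sample=("chain", "draw")).T.values
# ... while the noise-free peak curve "y" remains in the posterior group.
curve = idata.posterior["y"]
```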
@@ -1185,7 +1187,7 @@ def pipeline(
          Data format (suffix) of the raw data, default is '.npy'.

      Returns
-     ----------
+     -------
      path_results
          Path variable pointing to the newly created folder for this batch.
      """
@@ -1222,7 +1224,7 @@ def pipeline_restart(
          Path variable pointing to the directory of the broken PeakPerformance batch

      Returns
-     ----------
+     -------
      path_results_new
          Path variable pointing to the newly created folder for the restarted batch.
      """
@@ -1321,7 +1323,7 @@ def parse_files_for_model_selection(signals: pandas.DataFrame) -> Dict[str, str]
          DataFrame containing the signals tab of Template.xlsx.

      Returns
-     ----------
+     -------
      files_for_selection
          Dict with file names as keys and unique identifiers as values.
      """
@@ -1409,7 +1411,7 @@ def selected_models_to_template(


  def model_selection_check(
-     result_df: pandas.DataFrame, ic: str, elpd_threshold: Union[str, float] = 25
+     result_df: pandas.DataFrame, ic: str, elpd_threshold: Union[str, float] = 35
  ) -> str:
      """
      During model selection, double peak models are sometimes incorrectly preferred due to their increased complexity.
@@ -1428,17 +1430,18 @@ def model_selection_check(
          to be accepted.

      Returns
-     ----------
+     -------
      selected_model
          Name of the selected model type.
      """
      selected_model = str(result_df.index[0])
      if "double" in selected_model:
          df_single_peak_models = result_df[~result_df.index.str.contains("double")]
-         elpd_single = max(list(df_single_peak_models[f"elpd_{ic}"]))
-         elpd_double = max(list(result_df[f"elpd_{ic}"]))
-         if not elpd_double > elpd_single + elpd_threshold:
-             selected_model = str(df_single_peak_models.index[0])
+         if len(df_single_peak_models) > 0:
+             elpd_single = max(list(df_single_peak_models[f"elpd_{ic}"]))
+             elpd_double = max(list(result_df[f"elpd_{ic}"]))
+             if not elpd_double > elpd_single + elpd_threshold:
+                 selected_model = str(df_single_peak_models.index[0])
      return selected_model
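A quick illustration of the acceptance rule with the new default threshold of 35 (a hypothetical ranking DataFrame):

```python
import pandas
from peak_performance import pipeline as pl

# Hypothetical ELPD ranking where the double peak model leads by only 20 points.
result_df = pandas.DataFrame(
    {"elpd_loo": [50, 30], "ic": ["loo", "loo"]},
    index=["double_normal", "normal"],
)
# 50 > 30 + 35 is False, so the simpler single-peak model is selected.
print(pl.model_selection_check(result_df, "loo"))  # -> "normal"
```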
@@ -1470,7 +1473,7 @@ def selection_loop(
          "waic": widely applicable information criterion)

      Returns
-     ----------
+     -------
      result_df
          DataFrame containing the ranking and scores of the model selection.
      model_dict
@@ -1562,7 +1565,7 @@ def model_selection(path_raw_data: Union[str, os.PathLike], *, ic: str = "loo"):
          "waic": widely applicable information criterion)

      Returns
-     ----------
+     -------
      comparison_results
          DataFrame containing all rankings from model selection.
      model_dict
peak_performance/plots.py CHANGED
@@ -1,24 +1,25 @@
- """
- PeakPerformance
- Copyright (C) 2023 Forschungszentrum Jülich GmbH
+ # PeakPerformance
+ # Copyright (C) 2023 Forschungszentrum Jülich GmbH

- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU Affero General Public License as published
+ # by the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.

- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU Affero General Public License for more details.

- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <https://www.gnu.org/licenses/>.
+ # You should have received a copy of the GNU Affero General Public License
+ # along with this program. If not, see <https://www.gnu.org/licenses/>.
+ """
+ Functions for preparing diagnostic and QC plots.
  """

  import os
  from pathlib import Path
- from typing import Sequence, Union
+ from typing import Optional, Sequence, Union

  import arviz as az
  import numpy as np
@@ -31,7 +32,7 @@ def plot_raw_data(
      identifier: str,
      time: np.ndarray,
      intensity: np.ndarray,
-     path: Union[str, os.PathLike],
+     path: Optional[Union[str, os.PathLike]],
      save_formats: Sequence[str] = ("png", "svg"),
  ):
      """
@@ -62,9 +63,10 @@ def plot_raw_data(
      plt.xticks(size=11.5)
      plt.yticks(size=11.5)
      fig.tight_layout()
-     for format in save_formats:
-         fig.savefig(Path(path) / f"{identifier}_NoPeak.{format}", format=format)
-     plt.close(fig)
+     if path is not None:
+         for format in save_formats:
+             fig.savefig(Path(path) / f"{identifier}_NoPeak.{format}", format=format)
+         plt.close(fig)

      return
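With `path` now `Optional`, passing `None` skips saving and leaves the figure open, e.g. for inline display in a notebook. A minimal sketch, reusing the example time series from the test suite:

```python
import numpy as np
from peak_performance import plots

time, intensity = np.load("example/A2t2R1Part1_132_85.9_86.1.npy")
# path=None: nothing is written to disk and the figure is not closed.
plots.plot_raw_data("A2t2R1Part1_132", time, intensity, path=None)
```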
@@ -74,7 +76,6 @@ def plot_density(
  ):
      """
      Method to plot the original data points alongside the posterior predictive plot (percentiles marked with a black, dashed line).
-     Serves as a more accurate comparison between data and model than comparing data and posterior distribution.

      Parameters
      ----------
@@ -135,7 +136,7 @@ def plot_posterior_predictive(
      identifier: str,
      time: np.ndarray,
      intensity: np.ndarray,
-     path: Union[str, os.PathLike],
+     path: Optional[Union[str, os.PathLike]],
      idata: az.InferenceData,
      discarded: bool,
      save_formats: Sequence[str] = ("png", "svg"),
@@ -168,7 +169,7 @@ def plot_posterior_predictive(
      plot_density(
          ax=ax,
          x=time,
-         samples=idata.posterior_predictive.y.stack(sample=("chain", "draw")).T.values,
+         samples=idata.posterior_predictive["L"].stack(sample=("chain", "draw")).T.values,
          percentiles=(2.5, 97.5),
      )
      # plot the raw data points
@@ -179,16 +180,19 @@ def plot_posterior_predictive(
      plt.yticks(size=11.5)
      plt.legend()
      fig.tight_layout()
-     # if signal was discarded, add a "_NoPeak" to the file name
-     if discarded:
-         for format in save_formats:
-             fig.savefig(
-                 Path(path) / f"{identifier}_predictive_posterior_NoPeak.{format}", format=format
-             )
-     else:
-         for format in save_formats:
-             fig.savefig(Path(path) / f"{identifier}_predictive_posterior.{format}", format=format)
-     plt.close(fig)
+     if path is not None:
+         # if signal was discarded, add a "_NoPeak" to the file name
+         if discarded:
+             for format in save_formats:
+                 fig.savefig(
+                     Path(path) / f"{identifier}_predictive_posterior_NoPeak.{format}", format=format
+                 )
+         else:
+             for format in save_formats:
+                 fig.savefig(
+                     Path(path) / f"{identifier}_predictive_posterior.{format}", format=format
+                 )
+         plt.close(fig)

      return

@@ -197,7 +201,7 @@ def plot_posterior(
      identifier: str,
      time: np.ndarray,
      intensity: np.ndarray,
-     path: Union[str, os.PathLike],
+     path: Optional[Union[str, os.PathLike]],
      idata: az.InferenceData,
      discarded: bool,
      save_formats: Sequence[str] = ("png", "svg"),
@@ -246,14 +250,15 @@ def plot_posterior(
      plt.xticks(size=11.5)
      plt.yticks(size=11.5)
      fig.tight_layout()
-     # if signal was discarded, add a "_NoPeak" to the file name
-     if discarded:
-         for format in save_formats:
-             fig.savefig(Path(path) / f"{identifier}_posterior_NoPeak.{format}", format=format)
-     else:
-         for format in save_formats:
-             fig.savefig(Path(path) / f"{identifier}_posterior.{format}", format=format)
-     plt.close(fig)
+     if path is not None:
+         # if signal was discarded, add a "_NoPeak" to the file name
+         if discarded:
+             for format in save_formats:
+                 fig.savefig(Path(path) / f"{identifier}_posterior_NoPeak.{format}", format=format)
+         else:
+             for format in save_formats:
+                 fig.savefig(Path(path) / f"{identifier}_posterior.{format}", format=format)
+         plt.close(fig)

      return

@@ -261,7 +266,7 @@ def plot_posterior(
  def plot_model_comparison(
      df_comp: pandas.DataFrame,
      identifier: str,
-     path: Union[str, os.PathLike],
+     path: Optional[Union[str, os.PathLike]],
      save_formats: Sequence[str] = ("png", "svg"),
  ):
      """
@@ -282,8 +287,9 @@ def plot_model_comparison(
      axes = az.plot_compare(df_comp, insample_dev=False)
      fig = axes.figure
      plt.tight_layout()
-     for format in save_formats:
-         fig.savefig(Path(path) / f"model_comparison_{identifier}.{format}", format=format)
-     plt.close(fig)
+     if path is not None:
+         for format in save_formats:
+             fig.savefig(Path(path) / f"model_comparison_{identifier}.{format}", format=format)
+         plt.close(fig)

      return
peak_performance/test_models.py CHANGED
@@ -3,34 +3,110 @@ from pathlib import Path
  import arviz as az
  import numpy as np
  import pymc as pm
+ import pytensor.tensor as pt
  import pytest
  import scipy.integrate
  import scipy.stats as st

  from peak_performance import models

+ _DP_ROOT = Path(__file__).absolute().parent.parent
+ _REQUIRED_VARIABLES = {
+     "baseline_slope",
+     "baseline_intercept",
+     "baseline",
+     "std",
+     "height",
+     "area",
+     "sn",
+     "mean",
+     "y",
+     "noise",
+ }
+ _REQUIRED_DATA = {
+     "slope_guess",
+     "intercept_guess",
+     "noise_width_guess",
+ }
+
+
+ def test_noise_guessing():
+     expected = 0.7
+     intensities = [
+         *np.random.normal(10, expected, size=200),
+         *np.random.normal(0, 6, size=600),
+         *np.random.normal(40, expected, size=200),
+     ]
+     actual = models.guess_noise(intensities)
+     assert 0.6 < actual < 0.8
+     pass
+

  def test_initial_guesses():
      # define time and intensity for example with known result
      time = 2 + 0.1 * np.arange(17)
      intensity = [1, 5, 3] + 11 * [1000] + [7, 9, 11]
      # define expected results
-     expected_noise_width = np.ptp([1, 5, 3, 7, 9, 11])
      expected_baseline_fit = st.linregress([2, 2.1, 2.2, 3.4, 3.5, 3.6], [1, 5, 3, 7, 9, 11])
      # get the values from the initial guesses function
      slope, intercept, noise_width = models.initial_guesses(time, intensity)
      # compare the outcome with the expected values
      assert expected_baseline_fit.slope == slope
      assert expected_baseline_fit.intercept == intercept
-     assert expected_noise_width == noise_width
+     # With this example the noise is clipped to at least 10
+     assert noise_width == 10
+     pass
+
+
+ def test_zsn_sorting():
+     """This tests a workaround that we rely on for multi-peak models."""
+     coords = {
+         "thing": ["left", "center", "right"],
+     }
+     with pm.Model(coords=coords) as pmodel:
+         hyper = pm.Normal("hyper", mu=0, sigma=3)
+         offset_unsorted = pm.ZeroSumNormal(
+             "offset_unsorted",
+             sigma=1,
+             shape=3,
+         )
+         # Create a sorted deterministic without using transforms
+         offset = pm.Deterministic("offset", pt.sort(offset_unsorted), dims="thing")
+         pos = pm.Deterministic(
+             "pos",
+             hyper + offset,
+             dims="thing",
+         )
+         # Observe the things in the wrong order to provoke the model 😈
+         dat = pm.Data("dat", [0.2, 0.05, -0.3], dims="thing")
+         pm.Normal("L", pos, observed=dat, dims="thing")
+
+     # Check draws from the prior
+     drawn = pm.draw(offset, draws=69)
+     np.testing.assert_array_less(drawn[:, 0], drawn[:, 1])
+
+     # And check MCMC draws too
+     with pmodel:
+         idata = pm.sample(
+             chains=1, tune=10, draws=69, step=pm.Metropolis(), compute_convergence_checks=False
+         )
+     for vname in ["offset", "pos"]:
+         np.testing.assert_array_less(
+             idata.posterior[vname].sel(thing="left"),
+             idata.posterior[vname].sel(thing="center"),
+         )
+         np.testing.assert_array_less(
+             idata.posterior[vname].sel(thing="center"),
+             idata.posterior[vname].sel(thing="right"),
+         )
      pass


  class TestDistributions:
-     def test_normal_posterior(self):
+     def test_normal_peak_shape(self):
          x = np.linspace(-5, 10, 10000)
          expected = st.norm.pdf(x, 3, 2)
-         actual_pt = models.normal_posterior(0, x, 3, 2, height=np.max(expected))
+         actual_pt = models.normal_peak_shape(0, x, 3, 2, height=np.max(expected))
          # cast arrays to float data type in order to avoid error of np.testing.assert_allclose() due to using np.isfinite under the hood
          actual = actual_pt.eval().astype(float)
          expected = expected.astype(float)
@@ -38,11 +114,11 @@ class TestDistributions:
          np.testing.assert_allclose(expected, actual, atol=0.0000001)
          pass

-     def test_double_normal_posterior(self):
+     def test_double_normal_peak_shape(self):
          x = np.linspace(5, 12, 10000)
          y1 = st.norm.pdf(x, loc=7.5, scale=0.6)
          y2 = st.norm.pdf(x, loc=9, scale=0.4) * 2
-         y_double_pt = models.double_normal_posterior(
+         y_double_pt = models.double_normal_peak_shape(
              0, x, (7.5, 9), (0.6, 0.4), height=(np.max(y1), np.max(y2))
          )
          y_double = y_double_pt.eval().astype(float)
@@ -105,11 +181,11 @@ class TestDistributions:
          np.testing.assert_allclose(expected_mode_skew, actual_mode, atol=5e-3)
          pass

-     def test_skew_normal_posterior(self):
+     def test_skew_normal_peak_shape(self):
          x = np.linspace(-1, 5.5, 10000)
          # test first with positive alpha
          expected = st.skewnorm.pdf(x, 3, loc=1.2, scale=1.1)
-         actual_pt = models.skew_normal_posterior(0, x, 1.2, 1.1, 3, area=1)
+         actual_pt = models.skew_normal_peak_shape(0, x, 1.2, 1.1, 3, area=1)
          # cast arrays to float data type in order to avoid error of np.testing.assert_allclose() due to using np.isfinite under the hood
          actual = actual_pt.eval().astype(float)
          expected = expected.astype(float)
@@ -118,7 +194,7 @@ class TestDistributions:

          # test again with negative alpha
          expected = st.skewnorm.pdf(x, -3, loc=1.2, scale=1.1)
-         actual_pt = models.skew_normal_posterior(0, x, 1.2, 1.1, -3, area=1)
+         actual_pt = models.skew_normal_peak_shape(0, x, 1.2, 1.1, -3, area=1)
          # cast arrays to float data type in order to avoid error of np.testing.assert_allclose() due to using np.isfinite under the hood
          actual = actual_pt.eval().astype(float)
          expected = expected.astype(float)
@@ -133,8 +209,8 @@ class TestDistributions:
          height = np.max(y)
          area = scipy.integrate.quad(lambda x: st.norm.pdf(x, loc=1, scale=1), -10, 10)[0]
          x = np.linspace(-10, 10, 10000)
-         y_actual_pt = models.normal_posterior(0, x, 1, 1, height=height)
-         y_skew_actual_pt = models.skew_normal_posterior(0, x, 1, 1, 0, area=area)
+         y_actual_pt = models.normal_peak_shape(0, x, 1, 1, height=height)
+         y_skew_actual_pt = models.skew_normal_peak_shape(0, x, 1, 1, 0, area=area)
          y_actual = y_actual_pt.eval().astype(float)
          y_skew_actual = y_skew_actual_pt.eval().astype(float)
          # many values are extremely close to zero so rtol was increased.
@@ -142,7 +218,7 @@ class TestDistributions:
          np.testing.assert_allclose(y_skew_actual, y_actual, atol=1e-20, rtol=0.9)
          pass

-     def test_double_skew_normal_posterior(self):
+     def test_double_skew_normal_peak_shape(self):
          x1 = np.arange(4, 6, 0.1)
          x2 = np.arange(6, 8, 0.1)
          alpha = 5
@@ -150,7 +226,7 @@ class TestDistributions:
          y2 = st.skewnorm.pdf(x2, alpha, loc=6.3, scale=0.2)
          time = np.array(list(x1) + list(x2))
          intensity = np.array(list(y1) + list(y2))
-         y_double_pt = models.double_skew_normal_posterior(
+         y_double_pt = models.double_skew_normal_peak_shape(
              0, time, (5, 6.3), (0.2, 0.2), (5, 5), area=(1, 1)
          )
          y_double = y_double_pt.eval().astype(float)
@@ -158,33 +234,52 @@ class TestDistributions:


  @pytest.mark.parametrize(
-     "model_type", ["normal", "skew_normal", "double_normal", "double_skew_normal"]
+     "define_func",
+     [
+         models.define_model_normal,
+         models.define_model_skew,
+     ],
  )
- def test_pymc_sampling(model_type):
-     timeseries = np.load(
-         Path(__file__).absolute().parent.parent / "example" / "A2t2R1Part1_132_85.9_86.1.npy"
-     )
+ def test_singlepeak_sampling(define_func):
+     timeseries = np.load(_DP_ROOT / "example" / "A2t2R1Part1_132_85.9_86.1.npy")

-     if model_type == models.ModelType.Normal:
-         pmodel = models.define_model_normal(timeseries[0], timeseries[1])
-     elif model_type == models.ModelType.SkewNormal:
-         pmodel = models.define_model_skew(timeseries[0], timeseries[1])
-     elif model_type == models.ModelType.DoubleNormal:
-         pmodel = models.define_model_double_normal(timeseries[0], timeseries[1])
-     elif model_type == models.ModelType.DoubleSkewNormal:
-         pmodel = models.define_model_double_skew_normal(timeseries[0], timeseries[1])
+     pmodel = define_func(timeseries[0], timeseries[1])
      with pmodel:
          idata = pm.sample(cores=2, chains=2, tune=3, draws=5)
-     if model_type in [models.ModelType.DoubleNormal, models.ModelType.DoubleSkewNormal]:
-         summary = az.summary(idata)
-         # test whether the ordered transformation and the subpeak dimension work as intended
-         assert summary.loc["mean[0]", "mean"] < summary.loc["mean[1]", "mean"]
-         # assert summary.loc["area[0]", "mean"] < summary.loc["area[1]", "mean"]
+     assert set(idata.posterior.keys()) >= _REQUIRED_VARIABLES
+     assert set(idata.constant_data.keys()) >= _REQUIRED_DATA
+     pass
+
+
+ @pytest.mark.parametrize(
+     "define_func",
+     [
+         models.define_model_double_normal,
+         models.define_model_double_skew_normal,
+     ],
+ )
+ def test_doublepeak_sampling(define_func):
+     timeseries = np.load(_DP_ROOT / "example" / "A2t2R1Part1_132_85.9_86.1.npy")
+
+     pmodel = define_func(timeseries[0], timeseries[1])
+     with pmodel:
+         idata = pm.sample(cores=2, chains=2, tune=3, draws=5)
+     assert set(idata.posterior.keys()) >= _REQUIRED_VARIABLES
+     assert set(idata.constant_data.keys()) >= _REQUIRED_DATA
+     # Confirm the order of peaks is as intended
+     np.testing.assert_array_less(
+         idata.posterior["offset"].sel(subpeak=0),
+         idata.posterior["offset"].sel(subpeak=1),
+     )
+     np.testing.assert_array_less(
+         idata.posterior["mean"].sel(subpeak=0),
+         idata.posterior["mean"].sel(subpeak=1),
+     )
      pass


  def test_model_comparison():
-     path = Path(__file__).absolute().parent.parent / "test_data/test_model_comparison"
+     path = _DP_ROOT / "test_data/test_model_comparison"
      idata_normal = az.from_netcdf(path / "idata_normal.nc")
      idata_skew = az.from_netcdf(path / "idata_skew.nc")
      compare_dict = {
peak_performance/test_pipeline.py CHANGED
@@ -636,11 +636,18 @@ def test_model_selection_check():
      assert selected_model == "normal"
      # case 2: double peak exceeds elpd score difference threshold and is thus accepted
      result_df = pandas.DataFrame(
-         {"elpd_loo": [50, 30, 10, -5], "ic": ["loo", "loo", "loo", "loo"]},
+         {"elpd_loo": [50, 30, 20, -5], "ic": ["loo", "loo", "loo", "loo"]},
          index=["double_normal", "double_skew_normal", "normal", "skew_normal"],
      )
      selected_model = pl.model_selection_check(result_df, "loo", 25)
      assert selected_model == "double_normal"
+     # case 3: single peak models were excluded
+     result_df = pandas.DataFrame(
+         {"elpd_loo": [50, 30], "ic": ["loo", "loo"]},
+         index=["double_normal", "double_skew_normal"],
+     )
+     selected_model = pl.model_selection_check(result_df, "loo", 25)
+     assert selected_model == "double_normal"
      pass

peak_performance-0.7.1.dist-info/METADATA ADDED
@@ -0,0 +1,48 @@
+ Metadata-Version: 2.1
+ Name: peak-performance
+ Version: 0.7.1
+ Summary: A Python toolbox to fit chromatography peaks with uncertainty.
+ Author-email: Jochen Nießer <j.niesser@fz-juelich.de>, Michael Osthege <m.osthege@fz-juelich.de>
+ License: AGPLv3
+ Project-URL: homepage, https://jugit.fz-juelich.de/IBG-1/micropro/peak-performance
+ Project-URL: documentation, https://jugit.fz-juelich.de/IBG-1/micropro/peak-performance
+ Project-URL: repository, https://jugit.fz-juelich.de/IBG-1/micropro/peak-performance
+ Keywords: hplc,mass-spectrometry,uncertainty quantification
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Operating System :: OS Independent
+ Classifier: License :: OSI Approved :: GNU Affero General Public License v3
+ Classifier: Intended Audience :: Science/Research
+ Requires-Python: >=3.9
+ Description-Content-Type: text/markdown
+ License-File: LICENSE.md
+ Requires-Dist: arviz
+ Requires-Dist: matplotlib
+ Requires-Dist: numpy
+ Requires-Dist: pandas
+ Requires-Dist: pymc>=5.9.1
+ Requires-Dist: pytensor
+ Requires-Dist: scipy
+ Requires-Dist: openpyxl
+
+ [![PyPI version](https://img.shields.io/pypi/v/peak-performance)](https://pypi.org/project/peak-performance/)
+ [![pipeline](https://github.com/jubiotech/peak-performance/workflows/pipeline/badge.svg)](https://github.com/JuBiotech/peak-performance/actions)
+ [![coverage](https://codecov.io/gh/jubiotech/peak-performance/branch/main/graph/badge.svg)](https://app.codecov.io/gh/JuBiotech/peak-performance)
+ [![documentation](https://readthedocs.org/projects/peak-performance/badge/?version=latest)](https://peak-performance.readthedocs.io/en/latest)
+ [![DOI](https://zenodo.org/badge/713469041.svg)](https://zenodo.org/doi/10.5281/zenodo.10255543)
+
+ # About PeakPerformance
+ PeakPerformance employs Bayesian modeling for chromatographic peak data fitting.
+ This has the innate advantage of providing uncertainty quantification while jointly estimating all peak parameters united in a single peak model.
+ As Markov Chain Monte Carlo (MCMC) methods are utilized to infer the posterior probability distribution, convergence checks and the aforementioned uncertainty quantification are applied as novel quality metrics for robust peak recognition.
+
+ # First steps
+ Be sure to check out our thorough [documentation](https://peak-performance.readthedocs.io/en/latest). It contains not only information on how to install PeakPerformance and prepare raw data for its application but also detailed treatises about the implemented model structures, validation with both synthetic and experimental data against a commercially available vendor software, exemplary usage of diagnostic plots and investigation of various effects.
+ Furthermore, you will find example notebooks and data sets showcasing different aspects of PeakPerformance.
+
+ # How to contribute
+ If you encounter bugs while using PeakPerformance, please bring them to our attention by opening an issue. When doing so, describe the problem in detail and add screenshots/code snippets and whatever other helpful material you can provide.
+ When contributing code, create a local clone of PeakPerformance, create a new branch, and open a pull request (PR).
+
+ # How to cite
+ Head over to Zenodo to [generate a BibTeX citation](https://doi.org/10.5281/zenodo.10255543) for the latest release.
+ A publication has just been submitted to a scientific journal. Once published, this section will be updated.
peak_performance-0.7.1.dist-info/RECORD ADDED
@@ -0,0 +1,13 @@
+ peak_performance/__init__.py,sha256=yTq4THYewbWRnrs2Qkv4nCd-7MyvDlu_t0fPeWeKxQc,261
+ peak_performance/models.py,sha256=m32qCkEW00E3WV5d8xDlcMVHvdmcLH0fRnziPLsgDMk,27755
+ peak_performance/pipeline.py,sha256=O38AtmtGTA4fFYj78S836TgcFa1nuyf6npsbIM7DGec,64456
+ peak_performance/plots.py,sha256=JToIsNxGF-uh09t8IJvN9cWRTsL3opjDE8DMqGocYJQ,9528
+ peak_performance/test_main.py,sha256=xQiLDjhldxZzY5sp3RyIJUTtXxX46auWY9Qy7nuifxw,97
+ peak_performance/test_models.py,sha256=r6kqAVBtAbycf4IoRaXcSCZp6Lras3afK6o9qcLZbH8,11592
+ peak_performance/test_pipeline.py,sha256=gTZAxcJEVwJ0XW4IewmIWGLmx1n7KaK8egrovKHsCFI,22961
+ peak_performance/test_plots.py,sha256=lGwPWzezAhzEnyu_NMx2lFtyzzb1wxy-jnRMtOaaniY,4100
+ peak_performance-0.7.1.dist-info/LICENSE.md,sha256=zj-4LZ7oChyw5Uj5sFYOrVI3juK06Cb9lFm0rPcHXYk,32387
+ peak_performance-0.7.1.dist-info/METADATA,sha256=62R5sa4j-zdBzwVQRBaUD9cfS6pXvfPqUBpEUx0rVmk,3388
+ peak_performance-0.7.1.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+ peak_performance-0.7.1.dist-info/top_level.txt,sha256=-lZSmgn2fZA-xPVmddLwaRt2hQeeWj7TYVefOk7_T58,17
+ peak_performance-0.7.1.dist-info/RECORD,,
peak_performance-0.7.1.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: bdist_wheel (0.42.0)
+ Generator: bdist_wheel (0.44.0)
  Root-Is-Purelib: true
  Tag: py3-none-any

peak_performance-0.6.4.dist-info/METADATA DELETED
@@ -1,67 +0,0 @@
- Metadata-Version: 2.1
- Name: peak-performance
- Version: 0.6.4
- Summary: A Python toolbox to fit chromatography peaks with uncertainty.
- Author-email: Jochen Nießer <j.niesser@fz-juelich.de>, Michael Osthege <m.osthege@fz-juelich.de>
- License: AGPLv3
- Project-URL: homepage, https://jugit.fz-juelich.de/IBG-1/micropro/peak-performance
- Project-URL: documentation, https://jugit.fz-juelich.de/IBG-1/micropro/peak-performance
- Project-URL: repository, https://jugit.fz-juelich.de/IBG-1/micropro/peak-performance
- Keywords: hplc,mass-spectrometry,uncertainty quantification
- Classifier: Programming Language :: Python :: 3
- Classifier: Operating System :: OS Independent
- Classifier: License :: OSI Approved :: GNU Affero General Public License v3
- Classifier: Intended Audience :: Science/Research
- Requires-Python: >=3.9
- Description-Content-Type: text/markdown
- License-File: LICENSE.md
- Requires-Dist: arviz
- Requires-Dist: matplotlib
- Requires-Dist: numpy
- Requires-Dist: pandas
- Requires-Dist: pymc >=5.9.1
- Requires-Dist: pytensor
- Requires-Dist: scipy
- Requires-Dist: openpyxl
- Requires-Dist: numpy <1.26.0
- Provides-Extra: test
- Requires-Dist: pytest ; extra == 'test'
- Requires-Dist: pytest-cov ; extra == 'test'
- Requires-Dist: twine ; extra == 'test'
-
- [![PyPI version](https://img.shields.io/pypi/v/bletl)](https://pypi.org/project/peak-performance/)
- [![pipeline](https://github.com/jubiotech/bletl/workflows/pipeline/badge.svg)](https://github.com/JuBiotech/peak-performance/actions)
- [![coverage](https://codecov.io/gh/jubiotech/bletl/branch/main/graph/badge.svg)](https://app.codecov.io/gh/JuBiotech/peak-performance)
-
- # How to use PeakPerformance
- For installation instructions, see `Installation.md`.
- For instructions regarding the use of PeakPerformance, check out the example notebook(s) under `notebooks`, the complementary example data under `example`, and the following introductory explanations.
-
- ## Preparing raw data
- This step is crucial when using PeakPerformance. Raw data has to be supplied as time series, meaning for each signal you want to analyze, save a NumPy array consisting of time in the first dimension and intensity in the second dimension (compare example data). Both time and intensity should also be NumPy arrays. If you e.g. have time and intensity of a signal as lists, you can use the following code to convert, format, and save them in the correct manner:
- ```
- import numpy as np
- from pathlib import Path
-
- time_series = np.array([np.array(time), np.array(intensity)])
- np.save(Path(r"example_path/time_series.npy"), time_series)
- ```
- The naming convention of raw data files is `<acquisition name>_<precursor ion m/z or experiment number>_<product ion m/z start>_<product ion m/z end>.npy`. There should be no underscores within the named sections such as `acquisition name`. Essentially, the raw data names include the acquisition and mass trace, thus yielding a recognizable and unique name for each isotopomer/fragment/metabolite/sample.
-
- ## Model selection
- When it comes to selecting models, PeakPerformance has a function performing an automated selection process by analyzing one acquisition per mass trace with all implemented models. Subsequently, all models are ranked based on an information criterion (either pareto-smoothed importance sampling leave-one-out cross-validation or widely applicable information criterion). For this process to work as intended, you need to specify acquisitions with representative peaks for each mass trace (see example notebook 1). If e.g. most peaks of an analyte show a skewed shape, then select an acquisition where this is the case. For double peaks, select an acquisition where the peaks are as distinct and comparable in height as possible.
- Since model selection is a computationally demanding and time consuming process, it is suggested to state the model type as the user (see example notebook 1) if possible.
-
- ## Troubleshooting
- ### A batch run broke and I want to restart it.
- If an error occurred in the middle of a batch run, then you can use the `pipeline_restart` function in the `pipeline` module to create a new batch which will analyze only those samples which have not been analyzed previously.
-
- ### The model parameters don't converge and/or the fit does not describe the raw data well.
- Check the separate file `How to adapt PeakPerformance to you data`.
-
- # How to contribute
- If you encounter bugs while using PeakPerformance, please bring them to our attention by opening an issue. When doing so, describe the problem in detail and add screenshots/code snippets and whatever other helpful material you can provide.
- When contributing code, create a local clone of PeakPerformance, create a new branch, and open a pull request (PR).
-
- # How to cite
- Will be updated once the paper has been released and a zenodo DOI has been created.
peak_performance-0.6.4.dist-info/RECORD DELETED
@@ -1,13 +0,0 @@
- peak_performance/__init__.py,sha256=yTq4THYewbWRnrs2Qkv4nCd-7MyvDlu_t0fPeWeKxQc,261
- peak_performance/models.py,sha256=L47mNU1HItYv5cB-cs2H0ooswhdcLfBdg8X1MHeiTUY,25130
- peak_performance/pipeline.py,sha256=A-eIwhbn9hCIvWgrG5ksfQLn--ISBKVBjq09nVDwFO8,64311
- peak_performance/plots.py,sha256=OO5rSC-kTCzH8-Fh0diz0Cq86fyrZ_FSOiDjcboZRAU,9280
- peak_performance/test_main.py,sha256=xQiLDjhldxZzY5sp3RyIJUTtXxX46auWY9Qy7nuifxw,97
- peak_performance/test_models.py,sha256=X3fy-kNih7TNrr4jKzgcx8qRnmh6cA27hSr2b6Tmf18,9334
- peak_performance/test_pipeline.py,sha256=wyzVgVYT0pK_Lnh5VZEgL8Rxn8sjiCa1dRp1tF79foM,22652
- peak_performance/test_plots.py,sha256=lGwPWzezAhzEnyu_NMx2lFtyzzb1wxy-jnRMtOaaniY,4100
- peak_performance-0.6.4.dist-info/LICENSE.md,sha256=zj-4LZ7oChyw5Uj5sFYOrVI3juK06Cb9lFm0rPcHXYk,32387
- peak_performance-0.6.4.dist-info/METADATA,sha256=X7qWgjCWDwi9KseQnDasaijG0k9u-L6CbGTH0qj8Zd4,4796
- peak_performance-0.6.4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
- peak_performance-0.6.4.dist-info/top_level.txt,sha256=-lZSmgn2fZA-xPVmddLwaRt2hQeeWj7TYVefOk7_T58,17
- peak_performance-0.6.4.dist-info/RECORD,,