peak-performance 0.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,662 @@
1
+ import os
2
+ import shutil
3
+ from pathlib import Path
4
+
5
+ import arviz as az
6
+ import numpy as np
7
+ import pandas
8
+ import pytest
9
+
10
+ from peak_performance import pipeline as pl
11
+
12
# Column labels of the (initially empty) summary DataFrame collecting results.
# The first nine labels match ArviZ summary statistics; the remainder carry
# sample metadata and the filtering verdict.
COLUMNS = (
    "mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk ess_tail r_hat "
    "acquisition experiment_or_precursor_mz product_mz_start product_mz_end "
    "is_peak cause_for_rejection model_type subpeak"
).split()
32
+
33
+
34
def test_user_input_class():
    """Instantiate ``pl.UserInput`` and exercise its setter validation."""
    example_path = Path(__file__).absolute().parent.parent / "example"
    timeseries = np.load(example_path / "A1t1R1Part2_110_109.9_110.1.npy")
    # All positional arguments of UserInput, in declaration order.
    valid_args = [
        example_path,                           # path
        ["A1t1R1Part2_110_109.9_110.1.npy"],    # raw data files
        ".npy",                                 # data file format
        ["normal"],                             # model type
        [22.5],                                 # retention time estimate
        1.5,                                    # peak width estimate
        True,                                   # pre-filtering
        5,                                      # minimum signal-to-noise
        timeseries,
        "A1t1R1",                               # acquisition
        118,                                    # precursor m/z
        "71.9",                                 # product m/z start (str on purpose)
        72.1,                                   # product m/z end
    ]
    # happy path: instantiation succeeds and values are stored/converted
    ui = pl.UserInput(*valid_args)
    assert ui.timeseries.all() == timeseries.all()
    assert ui.precursor == 118
    # the string "71.9" is expected to come back as a float
    assert ui.product_mz_start == 71.9
    assert ui.product_mz_end == 72.1
    # error handling: a numeric acquisition name must be rejected
    bad_acquisition = list(valid_args)
    bad_acquisition[9] = 5
    with pytest.raises(pl.InputError):
        ui = pl.UserInput(*bad_acquisition)
    # error handling: a non-numeric precursor m/z must be rejected
    bad_precursor = list(valid_args)
    bad_precursor[10] = "mz"
    with pytest.raises(pl.InputError):
        ui = pl.UserInput(*bad_precursor)
104
+
105
+
106
def test_detect_raw_data():
    """``pl.detect_raw_data`` should find every .npy file in the example folder."""
    example_path = Path(__file__).absolute().parent.parent / "example"
    detected = sorted(pl.detect_raw_data(example_path, data_type=".npy"))
    expected = sorted(
        [
            "A1t1R1Part2_110_109.9_110.1.npy",
            "A1t1R1Part2_111_109.9_110.1.npy",
            "A1t1R1Part2_111_110.9_111.1.npy",
            "A1t1R1Part2_112_110.9_111.1.npy",
            "A1t1R1Part2_112_111.9_112.1.npy",
            "A2t2R1Part1_132_85.9_86.1.npy",
            "A4t4R1Part2_137_72.9_73.1.npy",
        ]
    )
    assert detected == expected
124
+
125
+
126
def test_parse_data():
    """``pl.parse_data`` should split a file name into metadata plus the time series."""
    example_path = Path(__file__).absolute().parent.parent / "example"
    filename = "A1t1R1Part2_110_109.9_110.1.npy"
    timeseries, acquisition, precursor, mz_start, mz_end = pl.parse_data(
        example_path, filename, ".npy"
    )
    # time and intensity arrays
    assert isinstance(timeseries[0], np.ndarray)
    assert isinstance(timeseries[1], np.ndarray)
    # metadata parsed from the file name
    assert acquisition == "A1t1R1Part2"
    assert precursor == 110.0
    assert mz_start == 109.9
    assert mz_end == 110.1
144
+
145
+
146
def test_initiate():
    """``pl.initiate`` should create the run directory and return an empty summary frame.

    The run directory is removed afterwards, even when an assertion fails.
    """
    example_path = Path(__file__).absolute().parent.parent / "example"
    df_summary, run_path = pl.initiate(example_path, run_dir="test")
    try:
        # FIX: the previous assertions compared `.values.all()` of two empty
        # frames (both True), so they could never fail. Assert the actual
        # contract instead: correct columns and no rows yet.
        assert list(df_summary.columns) == COLUMNS
        assert df_summary.empty
        assert run_path == example_path / "test"
        assert run_path.exists()
    finally:
        # FIX: clean up the created directory even if an assertion above failed
        shutil.rmtree(run_path)
157
+
158
+
159
def test_prefiltering():
    """Exercise ``pl.prefiltering`` for one accepted and two rejected signals."""
    example_path = Path(__file__).absolute().parent.parent / "example"
    df_summary = pandas.DataFrame(columns=COLUMNS)

    def build_ui(files, t_ret, series):
        # Identical positional UserInput call as used throughout this module;
        # only the raw data files, retention time estimate and time series vary.
        return pl.UserInput(
            example_path,
            files,
            ".npy",
            ["normal"],
            t_ret,
            1.5,
            True,
            5,
            series,
            "A1t1R1",
            118,
            71.9,
            72.1,
        )

    # positive test: the signal passes pre-filtering
    filename = "A1t1R1Part2_110_109.9_110.1.npy"
    timeseries = np.load(example_path / filename)
    ui = build_ui([filename], [26.3], timeseries)
    found_peak, df_summary_1 = pl.prefiltering(filename, ui, 108, df_summary)
    assert found_peak
    assert df_summary_1.values.all() == df_summary.values.all()
    assert df_summary_1.columns.all() == df_summary.columns.all()
    # negative test: retention time estimate far outside the observed window
    ui = build_ui([filename], [0], timeseries)
    found_peak, df_summary_1 = pl.prefiltering(filename, ui, 108, df_summary)
    assert not found_peak
    assert len(df_summary_1.loc[:, "mean"].values) == 8
    assert list(df_summary_1.columns) == COLUMNS
    assert all(pandas.isna(df_summary_1["mean"]))
    # negative test: signal-to-noise ratio below the minimum
    filename = "A4t4R1Part2_137_72.9_73.1.npy"
    timeseries = np.load(example_path / filename)
    ui = build_ui([filename], [26.3], timeseries)
    found_peak, df_summary_2 = pl.prefiltering(filename, ui, 108, df_summary)
    assert not found_peak
    assert len(df_summary_2.loc[:, "mean"].values) == 8
    assert list(df_summary_2.columns) == COLUMNS
    assert all(pandas.isna(df_summary_2["mean"]))
250
+
251
+
252
def test_postfiltering_success():
    """A well-converged fit should be neither resampled nor discarded."""
    idata = az.from_netcdf(
        Path(__file__).absolute().parent.parent
        / "test_data/test_postfiltering_success/idata_double_normal.nc"
    )
    df_summary = pandas.DataFrame(columns=COLUMNS)
    example_path = Path(__file__).absolute().parent.parent / "example"
    filename = "A2t2R1Part1_132_85.9_86.1.npy"
    ui = pl.UserInput(
        example_path,
        [filename],                   # raw data files
        ".npy",                       # data file format
        ["double_normal"],            # model type
        [22.5],                       # retention time estimate
        1,                            # peak width estimate
        True,                         # pre-filtering
        5,                            # minimum signal-to-noise
        np.load(example_path / filename),
        "A2t2R1Part1",                # acquisition
        132,                          # precursor m/z
        85.9,                         # product m/z start
        86.1,                         # product m/z end
    )
    resample, discard, df_summary = pl.postfiltering(filename, idata, ui, df_summary)
    assert not resample
    assert not discard
297
+
298
+
299
def test_postfiltering_resample():
    """A fit with too-high r_hat should be flagged for resampling, not discarded."""
    idata = az.from_netcdf(
        Path(__file__).absolute().parent.parent
        / "test_data/test_postfiltering_resample/idata_double_skew_rhat_too_high.nc"
    )
    df_summary = pandas.DataFrame(columns=COLUMNS)
    example_path = Path(__file__).absolute().parent.parent / "example"
    filename = "A2t2R1Part1_132_85.9_86.1.npy"
    ui = pl.UserInput(
        example_path,
        [filename],                   # raw data files
        ".npy",                       # data file format
        ["double_normal"],            # model type
        [22.5],                       # retention time estimate
        1,                            # peak width estimate
        True,                         # pre-filtering
        5,                            # minimum signal-to-noise
        np.load(example_path / filename),
        "A2t2R1Part1",                # acquisition
        132,                          # precursor m/z
        85.9,                         # product m/z start
        86.1,                         # product m/z end
    )
    resample, discard, df_summary = pl.postfiltering(filename, idata, ui, df_summary)
    assert resample
    assert not discard
344
+
345
+
346
def test_single_peak_report_add_nan_to_summary():
    """For a single-peak model, 8 all-NaN rows with metadata must be appended."""
    df_summary = pandas.DataFrame(columns=COLUMNS)
    example_path = Path(__file__).absolute().parent.parent / "example"
    filename = "A1t1R1Part2_110_109.9_110.1.npy"
    ui = pl.UserInput(
        example_path,
        [filename],                   # raw data files
        ".npy",                       # data file format
        ["skew_normal"],              # model type
        [22.5],                       # retention time estimate
        1.5,                          # peak width estimate
        True,                         # pre-filtering
        5,                            # minimum signal-to-noise
        np.load(example_path / filename),
        "A1t1R1",                     # acquisition
        118,                          # precursor m/z
        71.9,                         # product m/z start
        72.1,                         # product m/z end
    )
    rejection_msg = "because I said so"
    df_summary = pl.report_add_nan_to_summary(filename, ui, df_summary, rejection_msg)
    n_rows = len(df_summary.index)
    # statistics columns are all NaN
    assert len(df_summary.loc[:, "mean"].values) == 8
    assert list(df_summary.columns) == COLUMNS
    assert all(pandas.isna(df_summary["mean"]))
    # metadata columns are filled from the UserInput instance
    assert list(df_summary.loc[:, "acquisition"]) == n_rows * ["A1t1R1"]
    assert list(df_summary.loc[:, "experiment_or_precursor_mz"]) == n_rows * [118]
    assert list(df_summary.loc[:, "product_mz_start"]) == n_rows * [71.9]
    assert list(df_summary.loc[:, "product_mz_end"]) == n_rows * [72.1]
    assert not any(df_summary["is_peak"])
    assert all(df_summary["cause_for_rejection"] == rejection_msg)
    assert list(df_summary.loc[:, "model_type"]) == n_rows * ["skew_normal"]
395
+
396
+
397
def test_double_peak_report_add_nan_to_summary():
    """For a double-peak model, 8 all-NaN rows with metadata must be appended."""
    df_summary = pandas.DataFrame(columns=COLUMNS)
    example_path = Path(__file__).absolute().parent.parent / "example"
    filename = "A1t1R1Part2_110_109.9_110.1.npy"
    ui = pl.UserInput(
        example_path,
        [filename],                   # raw data files
        ".npy",                       # data file format
        ["double_normal"],            # model type
        [22.5],                       # retention time estimate
        1.5,                          # peak width estimate
        True,                         # pre-filtering
        5,                            # minimum signal-to-noise
        np.load(example_path / filename),
        "A1t1R1",                     # acquisition
        118,                          # precursor m/z
        71.9,                         # product m/z start
        72.1,                         # product m/z end
    )
    rejection_msg = "because I said so"
    df_summary = pl.report_add_nan_to_summary(filename, ui, df_summary, rejection_msg)
    n_rows = len(df_summary.index)
    # statistics columns are all NaN
    assert len(df_summary.loc[:, "mean"].values) == 8
    assert list(df_summary.columns) == COLUMNS
    assert all(pandas.isna(df_summary["mean"]))
    # metadata columns are filled from the UserInput instance
    assert list(df_summary.loc[:, "acquisition"]) == n_rows * ["A1t1R1"]
    assert list(df_summary.loc[:, "experiment_or_precursor_mz"]) == n_rows * [118]
    assert list(df_summary.loc[:, "product_mz_start"]) == n_rows * [71.9]
    assert list(df_summary.loc[:, "product_mz_end"]) == n_rows * [72.1]
    assert not any(df_summary["is_peak"])
    assert all(df_summary["cause_for_rejection"] == rejection_msg)
    assert list(df_summary.loc[:, "model_type"]) == n_rows * ["double_normal"]
446
+
447
+
448
def test_single_peak_report():
    """Posterior statistics and metadata must be copied into the summary frame."""
    idata = az.from_netcdf(
        Path(__file__).absolute().parent.parent / "test_data/test_single_peak_report/idata.nc"
    )
    df_summary = pandas.DataFrame(columns=COLUMNS)
    example_path = Path(__file__).absolute().parent.parent / "example"
    filename = "A1t1R1Part2_110_109.9_110.1.npy"
    ui = pl.UserInput(
        example_path,
        [filename],                   # raw data files
        ".npy",                       # data file format
        ["skew_normal"],              # model type
        [22.5],                       # retention time estimate
        1.5,                          # peak width estimate
        True,                         # pre-filtering
        5,                            # minimum signal-to-noise
        np.load(example_path / filename),
        "A1t1R1",                     # acquisition
        118,                          # precursor m/z
        71.9,                         # product m/z start
        72.1,                         # product m/z end
    )
    df_summary = pl.report_add_data_to_summary(filename, idata, df_summary, ui, True)
    n_rows = len(df_summary.index)
    assert len(df_summary.loc[:, "mean"].values) == 8
    assert list(df_summary.columns) == COLUMNS
    # expected posterior means from the stored example inference data
    assert list(df_summary.loc[:, "mean"]) == [
        5.565,
        8.446,
        25.989,
        132.743,
        0.516,
        2180.529,
        2762.695,
        20.924,
    ]
    # metadata columns are filled from the UserInput instance
    assert list(df_summary.loc[:, "acquisition"]) == n_rows * ["A1t1R1"]
    assert list(df_summary.loc[:, "experiment_or_precursor_mz"]) == n_rows * [118]
    assert list(df_summary.loc[:, "product_mz_start"]) == n_rows * [71.9]
    assert list(df_summary.loc[:, "product_mz_end"]) == n_rows * [72.1]
    assert all(df_summary["is_peak"])
    assert all(df_summary["cause_for_rejection"] == "")
    assert list(df_summary.loc[:, "model_type"]) == n_rows * ["skew_normal"]
510
+
511
+
512
@pytest.mark.parametrize("idata", ["idata_double_normal.nc", "idata_double_skew_normal.nc"])
def test_double_peak_report(idata):
    """Double-peak models must yield 16 rows: 8 per subpeak, tagged 1st/2nd."""
    # NOTE: the parameter name `idata` is fixed by the parametrize decorator;
    # the loaded object gets its own name to avoid shadowing the file name.
    inference_data = az.from_netcdf(
        Path(__file__).absolute().parent.parent / "test_data/test_double_peak_report" / idata
    )
    df_summary = pandas.DataFrame(columns=COLUMNS)
    example_path = Path(__file__).absolute().parent.parent / "example"
    filename = "A2t2R1Part1_132_85.9_86.1.npy"
    ui = pl.UserInput(
        example_path,
        [filename],                   # raw data files
        ".npy",                       # data file format
        ["double_skew_normal"],       # model type
        [22.5],                       # retention time estimate
        1.5,                          # peak width estimate
        True,                         # pre-filtering
        5,                            # minimum signal-to-noise
        np.load(example_path / filename),
        "A1t1R1",                     # acquisition
        132,                          # precursor m/z
        85.9,                         # product m/z start
        86.1,                         # product m/z end
    )
    df_summary = pl.report_add_data_to_summary(filename, inference_data, df_summary, ui, True)
    n_rows = len(df_summary.index)
    assert list(df_summary.columns) == COLUMNS
    assert n_rows == 16
    # metadata columns are filled from the UserInput instance
    assert list(df_summary.loc[:, "acquisition"]) == n_rows * ["A1t1R1"]
    assert list(df_summary.loc[:, "experiment_or_precursor_mz"]) == n_rows * [132]
    assert list(df_summary.loc[:, "product_mz_start"]) == n_rows * [85.9]
    assert list(df_summary.loc[:, "product_mz_end"]) == n_rows * [86.1]
    assert all(df_summary["is_peak"])
    assert all(df_summary["cause_for_rejection"] == "")
    assert list(df_summary.loc[:, "model_type"]) == 16 * ["double_skew_normal"]
    # the first 8 rows belong to the first subpeak, the last 8 to the second
    assert list(df_summary.loc[:, "subpeak"]) == 8 * ["1st"] + 8 * ["2nd"]
566
+
567
+
568
def test_parse_unique_identifiers():
    """Identifiers are the file names with the acquisition prefix stripped."""
    files = [
        "A1t1R1Part2_110_109.9_110.1.npy",
        "A1t1R1Part2_111_109.9_110.1.npy",
        "A1t1R1Part2_111_110.9_111.1.npy",
        "A1t1R1Part2_112_110.9_111.1.npy",
    ]
    expected = [
        "110_109.9_110.1",
        "111_109.9_110.1",
        "111_110.9_111.1",
        "112_110.9_111.1",
    ]
    # order is not part of the contract, so compare sorted
    assert sorted(pl.parse_unique_identifiers(files)) == sorted(expected)
585
+
586
+
587
def test_excel_template_prepare():
    """``pl.excel_template_prepare`` should copy Template.xlsx into the raw-data folder.

    The copied file is removed afterwards, even when the assertion fails.
    """
    path_raw_data = Path(__file__).absolute().parent.parent / "example"
    path_peak_performance = Path(__file__).absolute().parent.parent
    files = ["mp3", "flac", "wav", "m4a"]
    identifiers = ["1", "2", "3"]
    template_copy = path_raw_data / "Template.xlsx"
    try:
        pl.excel_template_prepare(path_raw_data, path_peak_performance, files, identifiers)
        # test whether Template.xlsx was copied from peak-performance to example
        assert template_copy.exists()
    finally:
        # FIX: remove the copy even if the assertion failed, so that a leftover
        # file cannot make later runs of this test pass spuriously
        if template_copy.exists():
            os.remove(template_copy)
598
+
599
+
600
def test_parse_files_for_model_selection():
    """Exercise ``pl.parse_files_for_model_selection`` on a mutated signals sheet.

    The same ``signals`` DataFrame is modified in place between stages, so the
    order of the assertions below matters.
    """
    path_peak_performance = Path(__file__).absolute().parent.parent
    # load empty signals sheet from Template.xlsx
    signals = pandas.read_excel(Path(path_peak_performance) / "Template.xlsx", sheet_name="signals")
    signals["unique_identifier"] = ["1", "2", "3", "4", "5", "6", "7"]
    # identifiers alone (no model, no acquisition columns filled) must raise
    with pytest.raises(pl.InputError):
        files = pl.parse_files_for_model_selection(signals)
    # have one unique_identifier where neither model nor acquisition were given
    # (and multiple different acquisitions were defined for other unique identifiers)
    signals["acquisition_for_choosing_model_type"] = ["A1", "B1", "C1", "D1", "E1", "F1", np.nan]
    signals["model_type"] = 7 * [np.nan]
    with pytest.raises(pl.InputError):
        files = pl.parse_files_for_model_selection(signals)
    # if models for every unique identifier were supplied, the result should be empty
    signals["model_type"] = 7 * ["normal"]
    with pytest.raises(pl.InputError):
        files = pl.parse_files_for_model_selection(signals)
    # mixture of supplied model and supplying different acquisitions for model selection
    signals["acquisition_for_choosing_model_type"] = [np.nan, "B1", "C1", "D1", "E1", "F1", "G1"]
    signals["model_type"] = ["normal"] + 6 * [np.nan]
    files = pl.parse_files_for_model_selection(signals)
    # keys are "<acquisition>_<identifier>", values the identifier itself
    assert files == {"B1_2": "2", "C1_3": "3", "D1_4": "4", "E1_5": "5", "F1_6": "6", "G1_7": "7"}
    # mixture of supplied model and supplying one acquisition for model selection
    signals["acquisition_for_choosing_model_type"] = [np.nan, "B1"] + 5 * [np.nan]
    files = pl.parse_files_for_model_selection(signals)
    # the single given acquisition is reused for every remaining identifier
    assert files == {"B1_2": "2", "B1_3": "3", "B1_4": "4", "B1_5": "5", "B1_6": "6", "B1_7": "7"}
    pass
627
+
628
+
629
def test_model_selection_check():
    """A double-peak model must lead the best single-peak model by the threshold."""
    model_names = ["double_normal", "double_skew_normal", "normal", "skew_normal"]
    ic_column = 4 * ["loo"]
    # case 1: best double model leads the best single model by less than 25
    # elpd points, so the single-peak model must be selected
    close_scores = pandas.DataFrame(
        {"elpd_loo": [50, 30, 29, -5], "ic": ic_column},
        index=model_names,
    )
    assert pl.model_selection_check(close_scores, "loo", 25) == "normal"
    # case 2: the lead exceeds the threshold, so the double model is accepted
    clear_scores = pandas.DataFrame(
        {"elpd_loo": [50, 30, 10, -5], "ic": ic_column},
        index=model_names,
    )
    assert pl.model_selection_check(clear_scores, "loo", 25) == "double_normal"
645
+
646
+
647
def test_model_selection():
    """Test the model_selection function from the pipeline module.

    The function contains the model selection pipeline. Template.xlsx is
    updated by the pipeline, so a fresh copy is created first and removed
    afterwards — even when an assertion fails.
    """
    path = Path(__file__).absolute().parent.parent / "test_data/test_model_selection"
    shutil.copy(path / "template/Template.xlsx", path / "Template.xlsx")
    try:
        result, model_dict = pl.model_selection(path)
        # make sure that the excluded model was really excluded
        assert "double_normal" not in result.index
        assert "normal" in result.index
        assert "skew_normal" in result.index
        assert model_dict
    finally:
        # FIX: delete the working copy even when an assertion above fails,
        # otherwise a stale Template.xlsx pollutes subsequent test runs
        os.remove(path / "Template.xlsx")