peak-performance 0.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,289 @@
1
+ """
2
+ PeakPerformance
3
+ Copyright (C) 2023 Forschungszentrum Jülich GmbH
4
+
5
+ This program is free software: you can redistribute it and/or modify
6
+ it under the terms of the GNU Affero General Public License as published
7
+ by the Free Software Foundation, either version 3 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU Affero General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Affero General Public License
16
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
17
+ """
18
+
19
+ import os
20
+ from pathlib import Path
21
+ from typing import Sequence, Union
22
+
23
+ import arviz as az
24
+ import numpy as np
25
+ import pandas
26
+ import pymc as pm
27
+ from matplotlib import pyplot as plt
28
+
29
+
30
def plot_raw_data(
    identifier: str,
    time: np.ndarray,
    intensity: np.ndarray,
    path: Union[str, os.PathLike],
    save_formats: Sequence[str] = ("png", "svg"),
):
    """
    Plot just the raw data in case no peak was found.

    Parameters
    ----------
    identifier
        Unique identifier of this particular signal (e.g. filename).
    time
        NumPy array with the time values of the relevant timeframe.
    intensity
        NumPy array with the intensity values of the relevant timeframe.
    path
        Path to the folder containing the results of the current run.
    save_formats
        Which file formats to save as.
        Must be supported by `plt.savefig()`, e.g. ``("png", "svg", "pdf")``.
    """
    time = np.array(time)
    intensity = np.array(intensity)
    # plot the data to be able to check if peak detection was correct or not
    fig, ax = plt.subplots()
    ax.scatter(time, intensity, marker="x", color="black", label="data")
    plt.legend()
    ax.set_xlabel("time / min", fontsize=12, fontweight="bold")
    ax.set_ylabel("intensity / a.u.", fontsize=12, fontweight="bold")
    plt.xticks(size=11.5)
    plt.yticks(size=11.5)
    fig.tight_layout()
    # `fmt` instead of `format` to avoid shadowing the builtin
    for fmt in save_formats:
        fig.savefig(Path(path) / f"{identifier}_NoPeak.{fmt}", format=fmt)
    plt.close(fig)
    return
70
+
71
+
72
def plot_density(
    *, ax, x: np.ndarray, samples, percentiles=(5, 95), percentile_kwargs=None, **kwargs
):
    """
    Plot a posterior predictive density band with percentile border lines.

    Serves as a more accurate comparison between data and model than comparing
    data and the posterior distribution.

    Parameters
    ----------
    ax
        Axes of a matplotlib figure.
    x
        Values of the x dimension of the plot (here: time).
    samples
        Posterior predictive samples taken from an inference data object.
        Must be a 2-dimensional array of shape (sample, x).
    percentiles
        Lower and upper percentiles to be plotted.
    percentile_kwargs
        Optional dict of keyword arguments for drawing the percentile border
        lines with ``ax.plot()``/``ax.stairs()``.  Overrides the defaults
        ``linestyle="--"`` (dashed line) and ``color="black"``.
    **kwargs
        Additional keyword arguments are passed on to ``pm.gp.util.plot_gp_dist()``.
    """
    assert samples.ndim == 2

    # Step-function mode draws horizontal density bands inbetween the x coordinates
    step_mode = samples.shape[1] == x.shape[0] - 1
    fill_kwargs = {}
    if step_mode:
        # Repeat the last column so the final step extends to the last x coordinate
        samples = np.hstack([samples, samples[:, -1][:, None]])
        fill_kwargs["step"] = "post"

    # Plot the density band
    pm.gp.util.plot_gp_dist(
        ax=ax,
        x=x,
        samples=samples,
        fill_alpha=1,
        plot_samples=False,
        palette=plt.cm.Blues,
        fill_kwargs=fill_kwargs,
        **kwargs,
    )

    # Add percentiles for orientation
    pkwargs = dict(
        linestyle="--",
        color="black",
    )
    pkwargs.update(percentile_kwargs or {})
    for p in percentiles:
        values = np.percentile(samples, p, axis=0)
        if step_mode:
            ax.stairs(values[:-1], x, baseline=None, **pkwargs)
        else:
            ax.plot(x, values, **pkwargs)
    return
132
+
133
+
134
def plot_posterior_predictive(
    identifier: str,
    time: np.ndarray,
    intensity: np.ndarray,
    path: Union[str, os.PathLike],
    idata: az.InferenceData,
    discarded: bool,
    save_formats: Sequence[str] = ("png", "svg"),
):
    """
    Save plot of posterior_predictive with 95 % HDI and original data points.

    Parameters
    ----------
    identifier
        Unique identifier of this particular signal (e.g. filename).
    time
        NumPy array with the time values of the relevant timeframe.
    intensity
        NumPy array with the intensity values of the relevant timeframe.
    path
        Path to the folder containing the results of the current run.
    idata
        Inference data object.
    discarded
        Alters the name of the saved plot. If True, a "_NoPeak" is added to the name.
    save_formats
        Which file formats to save as.
        Must be supported by `plt.savefig()`, e.g. ``("png", "svg", "pdf")``.
    """
    time = np.array(time)
    intensity = np.array(intensity)
    fig, ax = plt.subplots()
    # plot the posterior predictive
    plot_density(
        ax=ax,
        x=time,
        samples=idata.posterior_predictive.y.stack(sample=("chain", "draw")).T.values,
        percentiles=(2.5, 97.5),
    )
    # plot the raw data points
    ax.scatter(time, intensity, marker="x", color="black", label="data")
    ax.set_xlabel("time / min", fontsize=11.5, fontweight="bold")
    ax.set_ylabel("intensity / a.u.", fontsize=11.5, fontweight="bold")
    plt.xticks(size=11.5)
    plt.yticks(size=11.5)
    plt.legend()
    fig.tight_layout()
    # if signal was discarded, add a "_NoPeak" to the file name
    suffix = "_NoPeak" if discarded else ""
    # `fmt` instead of `format` to avoid shadowing the builtin
    for fmt in save_formats:
        fig.savefig(
            Path(path) / f"{identifier}_predictive_posterior{suffix}.{fmt}", format=fmt
        )
    plt.close(fig)
    return
194
+
195
+
196
def plot_posterior(
    identifier: str,
    time: np.ndarray,
    intensity: np.ndarray,
    path: Union[str, os.PathLike],
    idata: az.InferenceData,
    discarded: bool,
    save_formats: Sequence[str] = ("png", "svg"),
):
    """
    Saves plot of posterior, estimated baseline, and original data points.

    Parameters
    ----------
    identifier
        Unique identifier of this particular signal (e.g. filename).
    time
        NumPy array with the time values of the relevant timeframe.
    intensity
        NumPy array with the intensity values of the relevant timeframe.
    path
        Path to the folder containing the results of the current run.
    idata
        Inference data object.
    discarded
        Alters the name of the saved plot. If True, a "_NoPeak" is added to the name.
    save_formats
        Which file formats to save as.
        Must be supported by `plt.savefig()`, e.g. ``("png", "svg", "pdf")``.
    """
    time = np.array(time)
    intensity = np.array(intensity)
    az_summary: pandas.DataFrame = az.summary(idata)

    fig, ax = plt.subplots()
    # plot the posterior
    pm.gp.util.plot_gp_dist(
        ax=ax,
        x=time,
        samples=idata.posterior.y.stack(sample=("chain", "draw")).T.values,
    )
    # plot the raw data points
    ax.scatter(time, intensity, marker="x", color="black", label="data")
    # plot the baseline from the posterior mean of intercept and slope
    x = np.array(ax.get_xlim())
    y = az_summary.loc["baseline_intercept", "mean"] + az_summary.loc["baseline_slope", "mean"] * x
    plt.plot(x, y)
    plt.legend()
    ax.set_xlabel("time / min", fontsize=12, fontweight="bold")
    ax.set_ylabel("intensity / a.u.", fontsize=12, fontweight="bold")
    plt.xticks(size=11.5)
    plt.yticks(size=11.5)
    fig.tight_layout()
    # if signal was discarded, add a "_NoPeak" to the file name
    suffix = "_NoPeak" if discarded else ""
    # `fmt` instead of `format` to avoid shadowing the builtin
    for fmt in save_formats:
        fig.savefig(Path(path) / f"{identifier}_posterior{suffix}.{fmt}", format=fmt)
    plt.close(fig)
    return
259
+
260
+
261
def plot_model_comparison(
    df_comp: pandas.DataFrame,
    identifier: str,
    path: Union[str, os.PathLike],
    save_formats: Sequence[str] = ("png", "svg"),
):
    """
    Function to plot the results of a model comparison.

    Parameters
    ----------
    df_comp
        DataFrame containing the ranking of the given models.
    identifier
        Unique identifier of this particular signal (e.g. filename).
    path
        Path to the folder containing the results of the current run.
    save_formats
        Which file formats to save as.
        Must be supported by `plt.savefig()`, e.g. ``("png", "svg", "pdf")``.
    """
    axes = az.plot_compare(df_comp, insample_dev=False)
    fig = axes.figure
    plt.tight_layout()
    # `fmt` instead of `format` to avoid shadowing the builtin
    for fmt in save_formats:
        fig.savefig(Path(path) / f"model_comparison_{identifier}.{fmt}", format=fmt)
    plt.close(fig)
    return
@@ -0,0 +1,4 @@
1
def test_version():
    """The package version must look like MAJOR.MINOR.PATCH."""
    import peak_performance as pp

    # three dot-separated parts <=> exactly two dots
    assert len(pp.__version__.split(".")) == 3
@@ -0,0 +1,196 @@
1
+ from pathlib import Path
2
+
3
+ import arviz as az
4
+ import numpy as np
5
+ import pymc as pm
6
+ import pytest
7
+ import scipy.integrate
8
+ import scipy.stats as st
9
+
10
+ from peak_performance import models
11
+
12
+
13
def test_initial_guesses():
    """Initial guesses must reproduce a hand-computed baseline regression and noise width."""
    # synthetic signal: 3 baseline points, an 11-point plateau, 3 baseline points
    time = 2 + 0.1 * np.arange(17)
    intensity = [1, 5, 3] + 11 * [1000] + [7, 9, 11]
    # reference values computed directly from the six baseline points
    baseline_times = [2, 2.1, 2.2, 3.4, 3.5, 3.6]
    baseline_intensities = [1, 5, 3, 7, 9, 11]
    reference_fit = st.linregress(baseline_times, baseline_intensities)
    reference_noise_width = np.ptp(baseline_intensities)
    # run the function under test and compare with the reference
    slope, intercept, noise_width = models.initial_guesses(time, intensity)
    assert slope == reference_fit.slope
    assert intercept == reference_fit.intercept
    assert noise_width == reference_noise_width
27
+
28
+
29
class TestDistributions:
    """Numerical checks of the model's distribution helpers against scipy reference pdfs."""

    def test_normal_posterior(self):
        """The height-parameterized normal must match the scipy normal pdf."""
        x = np.linspace(-5, 10, 10000)
        expected = st.norm.pdf(x, 3, 2)
        actual_pt = models.normal_posterior(0, x, 3, 2, height=np.max(expected))
        # cast arrays to float data type in order to avoid error of
        # np.testing.assert_allclose() due to using np.isfinite under the hood
        actual = actual_pt.eval().astype(float)
        expected = expected.astype(float)
        # allow minor difference due to differences in float precision etc.
        np.testing.assert_allclose(expected, actual, atol=0.0000001)

    def test_double_normal_posterior(self):
        """The double normal must match the sum of two scipy normal pdfs."""
        x = np.linspace(5, 12, 10000)
        y1 = st.norm.pdf(x, loc=7.5, scale=0.6)
        y2 = st.norm.pdf(x, loc=9, scale=0.4) * 2
        y_double_pt = models.double_normal_posterior(
            0, x, (7.5, 9), (0.6, 0.4), height=(np.max(y1), np.max(y2))
        )
        y_double = y_double_pt.eval().astype(float)
        # many values are extremely close to zero, hence the loose rtol with a tiny atol
        np.testing.assert_allclose(y1 + y2, y_double, rtol=1, atol=1e-20)

    def test_height_calculation_without_baseline(self):
        """Analytical mode/height of the skew normal must match a numerical argmax of the pdf."""
        x = np.linspace(-1, 5.5, 10000)
        mean = 1.2
        std = 1.1
        alpha = 3
        y = st.skewnorm.pdf(x, alpha, loc=mean, scale=std)
        area = 1
        # find the x value to the maximum y value, i.e. the mode
        expected_mode_skew = x[np.argmax(y)]
        expected_height = np.max(y)
        # calculate actual values via the chain of helper functions
        delta = models.delta_calculation(alpha)
        mue_z = models.mue_z_calculation(delta)
        sigma_z = models.sigma_z_calculation(mue_z)
        skewness = models.skewness_calculation(delta)
        mode_offset_pt = models.mode_offset_calculation(mue_z, skewness, sigma_z, alpha)
        mode_skew_pt = models.mode_skew_calculation(mean, std, mode_offset_pt)
        height_pt = models.height_calculation(area, mean, std, alpha, mode_skew_pt)
        # cast arrays to float data type in order to avoid error of
        # np.testing.assert_allclose() due to using np.isfinite under the hood
        actual_mode = mode_skew_pt.eval().astype(float)
        actual_height = height_pt.eval().astype(float)
        # allow minor difference due to differences in float precision etc.
        np.testing.assert_allclose(expected_height, actual_height, atol=2e-5)
        np.testing.assert_allclose(expected_mode_skew, actual_mode, atol=1e-2)

    def test_height_calculation_with_linear_baseline(self):
        """Mode/height calculation must be unaffected by a linear baseline under the peak."""
        x = np.linspace(-1, 5.5, 1000000)
        mean = 1.2
        std = 1.1
        alpha = 3
        baseline = 0.04 * x + 0.3
        y = st.skewnorm.pdf(x, alpha, loc=mean, scale=std) + baseline
        area = 1
        # find the x value to the maximum y value, i.e. the mode
        imax = np.argmax(y - baseline)
        expected_mode_skew = x[imax]
        expected_height = y[imax] - baseline[imax]

        # calculate actual values via the chain of helper functions
        delta = models.delta_calculation(alpha)
        mue_z = models.mue_z_calculation(delta)
        sigma_z = models.sigma_z_calculation(mue_z)
        skewness = models.skewness_calculation(delta)
        mode_offset_pt = models.mode_offset_calculation(mue_z, skewness, sigma_z, alpha)
        mode_skew_pt = models.mode_skew_calculation(mean, std, mode_offset_pt)
        height_pt = models.height_calculation(area, mean, std, alpha, mode_skew_pt)
        # cast arrays to float data type in order to avoid error of
        # np.testing.assert_allclose() due to using np.isfinite under the hood
        actual_mode = mode_skew_pt.eval().astype(float)
        actual_height = height_pt.eval().astype(float)
        # allow slight difference due to shift of distribution by baseline
        # (this numerical calculation does not consider the baseline)
        np.testing.assert_allclose(expected_height, actual_height, atol=1e-4)
        np.testing.assert_allclose(expected_mode_skew, actual_mode, atol=5e-3)

    def test_skew_normal_posterior(self):
        """The skew normal must match the scipy skewnorm pdf for positive and negative skew."""
        x = np.linspace(-1, 5.5, 10000)
        # test first with positive alpha
        expected = st.skewnorm.pdf(x, 3, loc=1.2, scale=1.1)
        actual_pt = models.skew_normal_posterior(0, x, 1.2, 1.1, 3, area=1)
        # cast arrays to float data type in order to avoid error of
        # np.testing.assert_allclose() due to using np.isfinite under the hood
        actual = actual_pt.eval().astype(float)
        expected = expected.astype(float)
        # allow minor difference due to differences in float precision etc.
        np.testing.assert_allclose(expected, actual, atol=1e-8)

        # test again with negative alpha
        expected = st.skewnorm.pdf(x, -3, loc=1.2, scale=1.1)
        actual_pt = models.skew_normal_posterior(0, x, 1.2, 1.1, -3, area=1)
        actual = actual_pt.eval().astype(float)
        expected = expected.astype(float)
        np.testing.assert_allclose(expected, actual, atol=1e-8)

    def test_compare_normal_and_skew_as_normal(self):
        """A skew normal distribution with skewness alpha = 0 should be a normal distribution.

        Test if that is so for our distributions.
        """
        x = np.linspace(-10, 10, 10000)
        # BUGFIX: the reference height was previously taken from a scale=0.5 pdf although
        # both posteriors below are evaluated with std=1; use the matching scale=1 pdf.
        y = st.norm.pdf(x, loc=1, scale=1)
        height = np.max(y)
        area = scipy.integrate.quad(lambda x: st.norm.pdf(x, loc=1, scale=1), -10, 10)[0]
        y_actual_pt = models.normal_posterior(0, x, 1, 1, height=height)
        y_skew_actual_pt = models.skew_normal_posterior(0, x, 1, 1, 0, area=area)
        y_actual = y_actual_pt.eval().astype(float)
        y_skew_actual = y_skew_actual_pt.eval().astype(float)
        # many values are extremely close to zero so rtol was increased.
        # As guaranteed by the absurdly low atol, this will not mask any actual differences.
        np.testing.assert_allclose(y_skew_actual, y_actual, atol=1e-20, rtol=0.9)

    def test_double_skew_normal_posterior(self):
        """The double skew normal must match two concatenated scipy skewnorm pdfs."""
        x1 = np.arange(4, 6, 0.1)
        x2 = np.arange(6, 8, 0.1)
        alpha = 5
        y1 = st.skewnorm.pdf(x1, alpha, loc=5, scale=0.2)
        y2 = st.skewnorm.pdf(x2, alpha, loc=6.3, scale=0.2)
        time = np.array(list(x1) + list(x2))
        intensity = np.array(list(y1) + list(y2))
        y_double_pt = models.double_skew_normal_posterior(
            0, time, (5, 6.3), (0.2, 0.2), (5, 5), area=(1, 1)
        )
        y_double = y_double_pt.eval().astype(float)
        # many values are extremely close to zero, hence the loose rtol with a tiny atol
        np.testing.assert_allclose(intensity, y_double, rtol=1, atol=1e-20)
158
+
159
+
160
@pytest.mark.parametrize(
    "model_type", ["normal", "skew_normal", "double_normal", "double_skew_normal"]
)
def test_pymc_sampling(model_type):
    """Smoke-test that every model type builds and samples without error."""
    timeseries = np.load(
        Path(__file__).absolute().parent.parent / "example" / "A2t2R1Part1_132_85.9_86.1.npy"
    )

    if model_type == models.ModelType.Normal:
        pmodel = models.define_model_normal(timeseries[0], timeseries[1])
    elif model_type == models.ModelType.SkewNormal:
        pmodel = models.define_model_skew(timeseries[0], timeseries[1])
    elif model_type == models.ModelType.DoubleNormal:
        pmodel = models.define_model_double_normal(timeseries[0], timeseries[1])
    elif model_type == models.ModelType.DoubleSkewNormal:
        pmodel = models.define_model_double_skew_normal(timeseries[0], timeseries[1])
    else:
        # Fail loudly for unknown model types instead of hitting an
        # UnboundLocalError on `pmodel` below.
        raise NotImplementedError(f"Unsupported model type: {model_type}")
    with pmodel:
        # minimal tune/draws: this is a smoke test, not a convergence check
        idata = pm.sample(cores=2, chains=2, tune=3, draws=5)
    if model_type in [models.ModelType.DoubleNormal, models.ModelType.DoubleSkewNormal]:
        summary = az.summary(idata)
        # test whether the ordered transformation and the subpeak dimension work as intended
        assert summary.loc["mean[0]", "mean"] < summary.loc["mean[1]", "mean"]
        # assert summary.loc["area[0]", "mean"] < summary.loc["area[1]", "mean"]
184
+
185
+
186
def test_model_comparison():
    """Ranking two cached inference data objects must place the skew normal model first."""
    data_dir = Path(__file__).absolute().parent.parent / "test_data/test_model_comparison"
    # load pre-sampled inference data shipped with the test suite
    compare_dict = {
        "normal": az.from_netcdf(data_dir / "idata_normal.nc"),
        "skew_normal": az.from_netcdf(data_dir / "idata_skew.nc"),
    }
    ranking = models.model_comparison(compare_dict)
    assert ranking.index[0] == "skew_normal"