avoca 0.11.3__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avoca/bindings/ebas.py +57 -8
- avoca/bindings/ebas_flags.py +2 -8
- avoca/bindings/qa_tool.py +65 -1
- avoca/flags.py +8 -0
- avoca/plots.py +122 -0
- avoca/qa_class/abstract.py +8 -3
- avoca/qa_class/rt.py +52 -17
- avoca/qa_class/zscore.py +1 -1
- {avoca-0.11.3.dist-info → avoca-0.12.0.dist-info}/METADATA +1 -1
- {avoca-0.11.3.dist-info → avoca-0.12.0.dist-info}/RECORD +12 -11
- {avoca-0.11.3.dist-info → avoca-0.12.0.dist-info}/WHEEL +0 -0
- {avoca-0.11.3.dist-info → avoca-0.12.0.dist-info}/licenses/LICENCE.txt +0 -0
avoca/bindings/ebas.py
CHANGED
|
@@ -31,6 +31,18 @@ ebas_compname_of_var = {
|
|
|
31
31
|
ebas_compname_to_var = {v: k for k, v in ebas_compname_of_var.items()}
|
|
32
32
|
|
|
33
33
|
|
|
34
|
+
# Additional variables that can be in the dataset (not compound dependent)
|
|
35
|
+
additional_vars = [
|
|
36
|
+
"temperature",
|
|
37
|
+
"pressure",
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
titles = {
|
|
41
|
+
"temperature": "T_inlet",
|
|
42
|
+
"pressure": "P_inlet",
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
|
|
34
46
|
class DataLevel(IntEnum):
|
|
35
47
|
"""Values for different type of data used by ebas."""
|
|
36
48
|
|
|
@@ -57,6 +69,7 @@ def set_dataframe(
|
|
|
57
69
|
start_offset: timedelta | None = None,
|
|
58
70
|
end_offset: timedelta | None = None,
|
|
59
71
|
flag_all: list[int] = [],
|
|
72
|
+
invalidate_conc_calib: bool = True,
|
|
60
73
|
):
|
|
61
74
|
"""Put the data from the export dataframe into the nas object.
|
|
62
75
|
|
|
@@ -68,7 +81,10 @@ def set_dataframe(
|
|
|
68
81
|
:arg data_level: The level of the data to export.
|
|
69
82
|
:arg start_offset: The offset to add to the start time
|
|
70
83
|
:arg end_offset: The offset to add to the end time
|
|
71
|
-
|
|
84
|
+
:arg flag_all: List of flags to add to all the data
|
|
85
|
+
:arg invalidate_conc_calib: If True, the concentration calibration
|
|
86
|
+
will be invalidated (flag 980) for all calib samples.
|
|
87
|
+
:returns: A dictionary with the metadata of the compounds exported.
|
|
72
88
|
"""
|
|
73
89
|
|
|
74
90
|
if ("-", "start_datetime") not in df_export.columns:
|
|
@@ -104,6 +120,8 @@ def set_dataframe(
|
|
|
104
120
|
"rt": "s",
|
|
105
121
|
"w": "s",
|
|
106
122
|
"area": "area_unit",
|
|
123
|
+
"temperature": "K",
|
|
124
|
+
"pressure": "hPa",
|
|
107
125
|
}
|
|
108
126
|
|
|
109
127
|
ebas_varname_of_var = {
|
|
@@ -119,6 +137,7 @@ def set_dataframe(
|
|
|
119
137
|
|
|
120
138
|
# Export calibration status if given by the user
|
|
121
139
|
status_col = ("-", "status")
|
|
140
|
+
empty_flags = [[]] * len(df_export)
|
|
122
141
|
if (status_col in df_export.columns) and (data_level not in concs_data_levels):
|
|
123
142
|
metadata = DataObject()
|
|
124
143
|
metadata.comp_name = "status"
|
|
@@ -126,13 +145,38 @@ def set_dataframe(
|
|
|
126
145
|
metadata.matrix = "instrument"
|
|
127
146
|
metadata.unit = "no unit"
|
|
128
147
|
values = [val for val in df_export[status_col]]
|
|
129
|
-
flags = [[] for _ in df_export[status_col]]
|
|
130
148
|
nas.variables.append(
|
|
131
|
-
DataObject(
|
|
149
|
+
DataObject(
|
|
150
|
+
values_=values, flags=empty_flags, flagcol=True, metadata=metadata
|
|
151
|
+
)
|
|
132
152
|
)
|
|
133
153
|
|
|
134
|
-
|
|
154
|
+
for var in additional_vars:
|
|
155
|
+
var_col = ("-", var)
|
|
156
|
+
if var_col not in df_export.columns:
|
|
157
|
+
continue
|
|
158
|
+
metadata = DataObject()
|
|
159
|
+
metadata.comp_name = var
|
|
160
|
+
metadata.title = titles.get(var, var)
|
|
161
|
+
metadata.matrix = "instrument"
|
|
162
|
+
metadata.unit = unit_of_var[var]
|
|
163
|
+
metadata.cal_scale = ""
|
|
164
|
+
values = [val for val in df_export[var_col]]
|
|
165
|
+
nas.variables.append(
|
|
166
|
+
DataObject(
|
|
167
|
+
values_=values,
|
|
168
|
+
flags=empty_flags,
|
|
169
|
+
flagcol=True,
|
|
170
|
+
metadata=metadata,
|
|
171
|
+
)
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
this_nan_flags = nan_flags.copy()
|
|
176
|
+
|
|
177
|
+
if data_level in concs_data_levels and invalidate_conc_calib:
|
|
135
178
|
# Set the flag to the invalid value instead of the valid calibration
|
|
179
|
+
this_nan_flags.append(QA_Flag.CALIBRATION)
|
|
136
180
|
dict_flags_to_ebas[QA_Flag.CALIBRATION] = 980
|
|
137
181
|
|
|
138
182
|
for sub in compounds:
|
|
@@ -144,10 +188,7 @@ def set_dataframe(
|
|
|
144
188
|
)
|
|
145
189
|
for flag in flag_col
|
|
146
190
|
]
|
|
147
|
-
nan_flag = np.logical_or.reduce([flag_col & flag.value for flag in
|
|
148
|
-
if data_level == DataLevel.CONCS:
|
|
149
|
-
# Invalidate also the calibration runs
|
|
150
|
-
nan_flag |= flag_col & QA_Flag.CALIBRATION.value
|
|
191
|
+
nan_flag = np.logical_or.reduce([flag_col & flag.value for flag in this_nan_flags])
|
|
151
192
|
|
|
152
193
|
for var in vars_to_export[data_level]:
|
|
153
194
|
ebas_name = compounds[sub]
|
|
@@ -273,6 +314,10 @@ def nas_to_avoca(nas: EbasNasaAmes) -> pd.DataFrame:
|
|
|
273
314
|
clean_for_df[("-", "status")] = calib_ids.astype(int)
|
|
274
315
|
continue
|
|
275
316
|
|
|
317
|
+
if comp_name in additional_vars:
|
|
318
|
+
clean_for_df[("-", comp_name)] = np.array(values, dtype=float)
|
|
319
|
+
continue
|
|
320
|
+
|
|
276
321
|
# Split the title on the _
|
|
277
322
|
comp_name = comp_name.split("_")
|
|
278
323
|
if len(comp_name) == 1:
|
|
@@ -288,6 +333,10 @@ def nas_to_avoca(nas: EbasNasaAmes) -> pd.DataFrame:
|
|
|
288
333
|
elif len(comp_name) == 3:
|
|
289
334
|
compund, var_first, var_second = comp_name
|
|
290
335
|
variable = f"{var_first}_{var_second}"
|
|
336
|
+
elif len(comp_name) == 4 and comp_name[-1] == "compounds":
|
|
337
|
+
# Concentration of merged compounds
|
|
338
|
+
compund = "_".join(comp_name)
|
|
339
|
+
variable = "C"
|
|
291
340
|
else:
|
|
292
341
|
logger.warning(f"passing {comp_name}, could not be understood. Skipping.")
|
|
293
342
|
continue
|
avoca/bindings/ebas_flags.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# https://projects.nilu.no/ccc/flags/flags.html for more info on what ebas uses
|
|
2
|
-
from avoca.flags import QA_Flag
|
|
2
|
+
from avoca.flags import QA_Flag, nan_flags
|
|
3
3
|
|
|
4
4
|
flags_to_ebas: dict[QA_Flag, int] = {
|
|
5
5
|
QA_Flag.MISSING: 999, # M Missing measurement, unspecified reason
|
|
@@ -40,13 +40,7 @@ if missing_flags:
|
|
|
40
40
|
f"Not all QA flags are mapped to Ebas flags. Missing: {missing_flags}"
|
|
41
41
|
)
|
|
42
42
|
|
|
43
|
-
|
|
44
|
-
nan_flags = [
|
|
45
|
-
QA_Flag.MISSING,
|
|
46
|
-
QA_Flag.ZERO_NEG_CONC_EXT,
|
|
47
|
-
QA_Flag.INVALIDATED_EXT,
|
|
48
|
-
QA_Flag.INVALID_VALUES,
|
|
49
|
-
]
|
|
43
|
+
nan_flags = nan_flags
|
|
50
44
|
|
|
51
45
|
# priority of the flag to appear in the output
|
|
52
46
|
# Useful when you can select only one flag value
|
avoca/bindings/qa_tool.py
CHANGED
|
@@ -12,7 +12,7 @@ import numpy as np
|
|
|
12
12
|
import pandas as pd
|
|
13
13
|
import pandas.errors
|
|
14
14
|
|
|
15
|
-
from avoca.bindings.ebas_flags import flag_order, flags_to_ebas
|
|
15
|
+
from avoca.bindings.ebas_flags import flag_order, flags_to_ebas, ebas_flag_to_avoca
|
|
16
16
|
from avoca.flags import QA_Flag
|
|
17
17
|
from avoca.utils import compounds_from_df
|
|
18
18
|
|
|
@@ -207,3 +207,67 @@ def export_EmpaQATool(
|
|
|
207
207
|
logger.info(f"Exported to `{out_filepath}`")
|
|
208
208
|
|
|
209
209
|
return out_filepath
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def read_empaqatool(file_path: Path, shift: timedelta | None = None) -> pd.DataFrame:
|
|
213
|
+
"""Read an EmpaQATool export file.
|
|
214
|
+
|
|
215
|
+
Data is exported through : https://voc-qc.nilu.no/ExportData
|
|
216
|
+
|
|
217
|
+
:arg file_path: Path to the EmpaQATool export file.
|
|
218
|
+
|
|
219
|
+
:returns: DataFrame with the data.
|
|
220
|
+
"""
|
|
221
|
+
|
|
222
|
+
# Pandas skips the 2 empty rows
|
|
223
|
+
df = pd.read_csv(file_path, sep=";", header=2)
|
|
224
|
+
|
|
225
|
+
# Convert the datetime columns
|
|
226
|
+
columns = {}
|
|
227
|
+
to_datetime = lambda x: pd.to_datetime(x, format="%Y-%m-%d %H:%M:%S")
|
|
228
|
+
columns[("-", "datetime_start")] = to_datetime(df["Start"])
|
|
229
|
+
columns[("-", "datetime_end")] = to_datetime(df["End"])
|
|
230
|
+
|
|
231
|
+
# Get the datetime column as the start time
|
|
232
|
+
dt = columns[("-", "datetime_start")].copy()
|
|
233
|
+
if shift is not None:
|
|
234
|
+
dt += shift
|
|
235
|
+
columns[("-", "datetime")] = dt
|
|
236
|
+
|
|
237
|
+
# Last column is empty
|
|
238
|
+
compounds = [ '-'.join(s[:-1]) for col in df.columns if len(s:=col.split("-")) >= 2]
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
for compound in compounds:
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
flag_col = f"{compound}-flag"
|
|
245
|
+
value_col = f"{compound}-value"
|
|
246
|
+
acc_col = f"{compound}-accuracy"
|
|
247
|
+
precision_col = f"{compound}-precision"
|
|
248
|
+
|
|
249
|
+
mapping = {
|
|
250
|
+
"conc": value_col,
|
|
251
|
+
"u_expanded":acc_col,
|
|
252
|
+
"u_precision":precision_col,
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
flag_values = (pd.to_numeric(df[flag_col]) * 1e3).astype(int).mod(1000)
|
|
256
|
+
# Flags are adding 1000 for specifying when set by qa tool or not
|
|
257
|
+
flags = flag_values.apply(
|
|
258
|
+
lambda x: ebas_flag_to_avoca[x].value if x else int(0)
|
|
259
|
+
)
|
|
260
|
+
|
|
261
|
+
for key, value in mapping.items():
|
|
262
|
+
# Since the nan flags values are set to 9999, we need to set them to nan
|
|
263
|
+
serie = pd.to_numeric(df[value])
|
|
264
|
+
mask_nan = flags == QA_Flag.MISSING.value
|
|
265
|
+
serie[mask_nan] = np.nan
|
|
266
|
+
columns[(compound, key)] = serie
|
|
267
|
+
|
|
268
|
+
columns[(compound, "flag")] = flags
|
|
269
|
+
|
|
270
|
+
mask_nan = columns[(compound, "conc")].isna()
|
|
271
|
+
columns[(compound, "flag")][mask_nan] |= QA_Flag.MISSING.value
|
|
272
|
+
|
|
273
|
+
return pd.DataFrame(columns)
|
avoca/flags.py
CHANGED
|
@@ -46,6 +46,14 @@ class QA_Flag(Flag):
|
|
|
46
46
|
# Invalid Values
|
|
47
47
|
INVALID_VALUES = auto()
|
|
48
48
|
|
|
49
|
+
# Flags that are considered to have missing values
|
|
50
|
+
nan_flags = [
|
|
51
|
+
QA_Flag.MISSING,
|
|
52
|
+
QA_Flag.ZERO_NEG_CONC_EXT,
|
|
53
|
+
QA_Flag.INVALIDATED_EXT,
|
|
54
|
+
QA_Flag.INVALID_VALUES,
|
|
55
|
+
]
|
|
56
|
+
|
|
49
57
|
|
|
50
58
|
if __name__ == "__main__":
|
|
51
59
|
# Print the flags and their values
|
avoca/plots.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import matplotlib.pyplot as plt
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def plot_historical_comparison(
    df_new: pd.DataFrame, df_hist: pd.DataFrame, compound: str, ax=None
) -> "tuple[plt.Figure, plt.Axes]":
    """Scatter the concentration of one compound for historical and new data.

    Both dataframes are read through the ``("-", "datetime")`` and
    ``(compound, "conc")`` columns. When a dataframe has a ``("-", "type")``
    column, only its rows equal to ``"air"`` are plotted.

    :arg df_new: The new data, drawn in red with the label "New".
    :arg df_hist: The historical data, drawn in blue with the label "Historical".
    :arg compound: The compound to plot.
    :arg ax: Axes to draw on. If None, a new 10x6 figure is created.

    :returns: The figure and the axes that were drawn on.
    """
    if ax is None:
        fig, ax = plt.subplots(figsize=(10, 6))
    else:
        # Return the figure owning the caller's axes; without this ``fig``
        # was never bound on this path and the final return raised.
        fig = ax.figure

    dt_column = ("-", "datetime")
    conc_column = (compound, "conc")
    type_column = ("-", "type")

    # Historical data is drawn first so that the new data ends up on top
    datasets = (
        ("Historical", "blue", df_hist),
        ("New", "red", df_new),
    )
    for data_type, color, df in datasets:
        serie = df[conc_column]
        dt = df[dt_column]
        if type_column in df.columns:
            mask_air = df[type_column] == "air"
            serie = serie[mask_air]
            dt = dt[mask_air]

        ax.scatter(dt, serie, label=data_type, color=color, alpha=0.5, s=4)

    ax.set_title(compound)
    ax.set_xlabel("Date")
    ax.set_ylabel("Concentration (ppt)")
    ax.legend()
    return fig, ax
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def plot_yearly_data(
    df: pd.DataFrame, compound: str, ax=None
) -> "tuple[plt.Figure, plt.Axes]":
    """Scatter the concentration of one compound against the time of the year.

    Every year found in the ``("-", "datetime")`` column gets its own scatter
    series, so that the years are overlaid on a common day-of-year axis.
    When the dataframe has a ``("-", "type")`` column, only the rows equal to
    ``"air"`` are plotted.

    :arg df: The data to plot.
    :arg compound: The compound to plot.
    :arg ax: Axes to draw on. If None, a new 10x6 figure is created.

    :returns: The figure and the axes that were drawn on.
    """
    if ax is None:
        fig, ax = plt.subplots(figsize=(10, 6))
    else:
        # Return the figure owning the caller's axes; without this ``fig``
        # was never bound on this path and the final return raised.
        fig = ax.figure

    type_column = ("-", "type")
    serie = df[(compound, "conc")]
    dt = df[("-", "datetime")]
    if type_column in df.columns:
        mask_air = df[type_column] == "air"
        serie = serie[mask_air]
        dt = dt[mask_air]

    # Fractional day of the year, at an hourly resolution
    x = dt.dt.day_of_year + dt.dt.hour / 24.0
    year_of_sample = dt.dt.year
    for year in year_of_sample.unique():
        mask_year = year_of_sample == year
        ax.scatter(x[mask_year], serie[mask_year], label=str(year), alpha=0.5, s=4)

    ax.set_title(compound)
    ax.set_xlabel("Time of Year")
    ax.set_ylabel("Concentration (ppt)")

    # Add ticks with the months. Exactly twelve month starts are generated;
    # going up to the next 1st of January would add a second tick on day 1.
    # The positions are those of 2024 (a leap year).
    month_starts = pd.date_range(start="2024-01-01", periods=12, freq="MS")
    ax.set_xticks(month_starts.dayofyear)
    ax.set_xticklabels(month_starts.strftime("%b"))
    ax.legend()
    return fig, ax
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def plot_yearly_plotly(
    df: pd.DataFrame,
    compound: str,
    df_new: pd.DataFrame | None = None,
) -> "plotly.graph_objs._figure.Figure":
    """Plot yearly data using plotly.

    The concentration of ``compound`` is drawn against the time of the year,
    with one scatter series per year found in ``df``. When a dataframe has a
    ``("-", "type")`` column, only its rows equal to ``"air"`` are used.

    :arg df: The data to plot, broken down by year.
    :arg compound: The compound to plot.
    :arg df_new: Optional additional data, drawn as one extra series named
        "New Data". If None, only ``df`` is plotted.

    :returns: The plotly figure.
    """
    # Imported here so that plotly is only needed when this function is called
    import plotly.express as px
    import plotly.graph_objects as go

    dt_column = ("-", "datetime")
    conc_column = (compound, "conc")
    type_column = ("-", "type")

    serie = df[conc_column]
    dt = df[dt_column]
    if type_column in df.columns:
        mask_air = df[type_column] == "air"
        serie = serie[mask_air]
        dt = dt[mask_air]

    # Fractional day of the year, at an hourly resolution
    x = dt.dt.day_of_year + dt.dt.hour / 24.0
    df_to_plot = pd.DataFrame(
        {
            "conc": serie.values,
            "year": dt.dt.year.values,
        },
        index=x.values,
    )
    # Break down by year, to have year as columns and conc as values
    df_to_plot = df_to_plot.pivot_table(
        index=df_to_plot.index, columns="year", values="conc"
    )
    fig = px.scatter(df_to_plot)

    # ``df_new`` is optional: it was previously dereferenced even when left
    # to its default of None, which made the default call fail.
    if df_new is not None:
        if type_column in df_new.columns:
            df_new = df_new[df_new[type_column] == "air"]
        dt_new = df_new[dt_column]
        fig.add_trace(
            go.Scatter(
                x=dt_new.dt.dayofyear + dt_new.dt.hour / 24.0,
                y=df_new[conc_column],
                mode="markers",
                name="New Data",
            )
        )

    # Month ticks, positioned on the first day of each month of 2024
    x_values = pd.date_range(start="2024-01-01", end="2024-12-31", freq="MS")
    fig.update_layout(
        xaxis_title="Time of Year",
        yaxis_title=f"{compound} (ppt)",
        xaxis=dict(
            tickmode="array",
            tickvals=x_values.dayofyear,
            ticktext=x_values.strftime("%b"),
        ),
    )

    return fig
|
avoca/qa_class/abstract.py
CHANGED
|
@@ -4,13 +4,17 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
import logging
|
|
6
6
|
from abc import ABC, abstractmethod
|
|
7
|
-
from typing import Sequence
|
|
7
|
+
from typing import TYPE_CHECKING, Sequence
|
|
8
8
|
|
|
9
9
|
import pandas as pd
|
|
10
10
|
|
|
11
11
|
from avoca.flags import QA_Flag
|
|
12
12
|
from avoca.requirements import PythonPackageRequirement
|
|
13
13
|
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from matplotlib.axes import Axes
|
|
16
|
+
from matplotlib.figure import Figure
|
|
17
|
+
|
|
14
18
|
|
|
15
19
|
class AbstractQA_Assigner(ABC):
|
|
16
20
|
"""Abstract class for QA assigners.
|
|
@@ -74,10 +78,11 @@ class AbstractQA_Assigner(ABC):
|
|
|
74
78
|
stopp: pd.Timestamp = pd.Timestamp.max,
|
|
75
79
|
name: str | None = None,
|
|
76
80
|
runtypes: list[str] = None,
|
|
81
|
+
log_level: int = logging.INFO,
|
|
77
82
|
):
|
|
78
83
|
"""Create a new QA assigner."""
|
|
79
84
|
self.logger = logging.getLogger(type(self).__name__)
|
|
80
|
-
self.logger.setLevel(
|
|
85
|
+
self.logger.setLevel(log_level)
|
|
81
86
|
|
|
82
87
|
self.name = name or type(self).__name__
|
|
83
88
|
|
|
@@ -161,6 +166,6 @@ class AbstractQA_Assigner(ABC):
|
|
|
161
166
|
raise NotImplementedError
|
|
162
167
|
|
|
163
168
|
# Optional method
|
|
164
|
-
def plot(self):
|
|
169
|
+
def plot(self) -> tuple[Figure, Sequence[Axes]]:
|
|
165
170
|
"""Plot the QA assigner."""
|
|
166
171
|
raise NotImplementedError(f"{type(self).__name__} does not have a plot method.")
|
avoca/qa_class/rt.py
CHANGED
|
@@ -17,15 +17,38 @@ class RetentionTimeChecker(AbstractQA_Assigner):
|
|
|
17
17
|
retention times of the measurements.
|
|
18
18
|
The correlation is usually very high. If one compound has a low correlation
|
|
19
19
|
with the others, it probably means that is was miss-assigned at some points.
|
|
20
|
+
|
|
21
|
+
:param rt_threshold: The threshold for the retention time deviation.
|
|
22
|
+
Unit is time unit (minutes or seconds, as in the data).
|
|
23
|
+
This will try to fit a linear regression from the average training
|
|
24
|
+
retention times to the measured ones for each sample.
|
|
25
|
+
If after the regression a datapoint is higher than this threshold,
|
|
26
|
+
it will be removed.
|
|
27
|
+
:param rt_relative_max_deviation: The maximum relative deviation allowed
|
|
28
|
+
from the average retention time.
|
|
29
|
+
This is used to remove outliers that are too far from the average.
|
|
30
|
+
if 0.5 is given, it means that the retention time can be 50% higher or lower
|
|
31
|
+
than the average retention time.
|
|
20
32
|
"""
|
|
21
33
|
|
|
22
34
|
runtypes: list[str] = ["air", "std"]
|
|
35
|
+
variable: str = "rt"
|
|
23
36
|
flag = QA_Flag.SUSPICIOUS_RT
|
|
24
37
|
|
|
25
|
-
RT_THRESHOLD: float = 2.0
|
|
26
|
-
|
|
27
38
|
rt_ref: pd.Series
|
|
28
39
|
|
|
40
|
+
def __init__(
|
|
41
|
+
self,
|
|
42
|
+
rt_threshold: float = 2.0,
|
|
43
|
+
rt_relative_max_deviation: float = 0.2,
|
|
44
|
+
poly_order: int = 1,
|
|
45
|
+
**kwargs,
|
|
46
|
+
):
|
|
47
|
+
super().__init__(**kwargs)
|
|
48
|
+
self.rt_threshold = rt_threshold
|
|
49
|
+
self.rt_relative_max_deviation = rt_relative_max_deviation
|
|
50
|
+
self.poly_order = poly_order
|
|
51
|
+
|
|
29
52
|
def fit(self, df: pd.DataFrame):
|
|
30
53
|
cols = [(compound, "rt") for compound in self.compounds]
|
|
31
54
|
|
|
@@ -49,6 +72,7 @@ class RetentionTimeChecker(AbstractQA_Assigner):
|
|
|
49
72
|
|
|
50
73
|
# Get a dataframe for a mean reference
|
|
51
74
|
self.rt_ref = df_rt.median(axis="index")
|
|
75
|
+
self.rt_std = df_rt.std(axis="index")
|
|
52
76
|
|
|
53
77
|
def assign(self, df: pd.DataFrame) -> dict[str, pd.Index]:
|
|
54
78
|
"""Assing flags when expected rt values does not match the measured ones."""
|
|
@@ -58,27 +82,29 @@ class RetentionTimeChecker(AbstractQA_Assigner):
|
|
|
58
82
|
df_rt = df[rt_cols]
|
|
59
83
|
# Take the reference retention times
|
|
60
84
|
x = self.rt_ref.loc[rt_cols].to_numpy()
|
|
85
|
+
std = self.rt_std.loc[rt_cols].to_numpy()
|
|
61
86
|
|
|
62
87
|
outliers = {}
|
|
63
88
|
|
|
64
89
|
for t, row in df_rt.iterrows():
|
|
65
90
|
# Make a lin reg line
|
|
66
91
|
y = row.to_numpy()
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
self.
|
|
70
|
-
|
|
71
|
-
" measured"
|
|
72
|
-
)
|
|
73
|
-
continue
|
|
92
|
+
# Remove the points that are too far from the reference
|
|
93
|
+
mask_bad = (
|
|
94
|
+
(np.abs(y - x) / x) > self.rt_relative_max_deviation
|
|
95
|
+
) | np.isnan(y)
|
|
74
96
|
|
|
75
|
-
|
|
76
|
-
f = np.poly1d(params)
|
|
77
|
-
y_lin_reg = f(x)
|
|
97
|
+
if np.sum(~mask_bad) > self.poly_order + 2:
|
|
78
98
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
99
|
+
params = np.polyfit(x[~mask_bad], y[~mask_bad], self.poly_order)
|
|
100
|
+
f = np.poly1d(params)
|
|
101
|
+
y_lin_reg = f(x)
|
|
102
|
+
|
|
103
|
+
# Get the points which are too far from the reg line
|
|
104
|
+
error = y - y_lin_reg
|
|
105
|
+
mask_bad |= np.abs(error) > self.rt_threshold
|
|
106
|
+
|
|
107
|
+
if any(mask_bad):
|
|
82
108
|
outliers[t] = mask_bad
|
|
83
109
|
|
|
84
110
|
# Create a dataframe with the flags
|
|
@@ -95,12 +121,12 @@ class RetentionTimeChecker(AbstractQA_Assigner):
|
|
|
95
121
|
|
|
96
122
|
import matplotlib.pyplot as plt
|
|
97
123
|
|
|
98
|
-
fig, ax = plt.subplots()
|
|
124
|
+
fig, ax = plt.subplots(figsize=(16, 9))
|
|
99
125
|
|
|
100
126
|
assigned = self.assign(self.df_train)
|
|
101
127
|
|
|
102
128
|
for compound in self.compounds:
|
|
103
|
-
ax.scatter(
|
|
129
|
+
points = ax.scatter(
|
|
104
130
|
self.df_train.index,
|
|
105
131
|
self.df_train[(compound, "rt")],
|
|
106
132
|
label=compound,
|
|
@@ -115,6 +141,15 @@ class RetentionTimeChecker(AbstractQA_Assigner):
|
|
|
115
141
|
color="red",
|
|
116
142
|
marker="x",
|
|
117
143
|
)
|
|
144
|
+
# Line for the mean retention time
|
|
145
|
+
ax.axhline(
|
|
146
|
+
self.rt_ref[(compound, "rt")],
|
|
147
|
+
color=points.get_facecolor()[0],
|
|
148
|
+
linestyle="--",
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
ax.set_ylabel("Retention time")
|
|
152
|
+
ax.set_xlabel("Sample")
|
|
118
153
|
|
|
119
154
|
ax.legend()
|
|
120
155
|
plt.show()
|
avoca/qa_class/zscore.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: avoca
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.12.0
|
|
4
4
|
Summary: @voc@: Quality assessement of measurement data
|
|
5
5
|
Project-URL: Homepage, https://gitlab.com/empa503/atmospheric-measurements/avoca
|
|
6
6
|
Project-URL: Bug Tracker, https://gitlab.com/empa503/atmospheric-measurements/avoca/-/issues
|
|
@@ -1,35 +1,36 @@
|
|
|
1
1
|
avoca/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
avoca/export_nas.py,sha256=B9B2iFSzB3f83nCfe2_vzouRblthK0_dGF8W3o0Kt5Y,155
|
|
3
3
|
avoca/flagging.py,sha256=tg6k_TVHRXiMJCAij_kUS-S2gSshYt7FKvQ0nJdljYs,2328
|
|
4
|
-
avoca/flags.py,sha256=
|
|
4
|
+
avoca/flags.py,sha256=wobuZoIJh6dFsdiqqYJLZ_AHe4pcFE9tjuoimNXLjIQ,1428
|
|
5
5
|
avoca/io.py,sha256=67D5x1qkLqWC7wWehyOfX96L4H3-tn9x2V4jMCoIRqA,729
|
|
6
6
|
avoca/logging.py,sha256=BrxgZQRfnkPSoQ0ZXhOzzhIsmbyjKvaJNG55MdM9jmA,86
|
|
7
7
|
avoca/manager.py,sha256=ET-ATrSLi2rSV7PjBzwpjj0V_60MFxSIZqQ03aEIbdA,5284
|
|
8
|
+
avoca/plots.py,sha256=uEo0rTCwQ0iygTaycYPlbtcqNbJpDQd7xjvis686lD4,3567
|
|
8
9
|
avoca/requirements.py,sha256=q4z6bJ6iW5jSy10Y0elfE9BoEcAZC2-kUqYi4zA6TGE,563
|
|
9
10
|
avoca/settings.py,sha256=Px-sCGIlRyWI2RBJaGUY0K1V60kOZY9n41eft92bjN4,2112
|
|
10
11
|
avoca/bindings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
|
-
avoca/bindings/ebas.py,sha256=
|
|
12
|
-
avoca/bindings/ebas_flags.py,sha256=
|
|
12
|
+
avoca/bindings/ebas.py,sha256=48cYR-jwc3GMZCVhYYbMVUj1RgFAoQNtQC2kOpA1iAA,18827
|
|
13
|
+
avoca/bindings/ebas_flags.py,sha256=nts47BB74vDlAbecUatXtjeyL3SALLCt3fRl6BfdOS4,2388
|
|
13
14
|
avoca/bindings/gcwerks-report.conf,sha256=jO0I62DfgzrXXS1FuiW8ds-oc1_j8kpFCO61Fk-erBw,230
|
|
14
15
|
avoca/bindings/gcwerks.py,sha256=a5n9Iot3r_ejnCEdILk4hE2uioONB75Soq5fvSLlDoo,14879
|
|
15
16
|
avoca/bindings/gcwerks_gui.py,sha256=Fj3p8obFq3lWrWW0LlA8WBALP8-U70hvps5vZEt4NaM,9458
|
|
16
17
|
avoca/bindings/nabel.py,sha256=VbC_ARvtso8onILAD8gROt5Y2URdx6NfAqMn4p1mUWU,3020
|
|
17
|
-
avoca/bindings/qa_tool.py,sha256=
|
|
18
|
+
avoca/bindings/qa_tool.py,sha256=ninHe3mrJ8GULxRCkRTZixw-vmNhqu4zwwONd5aXd1Q,9735
|
|
18
19
|
avoca/bindings/synspec.py,sha256=W5RnBu-6eetmwjM8iMBe4wNwVNIaVpNW3bwa2ykGM2U,1733
|
|
19
20
|
avoca/qa_class/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
|
-
avoca/qa_class/abstract.py,sha256=
|
|
21
|
+
avoca/qa_class/abstract.py,sha256=KCK9OhKNWlMje-5D0hgMIf-g64D_kRwRsoCZ_R4VuqI,5612
|
|
21
22
|
avoca/qa_class/concs.py,sha256=TcQic69I1Kr40RJgCILTtyjVLn0K6_q6I5Y1Vi3dKwk,813
|
|
22
23
|
avoca/qa_class/generate_classes_doc.py,sha256=osz01SRZ5SrwJXVlmbcainVwVjmealSSIdbzXzUEGKQ,1915
|
|
23
24
|
avoca/qa_class/invalid.py,sha256=PDZHN0RZ8jND3QY09UcbwJYjjT6VqS4a0klO3QYiFig,2650
|
|
24
|
-
avoca/qa_class/rt.py,sha256=
|
|
25
|
+
avoca/qa_class/rt.py,sha256=Bgv0DSSR-hIJ9kI6AdUkV6sXVS65gBxbASkk4TUHbnQ,5293
|
|
25
26
|
avoca/qa_class/test.py,sha256=Xc88_Vwf3hvPiKKl4ILxZ2N985SY8eujUdnAoQu4mbo,591
|
|
26
|
-
avoca/qa_class/zscore.py,sha256=
|
|
27
|
+
avoca/qa_class/zscore.py,sha256=jDw2UBmf7KBkskGOD5bgFy3RgNYUjc-9tYjSU-3L1ws,16714
|
|
27
28
|
avoca/testing/__init__.py,sha256=CzkugadVit48-eMoMVtojZLHeSKgnmMMen6sGu6Q42Y,108
|
|
28
29
|
avoca/testing/df.py,sha256=Nc0GUYTApZgYyUTMnHMTbSKLiA5ty9Bg7gUGtnoFYMI,1826
|
|
29
30
|
avoca/utils/__init__.py,sha256=LEA2jJsqwSK2DBzXg00DbPhM1fXXREJ0XxLeuJtKapY,1398
|
|
30
31
|
avoca/utils/flags_doc.py,sha256=cS7yKpxVh_SA6EdH3lSy9UpcIvhGwzAELRbkXN3CxO8,4168
|
|
31
32
|
avoca/utils/torch_models.py,sha256=53TgOgSPMOOSGYy2cm1EGSK7qQkYMGEOq319KKM_Ir0,1015
|
|
32
|
-
avoca-0.
|
|
33
|
-
avoca-0.
|
|
34
|
-
avoca-0.
|
|
35
|
-
avoca-0.
|
|
33
|
+
avoca-0.12.0.dist-info/METADATA,sha256=4xD5mqScWJDeAnbM3vFfKLxaszOLz0GlDPvjE1Ej_vw,1570
|
|
34
|
+
avoca-0.12.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
35
|
+
avoca-0.12.0.dist-info/licenses/LICENCE.txt,sha256=4MY53j3v7tEKwjyuriVz9YjB4Dscm2nDMB2CcG9lOmk,1059
|
|
36
|
+
avoca-0.12.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|