avoca 0.11.3__py3-none-any.whl → 0.11.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avoca/bindings/ebas.py +57 -8
- avoca/qa_class/abstract.py +8 -3
- avoca/qa_class/rt.py +52 -17
- avoca/qa_class/zscore.py +1 -1
- {avoca-0.11.3.dist-info → avoca-0.11.4.dist-info}/METADATA +1 -1
- {avoca-0.11.3.dist-info → avoca-0.11.4.dist-info}/RECORD +8 -8
- {avoca-0.11.3.dist-info → avoca-0.11.4.dist-info}/WHEEL +0 -0
- {avoca-0.11.3.dist-info → avoca-0.11.4.dist-info}/licenses/LICENCE.txt +0 -0
avoca/bindings/ebas.py
CHANGED
|
@@ -31,6 +31,18 @@ ebas_compname_of_var = {
|
|
|
31
31
|
ebas_compname_to_var = {v: k for k, v in ebas_compname_of_var.items()}
|
|
32
32
|
|
|
33
33
|
|
|
34
|
+
# Additional variables that can be in the dataset (not compound dependant)
|
|
35
|
+
additional_vars = [
|
|
36
|
+
"temperature",
|
|
37
|
+
"pressure",
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
titles = {
|
|
41
|
+
"temperature": "T_inlet",
|
|
42
|
+
"pressure": "P_inlet",
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
|
|
34
46
|
class DataLevel(IntEnum):
|
|
35
47
|
"""Values for different type of data used by ebas."""
|
|
36
48
|
|
|
@@ -57,6 +69,7 @@ def set_dataframe(
|
|
|
57
69
|
start_offset: timedelta | None = None,
|
|
58
70
|
end_offset: timedelta | None = None,
|
|
59
71
|
flag_all: list[int] = [],
|
|
72
|
+
invalidate_conc_calib: bool = True,
|
|
60
73
|
):
|
|
61
74
|
"""Put the data from the export dataframe into the nas object.
|
|
62
75
|
|
|
@@ -68,7 +81,10 @@ def set_dataframe(
|
|
|
68
81
|
:arg data_level: The level of the data to export.
|
|
69
82
|
:arg start_offset: The offset to add to the start time
|
|
70
83
|
:arg end_offset: The offset to add to the end time
|
|
71
|
-
|
|
84
|
+
:arg flag_all: List of flags to add to all the data
|
|
85
|
+
:arg invalidate_conc_calib: If True, the concentration calibration
|
|
86
|
+
will be invalidated (flag 980) for all calib samples.
|
|
87
|
+
:returns: A dictionary with the metadata of the compounds exported.
|
|
72
88
|
"""
|
|
73
89
|
|
|
74
90
|
if ("-", "start_datetime") not in df_export.columns:
|
|
@@ -104,6 +120,8 @@ def set_dataframe(
|
|
|
104
120
|
"rt": "s",
|
|
105
121
|
"w": "s",
|
|
106
122
|
"area": "area_unit",
|
|
123
|
+
"temperature": "K",
|
|
124
|
+
"pressure": "hPa",
|
|
107
125
|
}
|
|
108
126
|
|
|
109
127
|
ebas_varname_of_var = {
|
|
@@ -119,6 +137,7 @@ def set_dataframe(
|
|
|
119
137
|
|
|
120
138
|
# Export calibration status if given by the user
|
|
121
139
|
status_col = ("-", "status")
|
|
140
|
+
empty_flags = [[]] * len(df_export)
|
|
122
141
|
if (status_col in df_export.columns) and (data_level not in concs_data_levels):
|
|
123
142
|
metadata = DataObject()
|
|
124
143
|
metadata.comp_name = "status"
|
|
@@ -126,13 +145,38 @@ def set_dataframe(
|
|
|
126
145
|
metadata.matrix = "instrument"
|
|
127
146
|
metadata.unit = "no unit"
|
|
128
147
|
values = [val for val in df_export[status_col]]
|
|
129
|
-
flags = [[] for _ in df_export[status_col]]
|
|
130
148
|
nas.variables.append(
|
|
131
|
-
DataObject(
|
|
149
|
+
DataObject(
|
|
150
|
+
values_=values, flags=empty_flags, flagcol=True, metadata=metadata
|
|
151
|
+
)
|
|
132
152
|
)
|
|
133
153
|
|
|
134
|
-
|
|
154
|
+
for var in additional_vars:
|
|
155
|
+
var_col = ("-", var)
|
|
156
|
+
if var_col not in df_export.columns:
|
|
157
|
+
continue
|
|
158
|
+
metadata = DataObject()
|
|
159
|
+
metadata.comp_name = var
|
|
160
|
+
metadata.title = titles.get(var, var)
|
|
161
|
+
metadata.matrix = "instrument"
|
|
162
|
+
metadata.unit = unit_of_var[var]
|
|
163
|
+
metadata.cal_scale = ""
|
|
164
|
+
values = [val for val in df_export[var_col]]
|
|
165
|
+
nas.variables.append(
|
|
166
|
+
DataObject(
|
|
167
|
+
values_=values,
|
|
168
|
+
flags=empty_flags,
|
|
169
|
+
flagcol=True,
|
|
170
|
+
metadata=metadata,
|
|
171
|
+
)
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
this_nan_flags = nan_flags.copy()
|
|
176
|
+
|
|
177
|
+
if data_level in concs_data_levels and invalidate_conc_calib:
|
|
135
178
|
# Set the flag to the invalid value instead of the valid calibration
|
|
179
|
+
this_nan_flags.append(QA_Flag.CALIBRATION)
|
|
136
180
|
dict_flags_to_ebas[QA_Flag.CALIBRATION] = 980
|
|
137
181
|
|
|
138
182
|
for sub in compounds:
|
|
@@ -144,10 +188,7 @@ def set_dataframe(
|
|
|
144
188
|
)
|
|
145
189
|
for flag in flag_col
|
|
146
190
|
]
|
|
147
|
-
nan_flag = np.logical_or.reduce([flag_col & flag.value for flag in nan_flags])
|
|
148
|
-
if data_level == DataLevel.CONCS:
|
|
149
|
-
# Invalidate also the calibration runs
|
|
150
|
-
nan_flag |= flag_col & QA_Flag.CALIBRATION.value
|
|
191
|
+
nan_flag = np.logical_or.reduce([flag_col & flag.value for flag in this_nan_flags])
|
|
151
192
|
|
|
152
193
|
for var in vars_to_export[data_level]:
|
|
153
194
|
ebas_name = compounds[sub]
|
|
@@ -273,6 +314,10 @@ def nas_to_avoca(nas: EbasNasaAmes) -> pd.DataFrame:
|
|
|
273
314
|
clean_for_df[("-", "status")] = calib_ids.astype(int)
|
|
274
315
|
continue
|
|
275
316
|
|
|
317
|
+
if comp_name in additional_vars:
|
|
318
|
+
clean_for_df[("-", comp_name)] = np.array(values, dtype=float)
|
|
319
|
+
continue
|
|
320
|
+
|
|
276
321
|
# Split the title on the _
|
|
277
322
|
comp_name = comp_name.split("_")
|
|
278
323
|
if len(comp_name) == 1:
|
|
@@ -288,6 +333,10 @@ def nas_to_avoca(nas: EbasNasaAmes) -> pd.DataFrame:
|
|
|
288
333
|
elif len(comp_name) == 3:
|
|
289
334
|
compund, var_first, var_second = comp_name
|
|
290
335
|
variable = f"{var_first}_{var_second}"
|
|
336
|
+
elif len(comp_name) == 4 and comp_name[-1] == "compounds":
|
|
337
|
+
# Concentration of merged compounds
|
|
338
|
+
compund = "_".join(comp_name)
|
|
339
|
+
variable = "C"
|
|
291
340
|
else:
|
|
292
341
|
logger.warning(f"passing {comp_name}, could not be understood. Skipping.")
|
|
293
342
|
continue
|
avoca/qa_class/abstract.py
CHANGED
|
@@ -4,13 +4,17 @@ from __future__ import annotations
|
|
|
4
4
|
|
|
5
5
|
import logging
|
|
6
6
|
from abc import ABC, abstractmethod
|
|
7
|
-
from typing import Sequence
|
|
7
|
+
from typing import TYPE_CHECKING, Sequence
|
|
8
8
|
|
|
9
9
|
import pandas as pd
|
|
10
10
|
|
|
11
11
|
from avoca.flags import QA_Flag
|
|
12
12
|
from avoca.requirements import PythonPackageRequirement
|
|
13
13
|
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from matplotlib.axes import Axes
|
|
16
|
+
from matplotlib.figure import Figure
|
|
17
|
+
|
|
14
18
|
|
|
15
19
|
class AbstractQA_Assigner(ABC):
|
|
16
20
|
"""Abstract class for QA assigners.
|
|
@@ -74,10 +78,11 @@ class AbstractQA_Assigner(ABC):
|
|
|
74
78
|
stopp: pd.Timestamp = pd.Timestamp.max,
|
|
75
79
|
name: str | None = None,
|
|
76
80
|
runtypes: list[str] = None,
|
|
81
|
+
log_level: int = logging.INFO,
|
|
77
82
|
):
|
|
78
83
|
"""Create a new QA assigner."""
|
|
79
84
|
self.logger = logging.getLogger(type(self).__name__)
|
|
80
|
-
self.logger.setLevel(
|
|
85
|
+
self.logger.setLevel(log_level)
|
|
81
86
|
|
|
82
87
|
self.name = name or type(self).__name__
|
|
83
88
|
|
|
@@ -161,6 +166,6 @@ class AbstractQA_Assigner(ABC):
|
|
|
161
166
|
raise NotImplementedError
|
|
162
167
|
|
|
163
168
|
# Optional method
|
|
164
|
-
def plot(self):
|
|
169
|
+
def plot(self) -> tuple[Figure, Sequence[Axes]]:
|
|
165
170
|
"""Plot the QA assigner."""
|
|
166
171
|
raise NotImplementedError(f"{type(self).__name__} does not have a plot method.")
|
avoca/qa_class/rt.py
CHANGED
|
@@ -17,15 +17,38 @@ class RetentionTimeChecker(AbstractQA_Assigner):
|
|
|
17
17
|
retention times of the measurements.
|
|
18
18
|
The correlation is usually very high. If one compound has a low correlation
|
|
19
19
|
with the others, it probably means that is was miss-assigned at some points.
|
|
20
|
+
|
|
21
|
+
:param rt_threshold: The threshold for the retention time deviation.
|
|
22
|
+
Unit is time unit (minutes or seconds, as in the data).
|
|
23
|
+
This will try to fit a linear regression from the average training
|
|
24
|
+
retention times to the measured ones for each sample.
|
|
25
|
+
If after the regression a datapoint is higher than this threshold,
|
|
26
|
+
it will be removed.
|
|
27
|
+
:param rt_relative_max_deviation: The maximum relative deviation allowed
|
|
28
|
+
from the average retention time.
|
|
29
|
+
This is used to remove outliers that are too far from the average.
|
|
30
|
+
if 0.5 is given, it means that the retention time can be 50% higher or lower
|
|
31
|
+
than the average retention time.
|
|
20
32
|
"""
|
|
21
33
|
|
|
22
34
|
runtypes: list[str] = ["air", "std"]
|
|
35
|
+
variable: str = "rt"
|
|
23
36
|
flag = QA_Flag.SUSPICIOUS_RT
|
|
24
37
|
|
|
25
|
-
RT_THRESHOLD: float = 2.0
|
|
26
|
-
|
|
27
38
|
rt_ref: pd.Series
|
|
28
39
|
|
|
40
|
+
def __init__(
|
|
41
|
+
self,
|
|
42
|
+
rt_threshold: float = 2.0,
|
|
43
|
+
rt_relative_max_deviation: float = 0.2,
|
|
44
|
+
poly_order: int = 1,
|
|
45
|
+
**kwargs,
|
|
46
|
+
):
|
|
47
|
+
super().__init__(**kwargs)
|
|
48
|
+
self.rt_threshold = rt_threshold
|
|
49
|
+
self.rt_relative_max_deviation = rt_relative_max_deviation
|
|
50
|
+
self.poly_order = poly_order
|
|
51
|
+
|
|
29
52
|
def fit(self, df: pd.DataFrame):
|
|
30
53
|
cols = [(compound, "rt") for compound in self.compounds]
|
|
31
54
|
|
|
@@ -49,6 +72,7 @@ class RetentionTimeChecker(AbstractQA_Assigner):
|
|
|
49
72
|
|
|
50
73
|
# Get a dataframe for a mean reference
|
|
51
74
|
self.rt_ref = df_rt.median(axis="index")
|
|
75
|
+
self.rt_std = df_rt.std(axis="index")
|
|
52
76
|
|
|
53
77
|
def assign(self, df: pd.DataFrame) -> dict[str, pd.Index]:
|
|
54
78
|
"""Assing flags when expected rt values does not match the measured ones."""
|
|
@@ -58,27 +82,29 @@ class RetentionTimeChecker(AbstractQA_Assigner):
|
|
|
58
82
|
df_rt = df[rt_cols]
|
|
59
83
|
# Take the reference retention times
|
|
60
84
|
x = self.rt_ref.loc[rt_cols].to_numpy()
|
|
85
|
+
std = self.rt_std.loc[rt_cols].to_numpy()
|
|
61
86
|
|
|
62
87
|
outliers = {}
|
|
63
88
|
|
|
64
89
|
for t, row in df_rt.iterrows():
|
|
65
90
|
# Make a lin reg line
|
|
66
91
|
y = row.to_numpy()
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
self.
|
|
70
|
-
|
|
71
|
-
" measured"
|
|
72
|
-
)
|
|
73
|
-
continue
|
|
92
|
+
# Remove the points that are too far from the reference
|
|
93
|
+
mask_bad = (
|
|
94
|
+
(np.abs(y - x) / x) > self.rt_relative_max_deviation
|
|
95
|
+
) | np.isnan(y)
|
|
74
96
|
|
|
75
|
-
|
|
76
|
-
f = np.poly1d(params)
|
|
77
|
-
y_lin_reg = f(x)
|
|
97
|
+
if np.sum(~mask_bad) > self.poly_order + 2:
|
|
78
98
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
99
|
+
params = np.polyfit(x[~mask_bad], y[~mask_bad], self.poly_order)
|
|
100
|
+
f = np.poly1d(params)
|
|
101
|
+
y_lin_reg = f(x)
|
|
102
|
+
|
|
103
|
+
# Get the points which are too far from the reg line
|
|
104
|
+
error = y - y_lin_reg
|
|
105
|
+
mask_bad |= np.abs(error) > self.rt_threshold
|
|
106
|
+
|
|
107
|
+
if any(mask_bad):
|
|
82
108
|
outliers[t] = mask_bad
|
|
83
109
|
|
|
84
110
|
# Create a dataframe with the flags
|
|
@@ -95,12 +121,12 @@ class RetentionTimeChecker(AbstractQA_Assigner):
|
|
|
95
121
|
|
|
96
122
|
import matplotlib.pyplot as plt
|
|
97
123
|
|
|
98
|
-
fig, ax = plt.subplots()
|
|
124
|
+
fig, ax = plt.subplots(figsize=(16, 9))
|
|
99
125
|
|
|
100
126
|
assigned = self.assign(self.df_train)
|
|
101
127
|
|
|
102
128
|
for compound in self.compounds:
|
|
103
|
-
ax.scatter(
|
|
129
|
+
points = ax.scatter(
|
|
104
130
|
self.df_train.index,
|
|
105
131
|
self.df_train[(compound, "rt")],
|
|
106
132
|
label=compound,
|
|
@@ -115,6 +141,15 @@ class RetentionTimeChecker(AbstractQA_Assigner):
|
|
|
115
141
|
color="red",
|
|
116
142
|
marker="x",
|
|
117
143
|
)
|
|
144
|
+
# Line for the mean retention time
|
|
145
|
+
ax.axhline(
|
|
146
|
+
self.rt_ref[(compound, "rt")],
|
|
147
|
+
color=points.get_facecolor()[0],
|
|
148
|
+
linestyle="--",
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
ax.set_ylabel("Retention time")
|
|
152
|
+
ax.set_xlabel("Sample")
|
|
118
153
|
|
|
119
154
|
ax.legend()
|
|
120
155
|
plt.show()
|
avoca/qa_class/zscore.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: avoca
|
|
3
|
-
Version: 0.11.3
|
|
3
|
+
Version: 0.11.4
|
|
4
4
|
Summary: @voc@: Quality assessement of measurement data
|
|
5
5
|
Project-URL: Homepage, https://gitlab.com/empa503/atmospheric-measurements/avoca
|
|
6
6
|
Project-URL: Bug Tracker, https://gitlab.com/empa503/atmospheric-measurements/avoca/-/issues
|
|
@@ -8,7 +8,7 @@ avoca/manager.py,sha256=ET-ATrSLi2rSV7PjBzwpjj0V_60MFxSIZqQ03aEIbdA,5284
|
|
|
8
8
|
avoca/requirements.py,sha256=q4z6bJ6iW5jSy10Y0elfE9BoEcAZC2-kUqYi4zA6TGE,563
|
|
9
9
|
avoca/settings.py,sha256=Px-sCGIlRyWI2RBJaGUY0K1V60kOZY9n41eft92bjN4,2112
|
|
10
10
|
avoca/bindings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
|
-
avoca/bindings/ebas.py,sha256=
|
|
11
|
+
avoca/bindings/ebas.py,sha256=48cYR-jwc3GMZCVhYYbMVUj1RgFAoQNtQC2kOpA1iAA,18827
|
|
12
12
|
avoca/bindings/ebas_flags.py,sha256=uzPrd45OoULycCRYWCwHQG1exUDoWSe8JmULOAsEHRs,2537
|
|
13
13
|
avoca/bindings/gcwerks-report.conf,sha256=jO0I62DfgzrXXS1FuiW8ds-oc1_j8kpFCO61Fk-erBw,230
|
|
14
14
|
avoca/bindings/gcwerks.py,sha256=a5n9Iot3r_ejnCEdILk4hE2uioONB75Soq5fvSLlDoo,14879
|
|
@@ -17,19 +17,19 @@ avoca/bindings/nabel.py,sha256=VbC_ARvtso8onILAD8gROt5Y2URdx6NfAqMn4p1mUWU,3020
|
|
|
17
17
|
avoca/bindings/qa_tool.py,sha256=ZPtQo8dHo6wooIlc9Vzk8y91Qgso-RBtGR_h2TAZQ24,7583
|
|
18
18
|
avoca/bindings/synspec.py,sha256=W5RnBu-6eetmwjM8iMBe4wNwVNIaVpNW3bwa2ykGM2U,1733
|
|
19
19
|
avoca/qa_class/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
|
-
avoca/qa_class/abstract.py,sha256=
|
|
20
|
+
avoca/qa_class/abstract.py,sha256=KCK9OhKNWlMje-5D0hgMIf-g64D_kRwRsoCZ_R4VuqI,5612
|
|
21
21
|
avoca/qa_class/concs.py,sha256=TcQic69I1Kr40RJgCILTtyjVLn0K6_q6I5Y1Vi3dKwk,813
|
|
22
22
|
avoca/qa_class/generate_classes_doc.py,sha256=osz01SRZ5SrwJXVlmbcainVwVjmealSSIdbzXzUEGKQ,1915
|
|
23
23
|
avoca/qa_class/invalid.py,sha256=PDZHN0RZ8jND3QY09UcbwJYjjT6VqS4a0klO3QYiFig,2650
|
|
24
|
-
avoca/qa_class/rt.py,sha256=
|
|
24
|
+
avoca/qa_class/rt.py,sha256=Bgv0DSSR-hIJ9kI6AdUkV6sXVS65gBxbASkk4TUHbnQ,5293
|
|
25
25
|
avoca/qa_class/test.py,sha256=Xc88_Vwf3hvPiKKl4ILxZ2N985SY8eujUdnAoQu4mbo,591
|
|
26
|
-
avoca/qa_class/zscore.py,sha256=
|
|
26
|
+
avoca/qa_class/zscore.py,sha256=jDw2UBmf7KBkskGOD5bgFy3RgNYUjc-9tYjSU-3L1ws,16714
|
|
27
27
|
avoca/testing/__init__.py,sha256=CzkugadVit48-eMoMVtojZLHeSKgnmMMen6sGu6Q42Y,108
|
|
28
28
|
avoca/testing/df.py,sha256=Nc0GUYTApZgYyUTMnHMTbSKLiA5ty9Bg7gUGtnoFYMI,1826
|
|
29
29
|
avoca/utils/__init__.py,sha256=LEA2jJsqwSK2DBzXg00DbPhM1fXXREJ0XxLeuJtKapY,1398
|
|
30
30
|
avoca/utils/flags_doc.py,sha256=cS7yKpxVh_SA6EdH3lSy9UpcIvhGwzAELRbkXN3CxO8,4168
|
|
31
31
|
avoca/utils/torch_models.py,sha256=53TgOgSPMOOSGYy2cm1EGSK7qQkYMGEOq319KKM_Ir0,1015
|
|
32
|
-
avoca-0.11.
|
|
33
|
-
avoca-0.11.
|
|
34
|
-
avoca-0.11.
|
|
35
|
-
avoca-0.11.
|
|
32
|
+
avoca-0.11.4.dist-info/METADATA,sha256=tx6uIcmzGJU-Gf1RKfxW1crR6hhYc5AwDBkVks4iuHU,1570
|
|
33
|
+
avoca-0.11.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
34
|
+
avoca-0.11.4.dist-info/licenses/LICENCE.txt,sha256=4MY53j3v7tEKwjyuriVz9YjB4Dscm2nDMB2CcG9lOmk,1059
|
|
35
|
+
avoca-0.11.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|