avoca 0.12.0__py3-none-any.whl → 0.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avoca/bindings/ebas.py +16 -3
- avoca/bindings/ebas_flags.py +1 -1
- avoca/plots.py +26 -2
- avoca/qa_class/abstract.py +9 -0
- avoca/qa_class/rolling.py +133 -0
- avoca/testing/df.py +1 -0
- avoca/testing/utils.py +9 -0
- {avoca-0.12.0.dist-info → avoca-0.14.0.dist-info}/METADATA +1 -1
- {avoca-0.12.0.dist-info → avoca-0.14.0.dist-info}/RECORD +11 -9
- {avoca-0.12.0.dist-info → avoca-0.14.0.dist-info}/WHEEL +1 -1
- {avoca-0.12.0.dist-info → avoca-0.14.0.dist-info}/licenses/LICENCE.txt +0 -0
avoca/bindings/ebas.py
CHANGED
|
@@ -171,7 +171,6 @@ def set_dataframe(
|
|
|
171
171
|
)
|
|
172
172
|
)
|
|
173
173
|
|
|
174
|
-
|
|
175
174
|
this_nan_flags = nan_flags.copy()
|
|
176
175
|
|
|
177
176
|
if data_level in concs_data_levels and invalidate_conc_calib:
|
|
@@ -188,7 +187,9 @@ def set_dataframe(
|
|
|
188
187
|
)
|
|
189
188
|
for flag in flag_col
|
|
190
189
|
]
|
|
191
|
-
nan_flag = np.logical_or.reduce(
|
|
190
|
+
nan_flag = np.logical_or.reduce(
|
|
191
|
+
[flag_col & flag.value for flag in this_nan_flags]
|
|
192
|
+
)
|
|
192
193
|
|
|
193
194
|
for var in vars_to_export[data_level]:
|
|
194
195
|
ebas_name = compounds[sub]
|
|
@@ -199,6 +200,16 @@ def set_dataframe(
|
|
|
199
200
|
for val, isnan in zip(serie_to_export, nan_flag)
|
|
200
201
|
]
|
|
201
202
|
|
|
203
|
+
if var == "conc_calib":
|
|
204
|
+
# Invalidate calibration concentration for non-calibration samples
|
|
205
|
+
this_flags = [
|
|
206
|
+
flags_ebas
|
|
207
|
+
+ ([] if (QA_Flag.CALIBRATION.value & flag_avoca) else [980])
|
|
208
|
+
for flags_ebas, flag_avoca in zip(flags, flag_col)
|
|
209
|
+
]
|
|
210
|
+
else:
|
|
211
|
+
this_flags = flags
|
|
212
|
+
|
|
202
213
|
metadata = DataObject()
|
|
203
214
|
metadata.comp_name = (
|
|
204
215
|
f"{ebas_name}_{ebas_compname_of_var[var]}"
|
|
@@ -214,7 +225,9 @@ def set_dataframe(
|
|
|
214
225
|
metadata.matrix = "air"
|
|
215
226
|
# add the variable
|
|
216
227
|
nas.variables.append(
|
|
217
|
-
DataObject(
|
|
228
|
+
DataObject(
|
|
229
|
+
values_=values, flags=this_flags, flagcol=True, metadata=metadata
|
|
230
|
+
)
|
|
218
231
|
)
|
|
219
232
|
|
|
220
233
|
if var == "conc_calib":
|
avoca/bindings/ebas_flags.py
CHANGED
avoca/plots.py
CHANGED
|
@@ -69,6 +69,8 @@ def plot_yearly_plotly(
|
|
|
69
69
|
df: pd.DataFrame,
|
|
70
70
|
compound: str,
|
|
71
71
|
df_new: pd.DataFrame | None = None,
|
|
72
|
+
opacity: float = 0.5,
|
|
73
|
+
size: int = 6,
|
|
72
74
|
) -> "plotly.graph_objs._figure.Figure":
|
|
73
75
|
"""Plot yearly data using plotly."""
|
|
74
76
|
import plotly.express as px
|
|
@@ -97,7 +99,28 @@ def plot_yearly_plotly(
|
|
|
97
99
|
df_to_plot = df_to_plot.pivot_table(
|
|
98
100
|
index=df_to_plot.index, columns="year", values="conc"
|
|
99
101
|
)
|
|
100
|
-
fig =
|
|
102
|
+
fig = go.Figure()
|
|
103
|
+
|
|
104
|
+
hover_template = "Timestamp: %{text}<br>Conc: %{y:.2f} ppt"
|
|
105
|
+
|
|
106
|
+
kwargs = {
|
|
107
|
+
"mode": "markers",
|
|
108
|
+
"opacity": opacity,
|
|
109
|
+
"marker": dict(size=size),
|
|
110
|
+
"hovertemplate": hover_template,
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
for year in df_to_plot.columns:
|
|
114
|
+
fig.add_trace(
|
|
115
|
+
go.Scatter(
|
|
116
|
+
x=df_to_plot.index,
|
|
117
|
+
y=df_to_plot[year],
|
|
118
|
+
name=str(year),
|
|
119
|
+
zorder=-year,
|
|
120
|
+
text=dt[dt.dt.year == year].dt.strftime("%y%m%d.%H%M"),
|
|
121
|
+
**kwargs,
|
|
122
|
+
)
|
|
123
|
+
)
|
|
101
124
|
x_values = pd.date_range(start="2024-01-01", end="2024-12-31", freq="MS")
|
|
102
125
|
|
|
103
126
|
dt_new = df_new[dt_column]
|
|
@@ -105,8 +128,9 @@ def plot_yearly_plotly(
|
|
|
105
128
|
go.Scatter(
|
|
106
129
|
x=dt_new.dt.dayofyear + dt_new.dt.hour / 24.0,
|
|
107
130
|
y=df_new[(compound, "conc")],
|
|
108
|
-
mode="markers",
|
|
109
131
|
name="New Data",
|
|
132
|
+
text=dt_new.dt.strftime("%y%m%d.%H%M"),
|
|
133
|
+
**kwargs,
|
|
110
134
|
)
|
|
111
135
|
)
|
|
112
136
|
fig.update_layout(
|
avoca/qa_class/abstract.py
CHANGED
|
@@ -49,6 +49,7 @@ class AbstractQA_Assigner(ABC):
|
|
|
49
49
|
flag: QA_Flag
|
|
50
50
|
runtypes: list[str] | None
|
|
51
51
|
required_packages: list[PythonPackageRequirement] | None = None
|
|
52
|
+
require_datetime_index: bool = False
|
|
52
53
|
|
|
53
54
|
# Options that can be set by the user
|
|
54
55
|
name: str
|
|
@@ -142,6 +143,14 @@ class AbstractQA_Assigner(ABC):
|
|
|
142
143
|
f"Please check the data and the settings for {self.name}"
|
|
143
144
|
)
|
|
144
145
|
|
|
146
|
+
if self.require_datetime_index:
|
|
147
|
+
if not isinstance(df.index, pd.DatetimeIndex):
|
|
148
|
+
raise ValueError(
|
|
149
|
+
f"Assigner {self} requires a DatetimeIndex but the dataframe"
|
|
150
|
+
" does not have one. \n "
|
|
151
|
+
f"Please check the data and the settings for {self.name}"
|
|
152
|
+
)
|
|
153
|
+
|
|
145
154
|
@abstractmethod
|
|
146
155
|
def fit(self, df: pd.DataFrame):
|
|
147
156
|
"""Fit the QA assigner on some data.
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""Quality assurance based on statistical methods."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import timedelta
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
import pandas as pd
|
|
10
|
+
|
|
11
|
+
from avoca.qa_class.zscore import ExtremeValues
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
|
|
15
|
+
from avoca.utils.torch_models import MultipleRegressionModel
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class RollingWindow(ExtremeValues):
|
|
19
|
+
"""Detect extreme values in rolling windows.
|
|
20
|
+
|
|
21
|
+
The method is based on outliers in a rolling window using the median and standard deviation.
|
|
22
|
+
The training is done directly on the fitted data.
|
|
23
|
+
|
|
24
|
+
:param variable: The variable to check for extreme values.
|
|
25
|
+
:param threshold: The number of rolling standard deviations away from the rolling median beyond which values are flagged.
|
|
26
|
+
:param use_log_normal: If True, the log of the values will be used to calculate the z-score.
|
|
27
|
+
This can be useful if the values are log-normal distributed.
|
|
28
|
+
:param only_greater: If True, only values greater than the threshold will be flagged.
|
|
29
|
+
The values lower than the negative threshold will not be flagged.
|
|
30
|
+
By default, this is True if use_log_normal is True, and False otherwise.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
require_datetime_index = True
|
|
34
|
+
|
|
35
|
+
rolling_window: timedelta
|
|
36
|
+
|
|
37
|
+
def __init__(
|
|
38
|
+
self,
|
|
39
|
+
*args,
|
|
40
|
+
rolling_window: timedelta = timedelta(days=7),
|
|
41
|
+
threshold: float = 1.5,
|
|
42
|
+
**kwargs,
|
|
43
|
+
):
|
|
44
|
+
super().__init__(*args, threshold=threshold, **kwargs)
|
|
45
|
+
self.rolling_window = rolling_window
|
|
46
|
+
|
|
47
|
+
def fit(self, df: pd.DataFrame):
|
|
48
|
+
|
|
49
|
+
self.check_columns_or_raise(df, columns=self._stats_columns)
|
|
50
|
+
|
|
51
|
+
self.df_train = df[self._stats_columns]
|
|
52
|
+
|
|
53
|
+
def assign(self, df: pd.DataFrame) -> dict[str, pd.Index]:
|
|
54
|
+
df = df[self._stats_columns]
|
|
55
|
+
df = self._clean_data(df)
|
|
56
|
+
if self.use_log_normal:
|
|
57
|
+
# Replace <=0 with NaN
|
|
58
|
+
df = df.where(df > 0, np.nan)
|
|
59
|
+
df = df.map(lambda x: np.log(x))
|
|
60
|
+
|
|
61
|
+
rolling = df.rolling(window=self.rolling_window)
|
|
62
|
+
means = rolling.median()
|
|
63
|
+
stds = rolling.std()
|
|
64
|
+
|
|
65
|
+
self.rolling_median = means
|
|
66
|
+
self.rolling_std = stds
|
|
67
|
+
|
|
68
|
+
thresholds = means + stds * self.threshold
|
|
69
|
+
|
|
70
|
+
df_fail = df > thresholds
|
|
71
|
+
if not self.only_greater:
|
|
72
|
+
df_fail = df_fail | (df < (means - stds * self.threshold))
|
|
73
|
+
|
|
74
|
+
out_dict = {}
|
|
75
|
+
for compound in self.compounds:
|
|
76
|
+
col = (compound, self.variable)
|
|
77
|
+
this_c_fail = df_fail[col]
|
|
78
|
+
out_dict[compound] = this_c_fail.loc[this_c_fail].index
|
|
79
|
+
|
|
80
|
+
return out_dict
|
|
81
|
+
|
|
82
|
+
def plot(self):
|
|
83
|
+
|
|
84
|
+
import matplotlib.pyplot as plt
|
|
85
|
+
|
|
86
|
+
fig, axes = plt.subplots(
|
|
87
|
+
len(self.compounds), 1, figsize=(6, 3 * len(self.compounds)), sharex=True
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
x = self.dt if hasattr(self, "dt") else self.df_train.index
|
|
91
|
+
x = pd.Series(x, index=self.df_train.index)
|
|
92
|
+
|
|
93
|
+
outliers = self.assign(self.df_train)
|
|
94
|
+
|
|
95
|
+
for i, compound in enumerate(self.compounds):
|
|
96
|
+
ax = axes[i]
|
|
97
|
+
col = (compound, self.variable)
|
|
98
|
+
ax.scatter(
|
|
99
|
+
x,
|
|
100
|
+
self.df_train[col],
|
|
101
|
+
s=1,
|
|
102
|
+
label="darkblue",
|
|
103
|
+
)
|
|
104
|
+
median = self.rolling_median[col]
|
|
105
|
+
std = self.rolling_std[col]
|
|
106
|
+
top, bottom = median + std * self.threshold, median - std * self.threshold
|
|
107
|
+
|
|
108
|
+
ax.fill_between(
|
|
109
|
+
x,
|
|
110
|
+
top,
|
|
111
|
+
bottom,
|
|
112
|
+
color="lightgray",
|
|
113
|
+
label="Rolling threshold",
|
|
114
|
+
alpha=0.5,
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
outlier_indices = outliers[compound]
|
|
118
|
+
ax.scatter(
|
|
119
|
+
x.loc[outlier_indices],
|
|
120
|
+
self.df_train.loc[outlier_indices, col],
|
|
121
|
+
s=10,
|
|
122
|
+
marker="x",
|
|
123
|
+
color="red",
|
|
124
|
+
label="Extreme values",
|
|
125
|
+
)
|
|
126
|
+
ax.set_title(
|
|
127
|
+
f"{compound} +- {self.threshold} std",
|
|
128
|
+
# Under the top line
|
|
129
|
+
y=0.8,
|
|
130
|
+
)
|
|
131
|
+
ax.tick_params(axis="x", rotation=25)
|
|
132
|
+
|
|
133
|
+
return fig, axes
|
avoca/testing/df.py
CHANGED
avoca/testing/utils.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def make_dt_index(df: pd.DataFrame | pd.Index) -> pd.DataFrame | pd.Index:
|
|
5
|
+
"""Create a datetime index for the dataframe."""
|
|
6
|
+
index = pd.date_range(start="2023-01-01", periods=len(df), freq="h")
|
|
7
|
+
if isinstance(df, pd.Index):
|
|
8
|
+
return index
|
|
9
|
+
return df.set_index(index)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: avoca
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.14.0
|
|
4
4
|
Summary: @voc@: Quality assessement of measurement data
|
|
5
5
|
Project-URL: Homepage, https://gitlab.com/empa503/atmospheric-measurements/avoca
|
|
6
6
|
Project-URL: Bug Tracker, https://gitlab.com/empa503/atmospheric-measurements/avoca/-/issues
|
|
@@ -5,12 +5,12 @@ avoca/flags.py,sha256=wobuZoIJh6dFsdiqqYJLZ_AHe4pcFE9tjuoimNXLjIQ,1428
|
|
|
5
5
|
avoca/io.py,sha256=67D5x1qkLqWC7wWehyOfX96L4H3-tn9x2V4jMCoIRqA,729
|
|
6
6
|
avoca/logging.py,sha256=BrxgZQRfnkPSoQ0ZXhOzzhIsmbyjKvaJNG55MdM9jmA,86
|
|
7
7
|
avoca/manager.py,sha256=ET-ATrSLi2rSV7PjBzwpjj0V_60MFxSIZqQ03aEIbdA,5284
|
|
8
|
-
avoca/plots.py,sha256=
|
|
8
|
+
avoca/plots.py,sha256=UjfUgbfxd2veMOGHtSvJycru-w3gWsGjOVO__I-zqzQ,4205
|
|
9
9
|
avoca/requirements.py,sha256=q4z6bJ6iW5jSy10Y0elfE9BoEcAZC2-kUqYi4zA6TGE,563
|
|
10
10
|
avoca/settings.py,sha256=Px-sCGIlRyWI2RBJaGUY0K1V60kOZY9n41eft92bjN4,2112
|
|
11
11
|
avoca/bindings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
-
avoca/bindings/ebas.py,sha256=
|
|
13
|
-
avoca/bindings/ebas_flags.py,sha256=
|
|
12
|
+
avoca/bindings/ebas.py,sha256=vil4u4G6jGJrE12Z7nBvGpJuTAT9QyvbNNyWsWr5UaM,19306
|
|
13
|
+
avoca/bindings/ebas_flags.py,sha256=N-JpmA6WCFjcYhvt7XjyOZMbR7vCdyPV6uHBlF45UJU,2397
|
|
14
14
|
avoca/bindings/gcwerks-report.conf,sha256=jO0I62DfgzrXXS1FuiW8ds-oc1_j8kpFCO61Fk-erBw,230
|
|
15
15
|
avoca/bindings/gcwerks.py,sha256=a5n9Iot3r_ejnCEdILk4hE2uioONB75Soq5fvSLlDoo,14879
|
|
16
16
|
avoca/bindings/gcwerks_gui.py,sha256=Fj3p8obFq3lWrWW0LlA8WBALP8-U70hvps5vZEt4NaM,9458
|
|
@@ -18,19 +18,21 @@ avoca/bindings/nabel.py,sha256=VbC_ARvtso8onILAD8gROt5Y2URdx6NfAqMn4p1mUWU,3020
|
|
|
18
18
|
avoca/bindings/qa_tool.py,sha256=ninHe3mrJ8GULxRCkRTZixw-vmNhqu4zwwONd5aXd1Q,9735
|
|
19
19
|
avoca/bindings/synspec.py,sha256=W5RnBu-6eetmwjM8iMBe4wNwVNIaVpNW3bwa2ykGM2U,1733
|
|
20
20
|
avoca/qa_class/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
|
-
avoca/qa_class/abstract.py,sha256=
|
|
21
|
+
avoca/qa_class/abstract.py,sha256=CLt-6WFhZhrvKTLVHpdbJYMFM50VPOGiO-GG6IRPWzA,6011
|
|
22
22
|
avoca/qa_class/concs.py,sha256=TcQic69I1Kr40RJgCILTtyjVLn0K6_q6I5Y1Vi3dKwk,813
|
|
23
23
|
avoca/qa_class/generate_classes_doc.py,sha256=osz01SRZ5SrwJXVlmbcainVwVjmealSSIdbzXzUEGKQ,1915
|
|
24
24
|
avoca/qa_class/invalid.py,sha256=PDZHN0RZ8jND3QY09UcbwJYjjT6VqS4a0klO3QYiFig,2650
|
|
25
|
+
avoca/qa_class/rolling.py,sha256=CQ2E0qJ7FxDT4TucItkJRmkqhzMoNSnwtVQQ_HzX9Jk,4059
|
|
25
26
|
avoca/qa_class/rt.py,sha256=Bgv0DSSR-hIJ9kI6AdUkV6sXVS65gBxbASkk4TUHbnQ,5293
|
|
26
27
|
avoca/qa_class/test.py,sha256=Xc88_Vwf3hvPiKKl4ILxZ2N985SY8eujUdnAoQu4mbo,591
|
|
27
28
|
avoca/qa_class/zscore.py,sha256=jDw2UBmf7KBkskGOD5bgFy3RgNYUjc-9tYjSU-3L1ws,16714
|
|
28
29
|
avoca/testing/__init__.py,sha256=CzkugadVit48-eMoMVtojZLHeSKgnmMMen6sGu6Q42Y,108
|
|
29
|
-
avoca/testing/df.py,sha256=
|
|
30
|
+
avoca/testing/df.py,sha256=UQm6TdTDVRWvRNM5WnSWh6vdvDR1lqLNg0ti-B1L760,1865
|
|
31
|
+
avoca/testing/utils.py,sha256=jVV0mIwLIpr0UBLMk8RjZH5J_dV_b6Gugxzo_WRgWU0,308
|
|
30
32
|
avoca/utils/__init__.py,sha256=LEA2jJsqwSK2DBzXg00DbPhM1fXXREJ0XxLeuJtKapY,1398
|
|
31
33
|
avoca/utils/flags_doc.py,sha256=cS7yKpxVh_SA6EdH3lSy9UpcIvhGwzAELRbkXN3CxO8,4168
|
|
32
34
|
avoca/utils/torch_models.py,sha256=53TgOgSPMOOSGYy2cm1EGSK7qQkYMGEOq319KKM_Ir0,1015
|
|
33
|
-
avoca-0.
|
|
34
|
-
avoca-0.
|
|
35
|
-
avoca-0.
|
|
36
|
-
avoca-0.
|
|
35
|
+
avoca-0.14.0.dist-info/METADATA,sha256=CayW94kozHUxF8sbKxE0pnWZnS0W5cjkEUKU7_QfgEc,1570
|
|
36
|
+
avoca-0.14.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
37
|
+
avoca-0.14.0.dist-info/licenses/LICENCE.txt,sha256=4MY53j3v7tEKwjyuriVz9YjB4Dscm2nDMB2CcG9lOmk,1059
|
|
38
|
+
avoca-0.14.0.dist-info/RECORD,,
|
|
File without changes
|