avoca 0.11.4__py3-none-any.whl → 0.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avoca/bindings/ebas.py +16 -3
- avoca/bindings/ebas_flags.py +3 -9
- avoca/bindings/qa_tool.py +65 -1
- avoca/flags.py +8 -0
- avoca/plots.py +146 -0
- avoca/qa_class/abstract.py +9 -0
- avoca/qa_class/rolling.py +133 -0
- avoca/testing/df.py +1 -0
- avoca/testing/utils.py +9 -0
- {avoca-0.11.4.dist-info → avoca-0.14.0.dist-info}/METADATA +1 -1
- {avoca-0.11.4.dist-info → avoca-0.14.0.dist-info}/RECORD +13 -10
- {avoca-0.11.4.dist-info → avoca-0.14.0.dist-info}/WHEEL +1 -1
- {avoca-0.11.4.dist-info → avoca-0.14.0.dist-info}/licenses/LICENCE.txt +0 -0
avoca/bindings/ebas.py
CHANGED
|
@@ -171,7 +171,6 @@ def set_dataframe(
|
|
|
171
171
|
)
|
|
172
172
|
)
|
|
173
173
|
|
|
174
|
-
|
|
175
174
|
this_nan_flags = nan_flags.copy()
|
|
176
175
|
|
|
177
176
|
if data_level in concs_data_levels and invalidate_conc_calib:
|
|
@@ -188,7 +187,9 @@ def set_dataframe(
|
|
|
188
187
|
)
|
|
189
188
|
for flag in flag_col
|
|
190
189
|
]
|
|
191
|
-
nan_flag = np.logical_or.reduce(
|
|
190
|
+
nan_flag = np.logical_or.reduce(
|
|
191
|
+
[flag_col & flag.value for flag in this_nan_flags]
|
|
192
|
+
)
|
|
192
193
|
|
|
193
194
|
for var in vars_to_export[data_level]:
|
|
194
195
|
ebas_name = compounds[sub]
|
|
@@ -199,6 +200,16 @@ def set_dataframe(
|
|
|
199
200
|
for val, isnan in zip(serie_to_export, nan_flag)
|
|
200
201
|
]
|
|
201
202
|
|
|
203
|
+
if var == "conc_calib":
|
|
204
|
+
# Invalidate calibration concentration for non-calibration samples
|
|
205
|
+
this_flags = [
|
|
206
|
+
flags_ebas
|
|
207
|
+
+ ([] if (QA_Flag.CALIBRATION.value & flag_avoca) else [980])
|
|
208
|
+
for flags_ebas, flag_avoca in zip(flags, flag_col)
|
|
209
|
+
]
|
|
210
|
+
else:
|
|
211
|
+
this_flags = flags
|
|
212
|
+
|
|
202
213
|
metadata = DataObject()
|
|
203
214
|
metadata.comp_name = (
|
|
204
215
|
f"{ebas_name}_{ebas_compname_of_var[var]}"
|
|
@@ -214,7 +225,9 @@ def set_dataframe(
|
|
|
214
225
|
metadata.matrix = "air"
|
|
215
226
|
# add the variable
|
|
216
227
|
nas.variables.append(
|
|
217
|
-
DataObject(
|
|
228
|
+
DataObject(
|
|
229
|
+
values_=values, flags=this_flags, flagcol=True, metadata=metadata
|
|
230
|
+
)
|
|
218
231
|
)
|
|
219
232
|
|
|
220
233
|
if var == "conc_calib":
|
avoca/bindings/ebas_flags.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
# https://
|
|
2
|
-
from avoca.flags import QA_Flag
|
|
1
|
+
# https://ebas-submit.nilu.no/templates/comments/fl_flag for more info on what ebas uses
|
|
2
|
+
from avoca.flags import QA_Flag, nan_flags
|
|
3
3
|
|
|
4
4
|
flags_to_ebas: dict[QA_Flag, int] = {
|
|
5
5
|
QA_Flag.MISSING: 999, # M Missing measurement, unspecified reason
|
|
@@ -40,13 +40,7 @@ if missing_flags:
|
|
|
40
40
|
f"Not all QA flags are mapped to Ebas flags. Missing: {missing_flags}"
|
|
41
41
|
)
|
|
42
42
|
|
|
43
|
-
|
|
44
|
-
nan_flags = [
|
|
45
|
-
QA_Flag.MISSING,
|
|
46
|
-
QA_Flag.ZERO_NEG_CONC_EXT,
|
|
47
|
-
QA_Flag.INVALIDATED_EXT,
|
|
48
|
-
QA_Flag.INVALID_VALUES,
|
|
49
|
-
]
|
|
43
|
+
nan_flags = nan_flags
|
|
50
44
|
|
|
51
45
|
# priority of the flag to appear in the output
|
|
52
46
|
# Useful when you can select only one flag value
|
avoca/bindings/qa_tool.py
CHANGED
|
@@ -12,7 +12,7 @@ import numpy as np
|
|
|
12
12
|
import pandas as pd
|
|
13
13
|
import pandas.errors
|
|
14
14
|
|
|
15
|
-
from avoca.bindings.ebas_flags import flag_order, flags_to_ebas
|
|
15
|
+
from avoca.bindings.ebas_flags import flag_order, flags_to_ebas, ebas_flag_to_avoca
|
|
16
16
|
from avoca.flags import QA_Flag
|
|
17
17
|
from avoca.utils import compounds_from_df
|
|
18
18
|
|
|
@@ -207,3 +207,67 @@ def export_EmpaQATool(
|
|
|
207
207
|
logger.info(f"Exported to `{out_filepath}`")
|
|
208
208
|
|
|
209
209
|
return out_filepath
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def read_empaqatool(file_path: Path, shift: timedelta | None = None) -> pd.DataFrame:
|
|
213
|
+
"""Read an EmpaQATool export file.
|
|
214
|
+
|
|
215
|
+
Data is exported through : https://voc-qc.nilu.no/ExportData
|
|
216
|
+
|
|
217
|
+
:arg file_path: Path to the EmpaQATool export file.
|
|
218
|
+
|
|
219
|
+
:returns: DataFrame with the data.
|
|
220
|
+
"""
|
|
221
|
+
|
|
222
|
+
# Pandas skips the 2 empty rows
|
|
223
|
+
df = pd.read_csv(file_path, sep=";", header=2)
|
|
224
|
+
|
|
225
|
+
# Convert the datetime columns
|
|
226
|
+
columns = {}
|
|
227
|
+
to_datetime = lambda x: pd.to_datetime(x, format="%Y-%m-%d %H:%M:%S")
|
|
228
|
+
columns[("-", "datetime_start")] = to_datetime(df["Start"])
|
|
229
|
+
columns[("-", "datetime_end")] = to_datetime(df["End"])
|
|
230
|
+
|
|
231
|
+
# Get the datetime column as the start time
|
|
232
|
+
dt = columns[("-", "datetime_start")].copy()
|
|
233
|
+
if shift is not None:
|
|
234
|
+
dt += shift
|
|
235
|
+
columns[("-", "datetime")] = dt
|
|
236
|
+
|
|
237
|
+
# Last column is empty
|
|
238
|
+
compounds = [ '-'.join(s[:-1]) for col in df.columns if len(s:=col.split("-")) >= 2]
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
for compound in compounds:
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
flag_col = f"{compound}-flag"
|
|
245
|
+
value_col = f"{compound}-value"
|
|
246
|
+
acc_col = f"{compound}-accuracy"
|
|
247
|
+
precision_col = f"{compound}-precision"
|
|
248
|
+
|
|
249
|
+
mapping = {
|
|
250
|
+
"conc": value_col,
|
|
251
|
+
"u_expanded":acc_col,
|
|
252
|
+
"u_precision":precision_col,
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
flag_values = (pd.to_numeric(df[flag_col]) * 1e3).astype(int).mod(1000)
|
|
256
|
+
# Flags are adding 1000 for specifying when set by qa tool or not
|
|
257
|
+
flags = flag_values.apply(
|
|
258
|
+
lambda x: ebas_flag_to_avoca[x].value if x else int(0)
|
|
259
|
+
)
|
|
260
|
+
|
|
261
|
+
for key, value in mapping.items():
|
|
262
|
+
# Since the nan flags values are set to 9999, we need to set them to nan
|
|
263
|
+
serie = pd.to_numeric(df[value])
|
|
264
|
+
mask_nan = flags == QA_Flag.MISSING.value
|
|
265
|
+
serie[mask_nan] = np.nan
|
|
266
|
+
columns[(compound, key)] = serie
|
|
267
|
+
|
|
268
|
+
columns[(compound, "flag")] = flags
|
|
269
|
+
|
|
270
|
+
mask_nan = columns[(compound, "conc")].isna()
|
|
271
|
+
columns[(compound, "flag")][mask_nan] |= QA_Flag.MISSING.value
|
|
272
|
+
|
|
273
|
+
return pd.DataFrame(columns)
|
avoca/flags.py
CHANGED
|
@@ -46,6 +46,14 @@ class QA_Flag(Flag):
|
|
|
46
46
|
# Invalid Values
|
|
47
47
|
INVALID_VALUES = auto()
|
|
48
48
|
|
|
49
|
+
# Flags that are considered to have missing values
|
|
50
|
+
nan_flags = [
|
|
51
|
+
QA_Flag.MISSING,
|
|
52
|
+
QA_Flag.ZERO_NEG_CONC_EXT,
|
|
53
|
+
QA_Flag.INVALIDATED_EXT,
|
|
54
|
+
QA_Flag.INVALID_VALUES,
|
|
55
|
+
]
|
|
56
|
+
|
|
49
57
|
|
|
50
58
|
if __name__ == "__main__":
|
|
51
59
|
# Print the flages and their values
|
avoca/plots.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import matplotlib.pyplot as plt
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def plot_historical_comparison(
|
|
6
|
+
df_new: pd.DataFrame, df_hist: pd.DataFrame, compound: str, ax=None
|
|
7
|
+
) -> tuple[plt.Figure, plt.Axes]:
|
|
8
|
+
if ax is None:
|
|
9
|
+
fig, ax = plt.subplots(figsize=(10, 6))
|
|
10
|
+
|
|
11
|
+
dt_column = ("-", "datetime")
|
|
12
|
+
|
|
13
|
+
for data_type, df in zip(["Historical", "New"], [df_hist, df_new]):
|
|
14
|
+
if data_type == "Historical":
|
|
15
|
+
color = "blue"
|
|
16
|
+
else:
|
|
17
|
+
color = "red"
|
|
18
|
+
|
|
19
|
+
serie = df[(compound, "conc")]
|
|
20
|
+
dt = df[dt_column]
|
|
21
|
+
if ("-", "type") in df.columns:
|
|
22
|
+
mask_air = df[("-", "type")] == "air"
|
|
23
|
+
serie = serie[mask_air]
|
|
24
|
+
dt = dt[mask_air]
|
|
25
|
+
|
|
26
|
+
ax.scatter(dt, serie, label=data_type, color=color, alpha=0.5, s=4)
|
|
27
|
+
ax.set_title(compound)
|
|
28
|
+
ax.set_xlabel("Date")
|
|
29
|
+
ax.set_ylabel("Concentration (ppt)")
|
|
30
|
+
ax.legend()
|
|
31
|
+
return fig, ax
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def plot_yearly_data(
|
|
35
|
+
df: pd.DataFrame, compound: str, ax=None
|
|
36
|
+
) -> tuple[plt.Figure, plt.Axes]:
|
|
37
|
+
if ax is None:
|
|
38
|
+
fig, ax = plt.subplots(figsize=(10, 6))
|
|
39
|
+
|
|
40
|
+
dt_column = ("-", "datetime")
|
|
41
|
+
serie = df[(compound, "conc")]
|
|
42
|
+
dt = df[dt_column]
|
|
43
|
+
if ("-", "type") in df.columns:
|
|
44
|
+
mask_air = df[("-", "type")] == "air"
|
|
45
|
+
serie = serie[mask_air]
|
|
46
|
+
dt = dt[mask_air]
|
|
47
|
+
|
|
48
|
+
years = dt.dt.year.unique()
|
|
49
|
+
x = dt.dt.day_of_year + dt.dt.hour / 24.0
|
|
50
|
+
for year in years:
|
|
51
|
+
mask_year = dt.dt.year == year
|
|
52
|
+
ax.scatter(x[mask_year], serie[mask_year], label=str(year), alpha=0.5, s=4)
|
|
53
|
+
|
|
54
|
+
ax.set_title(compound)
|
|
55
|
+
ax.set_xlabel("Time of Year")
|
|
56
|
+
ax.set_ylabel("Concentration (ppt)")
|
|
57
|
+
|
|
58
|
+
# Add ticks with the mounths
|
|
59
|
+
month_starts = pd.date_range(start="2024-01-01", end="2025-01-01", freq="MS")
|
|
60
|
+
month_days = month_starts.dayofyear
|
|
61
|
+
month_labels = month_starts.strftime("%b")
|
|
62
|
+
ax.set_xticks(month_days)
|
|
63
|
+
ax.set_xticklabels(month_labels)
|
|
64
|
+
ax.legend()
|
|
65
|
+
return fig, ax
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def plot_yearly_plotly(
|
|
69
|
+
df: pd.DataFrame,
|
|
70
|
+
compound: str,
|
|
71
|
+
df_new: pd.DataFrame | None = None,
|
|
72
|
+
opacity: float = 0.5,
|
|
73
|
+
size: int = 6,
|
|
74
|
+
) -> "plotly.graph_objs._figure.Figure":
|
|
75
|
+
"""Plot yearly data using plotly."""
|
|
76
|
+
import plotly.express as px
|
|
77
|
+
import plotly.graph_objects as go
|
|
78
|
+
|
|
79
|
+
dt_column = ("-", "datetime")
|
|
80
|
+
serie = df[(compound, "conc")]
|
|
81
|
+
dt = df[dt_column]
|
|
82
|
+
if ("-", "type") in df.columns:
|
|
83
|
+
mask_air = df[("-", "type")] == "air"
|
|
84
|
+
serie = serie[mask_air]
|
|
85
|
+
dt = dt[mask_air]
|
|
86
|
+
if ("-", "type") in df_new.columns:
|
|
87
|
+
mask_air_new = df_new[("-", "type")] == "air"
|
|
88
|
+
df_new = df_new[mask_air_new]
|
|
89
|
+
|
|
90
|
+
x = dt.dt.day_of_year + dt.dt.hour / 24.0
|
|
91
|
+
df_to_plot = pd.DataFrame(
|
|
92
|
+
{
|
|
93
|
+
"conc": serie.values,
|
|
94
|
+
"year": dt.dt.year.values,
|
|
95
|
+
},
|
|
96
|
+
index=x.values,
|
|
97
|
+
)
|
|
98
|
+
# Break down by year, to have year as columns and conc as values
|
|
99
|
+
df_to_plot = df_to_plot.pivot_table(
|
|
100
|
+
index=df_to_plot.index, columns="year", values="conc"
|
|
101
|
+
)
|
|
102
|
+
fig = go.Figure()
|
|
103
|
+
|
|
104
|
+
hover_template = "Timestamp: %{text}<br>Conc: %{y:.2f} ppt"
|
|
105
|
+
|
|
106
|
+
kwargs = {
|
|
107
|
+
"mode": "markers",
|
|
108
|
+
"opacity": opacity,
|
|
109
|
+
"marker": dict(size=size),
|
|
110
|
+
"hovertemplate": hover_template,
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
for year in df_to_plot.columns:
|
|
114
|
+
fig.add_trace(
|
|
115
|
+
go.Scatter(
|
|
116
|
+
x=df_to_plot.index,
|
|
117
|
+
y=df_to_plot[year],
|
|
118
|
+
name=str(year),
|
|
119
|
+
zorder=-year,
|
|
120
|
+
text=dt[dt.dt.year == year].dt.strftime("%y%m%d.%H%M"),
|
|
121
|
+
**kwargs,
|
|
122
|
+
)
|
|
123
|
+
)
|
|
124
|
+
x_values = pd.date_range(start="2024-01-01", end="2024-12-31", freq="MS")
|
|
125
|
+
|
|
126
|
+
dt_new = df_new[dt_column]
|
|
127
|
+
fig.add_trace(
|
|
128
|
+
go.Scatter(
|
|
129
|
+
x=dt_new.dt.dayofyear + dt_new.dt.hour / 24.0,
|
|
130
|
+
y=df_new[(compound, "conc")],
|
|
131
|
+
name="New Data",
|
|
132
|
+
text=dt_new.dt.strftime("%y%m%d.%H%M"),
|
|
133
|
+
**kwargs,
|
|
134
|
+
)
|
|
135
|
+
)
|
|
136
|
+
fig.update_layout(
|
|
137
|
+
xaxis_title="Time of Year",
|
|
138
|
+
yaxis_title=f"{compound} (ppt)",
|
|
139
|
+
xaxis=dict(
|
|
140
|
+
tickmode="array",
|
|
141
|
+
tickvals=x_values.dayofyear,
|
|
142
|
+
ticktext=x_values.strftime("%b"),
|
|
143
|
+
),
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
return fig
|
avoca/qa_class/abstract.py
CHANGED
|
@@ -49,6 +49,7 @@ class AbstractQA_Assigner(ABC):
|
|
|
49
49
|
flag: QA_Flag
|
|
50
50
|
runtypes: list[str] | None
|
|
51
51
|
required_packages: list[PythonPackageRequirement] | None = None
|
|
52
|
+
require_datetime_index: bool = False
|
|
52
53
|
|
|
53
54
|
# Options that can be set by the user
|
|
54
55
|
name: str
|
|
@@ -142,6 +143,14 @@ class AbstractQA_Assigner(ABC):
|
|
|
142
143
|
f"Please check the data and the settings for {self.name}"
|
|
143
144
|
)
|
|
144
145
|
|
|
146
|
+
if self.require_datetime_index:
|
|
147
|
+
if not isinstance(df.index, pd.DatetimeIndex):
|
|
148
|
+
raise ValueError(
|
|
149
|
+
f"Assigner {self} requires a DatetimeIndex but the dataframe"
|
|
150
|
+
" does not have one. \n "
|
|
151
|
+
f"Please check the data and the settings for {self.name}"
|
|
152
|
+
)
|
|
153
|
+
|
|
145
154
|
@abstractmethod
|
|
146
155
|
def fit(self, df: pd.DataFrame):
|
|
147
156
|
"""Fit the QA assigner on some data.
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""Quality assurance based on statistical methods."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import timedelta
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
import pandas as pd
|
|
10
|
+
|
|
11
|
+
from avoca.qa_class.zscore import ExtremeValues
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
|
|
15
|
+
from avoca.utils.torch_models import MultipleRegressionModel
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class RollingWindow(ExtremeValues):
|
|
19
|
+
"""Detect in rolling windows.
|
|
20
|
+
|
|
21
|
+
The method is based on outliers in a rolling window using the median and standard deviation.
|
|
22
|
+
The training is done directly on the fitted data.
|
|
23
|
+
|
|
24
|
+
:param variable: The variable to check for extreme values.
|
|
25
|
+
:param threshold: The threshold for the z-score. To flag values.
|
|
26
|
+
:param use_log_normal: If True, the log of the values will be used to calculate the z-score.
|
|
27
|
+
This can be useful if the values are log-normal distributed.
|
|
28
|
+
:param only_greater: If True, only values greater than the threshold will be flagged.
|
|
29
|
+
The values lower than the negative threshold will not be flagged.
|
|
30
|
+
By default, this is True if use_log_normal is True, and False otherwise.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
require_datetime_index = True
|
|
34
|
+
|
|
35
|
+
rolling_window: timedelta
|
|
36
|
+
|
|
37
|
+
def __init__(
|
|
38
|
+
self,
|
|
39
|
+
*args,
|
|
40
|
+
rolling_window: timedelta = timedelta(days=7),
|
|
41
|
+
threshold: float = 1.5,
|
|
42
|
+
**kwargs,
|
|
43
|
+
):
|
|
44
|
+
super().__init__(*args, threshold=threshold, **kwargs)
|
|
45
|
+
self.rolling_window = rolling_window
|
|
46
|
+
|
|
47
|
+
def fit(self, df: pd.DataFrame):
|
|
48
|
+
|
|
49
|
+
self.check_columns_or_raise(df, columns=self._stats_columns)
|
|
50
|
+
|
|
51
|
+
self.df_train = df[self._stats_columns]
|
|
52
|
+
|
|
53
|
+
def assign(self, df: pd.DataFrame) -> dict[str, pd.Index]:
|
|
54
|
+
df = df[self._stats_columns]
|
|
55
|
+
df = self._clean_data(df)
|
|
56
|
+
if self.use_log_normal:
|
|
57
|
+
# Replace <=0 with NaN
|
|
58
|
+
df = df.where(df > 0, np.nan)
|
|
59
|
+
df = df.map(lambda x: np.log(x))
|
|
60
|
+
|
|
61
|
+
rolling = df.rolling(window=self.rolling_window)
|
|
62
|
+
means = rolling.median()
|
|
63
|
+
stds = rolling.std()
|
|
64
|
+
|
|
65
|
+
self.rolling_median = means
|
|
66
|
+
self.rolling_std = stds
|
|
67
|
+
|
|
68
|
+
thresholds = means + stds * self.threshold
|
|
69
|
+
|
|
70
|
+
df_fail = df > thresholds
|
|
71
|
+
if not self.only_greater:
|
|
72
|
+
df_fail = df_fail | (df < (means - stds * self.threshold))
|
|
73
|
+
|
|
74
|
+
out_dict = {}
|
|
75
|
+
for compound in self.compounds:
|
|
76
|
+
col = (compound, self.variable)
|
|
77
|
+
this_c_fail = df_fail[col]
|
|
78
|
+
out_dict[compound] = this_c_fail.loc[this_c_fail].index
|
|
79
|
+
|
|
80
|
+
return out_dict
|
|
81
|
+
|
|
82
|
+
def plot(self):
|
|
83
|
+
|
|
84
|
+
import matplotlib.pyplot as plt
|
|
85
|
+
|
|
86
|
+
fig, axes = plt.subplots(
|
|
87
|
+
len(self.compounds), 1, figsize=(6, 3 * len(self.compounds)), sharex=True
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
x = self.dt if hasattr(self, "dt") else self.df_train.index
|
|
91
|
+
x = pd.Series(x, index=self.df_train.index)
|
|
92
|
+
|
|
93
|
+
outliers = self.assign(self.df_train)
|
|
94
|
+
|
|
95
|
+
for i, compound in enumerate(self.compounds):
|
|
96
|
+
ax = axes[i]
|
|
97
|
+
col = (compound, self.variable)
|
|
98
|
+
ax.scatter(
|
|
99
|
+
x,
|
|
100
|
+
self.df_train[col],
|
|
101
|
+
s=1,
|
|
102
|
+
label="darkblue",
|
|
103
|
+
)
|
|
104
|
+
median = self.rolling_median[col]
|
|
105
|
+
std = self.rolling_std[col]
|
|
106
|
+
top, bottom = median + std * self.threshold, median - std * self.threshold
|
|
107
|
+
|
|
108
|
+
ax.fill_between(
|
|
109
|
+
x,
|
|
110
|
+
top,
|
|
111
|
+
bottom,
|
|
112
|
+
color="lightgray",
|
|
113
|
+
label="Rolling threshold",
|
|
114
|
+
alpha=0.5,
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
outlier_indices = outliers[compound]
|
|
118
|
+
ax.scatter(
|
|
119
|
+
x.loc[outlier_indices],
|
|
120
|
+
self.df_train.loc[outlier_indices, col],
|
|
121
|
+
s=10,
|
|
122
|
+
marker="x",
|
|
123
|
+
color="red",
|
|
124
|
+
label="Extreme values",
|
|
125
|
+
)
|
|
126
|
+
ax.set_title(
|
|
127
|
+
f"{compound} +- {self.threshold} std",
|
|
128
|
+
# Under teh top line
|
|
129
|
+
y=0.8,
|
|
130
|
+
)
|
|
131
|
+
ax.tick_params(axis="x", rotation=25)
|
|
132
|
+
|
|
133
|
+
return fig, axes
|
avoca/testing/df.py
CHANGED
avoca/testing/utils.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def make_dt_index(df: pd.DataFrame | pd.Index) -> pd.DataFrame | pd.Index:
|
|
5
|
+
"""Create a datetime index for the dataframe."""
|
|
6
|
+
index = pd.date_range(start="2023-01-01", periods=len(df), freq="h")
|
|
7
|
+
if isinstance(df, pd.Index):
|
|
8
|
+
return index
|
|
9
|
+
return df.set_index(index)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: avoca
|
|
3
|
-
Version: 0.11.4
|
|
3
|
+
Version: 0.14.0
|
|
4
4
|
Summary: @voc@: Quality assessement of measurement data
|
|
5
5
|
Project-URL: Homepage, https://gitlab.com/empa503/atmospheric-measurements/avoca
|
|
6
6
|
Project-URL: Bug Tracker, https://gitlab.com/empa503/atmospheric-measurements/avoca/-/issues
|
|
@@ -1,35 +1,38 @@
|
|
|
1
1
|
avoca/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
avoca/export_nas.py,sha256=B9B2iFSzB3f83nCfe2_vzouRblthK0_dGF8W3o0Kt5Y,155
|
|
3
3
|
avoca/flagging.py,sha256=tg6k_TVHRXiMJCAij_kUS-S2gSshYt7FKvQ0nJdljYs,2328
|
|
4
|
-
avoca/flags.py,sha256=
|
|
4
|
+
avoca/flags.py,sha256=wobuZoIJh6dFsdiqqYJLZ_AHe4pcFE9tjuoimNXLjIQ,1428
|
|
5
5
|
avoca/io.py,sha256=67D5x1qkLqWC7wWehyOfX96L4H3-tn9x2V4jMCoIRqA,729
|
|
6
6
|
avoca/logging.py,sha256=BrxgZQRfnkPSoQ0ZXhOzzhIsmbyjKvaJNG55MdM9jmA,86
|
|
7
7
|
avoca/manager.py,sha256=ET-ATrSLi2rSV7PjBzwpjj0V_60MFxSIZqQ03aEIbdA,5284
|
|
8
|
+
avoca/plots.py,sha256=UjfUgbfxd2veMOGHtSvJycru-w3gWsGjOVO__I-zqzQ,4205
|
|
8
9
|
avoca/requirements.py,sha256=q4z6bJ6iW5jSy10Y0elfE9BoEcAZC2-kUqYi4zA6TGE,563
|
|
9
10
|
avoca/settings.py,sha256=Px-sCGIlRyWI2RBJaGUY0K1V60kOZY9n41eft92bjN4,2112
|
|
10
11
|
avoca/bindings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
|
-
avoca/bindings/ebas.py,sha256=
|
|
12
|
-
avoca/bindings/ebas_flags.py,sha256=
|
|
12
|
+
avoca/bindings/ebas.py,sha256=vil4u4G6jGJrE12Z7nBvGpJuTAT9QyvbNNyWsWr5UaM,19306
|
|
13
|
+
avoca/bindings/ebas_flags.py,sha256=N-JpmA6WCFjcYhvt7XjyOZMbR7vCdyPV6uHBlF45UJU,2397
|
|
13
14
|
avoca/bindings/gcwerks-report.conf,sha256=jO0I62DfgzrXXS1FuiW8ds-oc1_j8kpFCO61Fk-erBw,230
|
|
14
15
|
avoca/bindings/gcwerks.py,sha256=a5n9Iot3r_ejnCEdILk4hE2uioONB75Soq5fvSLlDoo,14879
|
|
15
16
|
avoca/bindings/gcwerks_gui.py,sha256=Fj3p8obFq3lWrWW0LlA8WBALP8-U70hvps5vZEt4NaM,9458
|
|
16
17
|
avoca/bindings/nabel.py,sha256=VbC_ARvtso8onILAD8gROt5Y2URdx6NfAqMn4p1mUWU,3020
|
|
17
|
-
avoca/bindings/qa_tool.py,sha256=
|
|
18
|
+
avoca/bindings/qa_tool.py,sha256=ninHe3mrJ8GULxRCkRTZixw-vmNhqu4zwwONd5aXd1Q,9735
|
|
18
19
|
avoca/bindings/synspec.py,sha256=W5RnBu-6eetmwjM8iMBe4wNwVNIaVpNW3bwa2ykGM2U,1733
|
|
19
20
|
avoca/qa_class/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
|
-
avoca/qa_class/abstract.py,sha256=
|
|
21
|
+
avoca/qa_class/abstract.py,sha256=CLt-6WFhZhrvKTLVHpdbJYMFM50VPOGiO-GG6IRPWzA,6011
|
|
21
22
|
avoca/qa_class/concs.py,sha256=TcQic69I1Kr40RJgCILTtyjVLn0K6_q6I5Y1Vi3dKwk,813
|
|
22
23
|
avoca/qa_class/generate_classes_doc.py,sha256=osz01SRZ5SrwJXVlmbcainVwVjmealSSIdbzXzUEGKQ,1915
|
|
23
24
|
avoca/qa_class/invalid.py,sha256=PDZHN0RZ8jND3QY09UcbwJYjjT6VqS4a0klO3QYiFig,2650
|
|
25
|
+
avoca/qa_class/rolling.py,sha256=CQ2E0qJ7FxDT4TucItkJRmkqhzMoNSnwtVQQ_HzX9Jk,4059
|
|
24
26
|
avoca/qa_class/rt.py,sha256=Bgv0DSSR-hIJ9kI6AdUkV6sXVS65gBxbASkk4TUHbnQ,5293
|
|
25
27
|
avoca/qa_class/test.py,sha256=Xc88_Vwf3hvPiKKl4ILxZ2N985SY8eujUdnAoQu4mbo,591
|
|
26
28
|
avoca/qa_class/zscore.py,sha256=jDw2UBmf7KBkskGOD5bgFy3RgNYUjc-9tYjSU-3L1ws,16714
|
|
27
29
|
avoca/testing/__init__.py,sha256=CzkugadVit48-eMoMVtojZLHeSKgnmMMen6sGu6Q42Y,108
|
|
28
|
-
avoca/testing/df.py,sha256=
|
|
30
|
+
avoca/testing/df.py,sha256=UQm6TdTDVRWvRNM5WnSWh6vdvDR1lqLNg0ti-B1L760,1865
|
|
31
|
+
avoca/testing/utils.py,sha256=jVV0mIwLIpr0UBLMk8RjZH5J_dV_b6Gugxzo_WRgWU0,308
|
|
29
32
|
avoca/utils/__init__.py,sha256=LEA2jJsqwSK2DBzXg00DbPhM1fXXREJ0XxLeuJtKapY,1398
|
|
30
33
|
avoca/utils/flags_doc.py,sha256=cS7yKpxVh_SA6EdH3lSy9UpcIvhGwzAELRbkXN3CxO8,4168
|
|
31
34
|
avoca/utils/torch_models.py,sha256=53TgOgSPMOOSGYy2cm1EGSK7qQkYMGEOq319KKM_Ir0,1015
|
|
32
|
-
avoca-0.
|
|
33
|
-
avoca-0.
|
|
34
|
-
avoca-0.
|
|
35
|
-
avoca-0.
|
|
35
|
+
avoca-0.14.0.dist-info/METADATA,sha256=CayW94kozHUxF8sbKxE0pnWZnS0W5cjkEUKU7_QfgEc,1570
|
|
36
|
+
avoca-0.14.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
37
|
+
avoca-0.14.0.dist-info/licenses/LICENCE.txt,sha256=4MY53j3v7tEKwjyuriVz9YjB4Dscm2nDMB2CcG9lOmk,1059
|
|
38
|
+
avoca-0.14.0.dist-info/RECORD,,
|
|
File without changes
|