avoca 0.14.0__py3-none-any.whl → 0.15.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avoca/bindings/nabel.py +2 -3
- avoca/bindings/qa_tool.py +51 -10
- avoca/manager.py +1 -0
- avoca/plots.py +38 -34
- avoca/qa_class/rolling.py +3 -0
- avoca/utils/__init__.py +1 -1
- avoca/utils/flags_doc.py +3 -0
- {avoca-0.14.0.dist-info → avoca-0.15.1.dist-info}/METADATA +1 -1
- {avoca-0.14.0.dist-info → avoca-0.15.1.dist-info}/RECORD +11 -11
- {avoca-0.14.0.dist-info → avoca-0.15.1.dist-info}/WHEEL +0 -0
- {avoca-0.14.0.dist-info → avoca-0.15.1.dist-info}/licenses/LICENCE.txt +0 -0
avoca/bindings/nabel.py
CHANGED
|
@@ -4,7 +4,6 @@ import logging
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
import pandas as pd
|
|
6
6
|
|
|
7
|
-
|
|
8
7
|
logger = logging.getLogger(__name__)
|
|
9
8
|
|
|
10
9
|
|
|
@@ -54,8 +53,8 @@ def add_nabel_data(df: pd.DataFrame, df_nabel: pd.DataFrame) -> pd.DataFrame:
|
|
|
54
53
|
|
|
55
54
|
df_out = df.copy()
|
|
56
55
|
|
|
57
|
-
col_dt_start = ("
|
|
58
|
-
col_dt_end = ("
|
|
56
|
+
col_dt_start = ("-", "datetime_start")
|
|
57
|
+
col_dt_end = ("-", "datetime_end")
|
|
59
58
|
|
|
60
59
|
if col_dt_start not in df.columns or col_dt_end not in df.columns:
|
|
61
60
|
raise ValueError(
|
avoca/bindings/qa_tool.py
CHANGED
|
@@ -37,10 +37,11 @@ def export_EmpaQATool(
|
|
|
37
37
|
station: str = "XXX",
|
|
38
38
|
revision_date: datetime | None = None,
|
|
39
39
|
dataset: datetime | str | None = None,
|
|
40
|
-
export_names: dict[str, str] =
|
|
40
|
+
export_names: dict[str, str] | None = None,
|
|
41
41
|
datetime_offsets: tuple[timedelta, timedelta] | None = None,
|
|
42
42
|
substances: list[str] = [],
|
|
43
43
|
rounding_decimals: int = 4,
|
|
44
|
+
df_substances: pd.DataFrame | None = None,
|
|
44
45
|
) -> Path:
|
|
45
46
|
"""Export to the EmpaQATool format.
|
|
46
47
|
|
|
@@ -64,7 +65,17 @@ def export_EmpaQATool(
|
|
|
64
65
|
:arg datetime_offsets: Tuple of two timedelta to use for the start and end datetime
|
|
65
66
|
:arg substances: List of substances to export. You can also specify group names.
|
|
66
67
|
If not specified, this will use the substances from `df_substances`.
|
|
68
|
+
If a substance is present here and not in `df_substances`, it will still be exported.
|
|
67
69
|
:arg rounding_decimals: Number of decimals to round the values to.
|
|
70
|
+
:arg df_substances: DataFrame with substance information.
|
|
71
|
+
If provided, the substances to export will be taken from this dataframe.
|
|
72
|
+
Columns:
|
|
73
|
+
- index: substance name
|
|
74
|
+
- export: bool, whether to export the substance
|
|
75
|
+
- export_name: str, name to use in the export file
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
:returns: Path to the exported file.
|
|
68
79
|
|
|
69
80
|
"""
|
|
70
81
|
|
|
@@ -113,12 +124,42 @@ def export_EmpaQATool(
|
|
|
113
124
|
logger.debug(f"df_out: {df_out.head()}")
|
|
114
125
|
if not substances:
|
|
115
126
|
substances = compounds_from_df(df)
|
|
127
|
+
if df_substances is not None and "export" in df_substances.columns:
|
|
128
|
+
# Remove the substances that should not be exported
|
|
129
|
+
substances = [
|
|
130
|
+
s
|
|
131
|
+
for s in substances
|
|
132
|
+
if s not in df_substances.index or df_substances.loc[s, "export"]
|
|
133
|
+
]
|
|
116
134
|
|
|
117
135
|
remove_infs = lambda x: x.replace([np.inf, -np.inf], np.nan)
|
|
118
136
|
is_invalid = lambda x: x.isin([np.inf, -np.inf]) | pd.isna(x)
|
|
119
137
|
clean_col = lambda x: remove_infs(x).round(rounding_decimals).astype(str)
|
|
120
138
|
|
|
139
|
+
if export_names is None:
|
|
140
|
+
export_names = {}
|
|
141
|
+
|
|
142
|
+
if df_substances is not None and "export_name" in df_substances.columns:
|
|
143
|
+
# Read export names from the dataframe if provided
|
|
144
|
+
for substance in substances:
|
|
145
|
+
if not substance or substance not in df_substances.index:
|
|
146
|
+
continue
|
|
147
|
+
export_name_df = df_substances.loc[substance, "export_name"]
|
|
148
|
+
if not export_name_df or pd.isna(export_name_df):
|
|
149
|
+
continue
|
|
150
|
+
if substance in export_names and export_names[substance] != export_name_df:
|
|
151
|
+
logger.warning(
|
|
152
|
+
f"Substance {substance} found in both df_substances and"
|
|
153
|
+
" export_names. Using the name from export_names.\n"
|
|
154
|
+
f" - export_names (used): {export_names[substance]}\n"
|
|
155
|
+
f" - df_substances: {export_name_df}"
|
|
156
|
+
)
|
|
157
|
+
continue
|
|
158
|
+
export_names[substance] = export_name_df
|
|
159
|
+
|
|
121
160
|
for substance in substances:
|
|
161
|
+
if not substance:
|
|
162
|
+
continue
|
|
122
163
|
|
|
123
164
|
export_name = export_names.get(substance, substance)
|
|
124
165
|
|
|
@@ -234,12 +275,12 @@ def read_empaqatool(file_path: Path, shift: timedelta | None = None) -> pd.DataF
|
|
|
234
275
|
dt += shift
|
|
235
276
|
columns[("-", "datetime")] = dt
|
|
236
277
|
|
|
237
|
-
|
|
238
|
-
compounds = [
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
for compound in compounds:
|
|
278
|
+
# Last column is empty
|
|
279
|
+
compounds = [
|
|
280
|
+
"-".join(s[:-1]) for col in df.columns if len(s := col.split("-")) >= 2
|
|
281
|
+
]
|
|
242
282
|
|
|
283
|
+
for compound in compounds:
|
|
243
284
|
|
|
244
285
|
flag_col = f"{compound}-flag"
|
|
245
286
|
value_col = f"{compound}-value"
|
|
@@ -248,8 +289,8 @@ def read_empaqatool(file_path: Path, shift: timedelta | None = None) -> pd.DataF
|
|
|
248
289
|
|
|
249
290
|
mapping = {
|
|
250
291
|
"conc": value_col,
|
|
251
|
-
"u_expanded":acc_col,
|
|
252
|
-
"u_precision":precision_col,
|
|
292
|
+
"u_expanded": acc_col,
|
|
293
|
+
"u_precision": precision_col,
|
|
253
294
|
}
|
|
254
295
|
|
|
255
296
|
flag_values = (pd.to_numeric(df[flag_col]) * 1e3).astype(int).mod(1000)
|
|
@@ -263,10 +304,10 @@ def read_empaqatool(file_path: Path, shift: timedelta | None = None) -> pd.DataF
|
|
|
263
304
|
serie = pd.to_numeric(df[value])
|
|
264
305
|
mask_nan = flags == QA_Flag.MISSING.value
|
|
265
306
|
serie[mask_nan] = np.nan
|
|
266
|
-
columns[(compound, key)] = serie
|
|
307
|
+
columns[(compound, key)] = serie
|
|
267
308
|
|
|
268
309
|
columns[(compound, "flag")] = flags
|
|
269
|
-
|
|
310
|
+
|
|
270
311
|
mask_nan = columns[(compound, "conc")].isna()
|
|
271
312
|
columns[(compound, "flag")][mask_nan] |= QA_Flag.MISSING.value
|
|
272
313
|
|
avoca/manager.py
CHANGED
|
@@ -20,6 +20,7 @@ class AssignerManager:
|
|
|
20
20
|
_assigners_importpath = {
|
|
21
21
|
"RetentionTimeChecker": "avoca.qa_class.rt",
|
|
22
22
|
"ExtremeValues": "avoca.qa_class.zscore",
|
|
23
|
+
"RollingWindow": "avoca.qa_class.rolling",
|
|
23
24
|
"ExtremeConcentrations": "avoca.qa_class.concs",
|
|
24
25
|
"XY_Correlations": "avoca.qa_class.zscore",
|
|
25
26
|
"TestAssigner": "avoca.qa_class.test",
|
avoca/plots.py
CHANGED
|
@@ -77,28 +77,17 @@ def plot_yearly_plotly(
|
|
|
77
77
|
import plotly.graph_objects as go
|
|
78
78
|
|
|
79
79
|
dt_column = ("-", "datetime")
|
|
80
|
-
|
|
81
|
-
dt = df[dt_column]
|
|
80
|
+
|
|
82
81
|
if ("-", "type") in df.columns:
|
|
83
82
|
mask_air = df[("-", "type")] == "air"
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
if ("-", "type") in df_new.columns:
|
|
83
|
+
df = df[mask_air]
|
|
84
|
+
if df_new is not None and ("-", "type") in df_new.columns:
|
|
87
85
|
mask_air_new = df_new[("-", "type")] == "air"
|
|
88
86
|
df_new = df_new[mask_air_new]
|
|
89
87
|
|
|
88
|
+
dt = df[dt_column]
|
|
90
89
|
x = dt.dt.day_of_year + dt.dt.hour / 24.0
|
|
91
|
-
|
|
92
|
-
{
|
|
93
|
-
"conc": serie.values,
|
|
94
|
-
"year": dt.dt.year.values,
|
|
95
|
-
},
|
|
96
|
-
index=x.values,
|
|
97
|
-
)
|
|
98
|
-
# Break down by year, to have year as columns and conc as values
|
|
99
|
-
df_to_plot = df_to_plot.pivot_table(
|
|
100
|
-
index=df_to_plot.index, columns="year", values="conc"
|
|
101
|
-
)
|
|
90
|
+
|
|
102
91
|
fig = go.Figure()
|
|
103
92
|
|
|
104
93
|
hover_template = "Timestamp: %{text}<br>Conc: %{y:.2f} ppt"
|
|
@@ -110,29 +99,44 @@ def plot_yearly_plotly(
|
|
|
110
99
|
"hovertemplate": hover_template,
|
|
111
100
|
}
|
|
112
101
|
|
|
113
|
-
|
|
102
|
+
if (compound, "conc") in df:
|
|
103
|
+
serie = df[(compound, "conc")]
|
|
104
|
+
df_to_plot = pd.DataFrame(
|
|
105
|
+
{
|
|
106
|
+
"conc": serie.values,
|
|
107
|
+
"year": dt.dt.year.values,
|
|
108
|
+
},
|
|
109
|
+
index=x.values,
|
|
110
|
+
)
|
|
111
|
+
# Break down by year, to have year as columns and conc as values
|
|
112
|
+
df_to_plot = df_to_plot.pivot_table(
|
|
113
|
+
index=df_to_plot.index, columns="year", values="conc"
|
|
114
|
+
)
|
|
115
|
+
for year in df_to_plot.columns:
|
|
116
|
+
fig.add_trace(
|
|
117
|
+
go.Scatter(
|
|
118
|
+
x=df_to_plot.index,
|
|
119
|
+
y=df_to_plot[year],
|
|
120
|
+
name=str(year),
|
|
121
|
+
zorder=-year,
|
|
122
|
+
text=dt[dt.dt.year == year].dt.strftime("%y%m%d.%H%M"),
|
|
123
|
+
**kwargs,
|
|
124
|
+
)
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
x_values = pd.date_range(start="2024-01-01", end="2024-12-31", freq="MS")
|
|
128
|
+
|
|
129
|
+
if df_new is not None and (compound, "conc") in df_new:
|
|
130
|
+
dt_new = df_new[dt_column]
|
|
114
131
|
fig.add_trace(
|
|
115
132
|
go.Scatter(
|
|
116
|
-
x=
|
|
117
|
-
y=
|
|
118
|
-
name=
|
|
119
|
-
|
|
120
|
-
text=dt[dt.dt.year == year].dt.strftime("%y%m%d.%H%M"),
|
|
133
|
+
x=dt_new.dt.dayofyear + dt_new.dt.hour / 24.0,
|
|
134
|
+
y=df_new[(compound, "conc")],
|
|
135
|
+
name="New Data",
|
|
136
|
+
text=dt_new.dt.strftime("%y%m%d.%H%M"),
|
|
121
137
|
**kwargs,
|
|
122
138
|
)
|
|
123
139
|
)
|
|
124
|
-
x_values = pd.date_range(start="2024-01-01", end="2024-12-31", freq="MS")
|
|
125
|
-
|
|
126
|
-
dt_new = df_new[dt_column]
|
|
127
|
-
fig.add_trace(
|
|
128
|
-
go.Scatter(
|
|
129
|
-
x=dt_new.dt.dayofyear + dt_new.dt.hour / 24.0,
|
|
130
|
-
y=df_new[(compound, "conc")],
|
|
131
|
-
name="New Data",
|
|
132
|
-
text=dt_new.dt.strftime("%y%m%d.%H%M"),
|
|
133
|
-
**kwargs,
|
|
134
|
-
)
|
|
135
|
-
)
|
|
136
140
|
fig.update_layout(
|
|
137
141
|
xaxis_title="Time of Year",
|
|
138
142
|
yaxis_title=f"{compound} (ppt)",
|
avoca/qa_class/rolling.py
CHANGED
|
@@ -28,6 +28,9 @@ class RollingWindow(ExtremeValues):
|
|
|
28
28
|
:param only_greater: If True, only values greater than the threshold will be flagged.
|
|
29
29
|
The values lower than the negative threshold will not be flagged.
|
|
30
30
|
By default, this is True if use_log_normal is True, and False otherwise.
|
|
31
|
+
:param rolling_window: The size of the rolling window as a `timedelta` object.
|
|
32
|
+
See the `window` parameter in the pandas documentation for more details.
|
|
33
|
+
https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rolling.html#pandas-dataframe-rolling
|
|
31
34
|
"""
|
|
32
35
|
|
|
33
36
|
require_datetime_index = True
|
avoca/utils/__init__.py
CHANGED
|
@@ -13,7 +13,7 @@ def compounds_from_df(df: pd.DataFrame) -> list[str]:
|
|
|
13
13
|
Returns:
|
|
14
14
|
The compounds in the dataframe.
|
|
15
15
|
"""
|
|
16
|
-
return [c for c in df.columns.get_level_values(0).unique() if c
|
|
16
|
+
return [c for c in df.columns.get_level_values(0).unique() if c not in ["-", ""]]
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
def runtypes_from_df(df: pd.DataFrame) -> list[str]:
|
avoca/utils/flags_doc.py
CHANGED
|
@@ -56,6 +56,9 @@ def parse_enum_comments(filepath: Path, enum_class_name: str) -> dict[Enum, str]
|
|
|
56
56
|
exec(code, module)
|
|
57
57
|
enum_cls = module[enum_class_name]
|
|
58
58
|
for name, comment in comment_dict.items():
|
|
59
|
+
if not hasattr(enum_cls, name):
|
|
60
|
+
# Probably somewhere else in the file
|
|
61
|
+
continue
|
|
59
62
|
enum_member = getattr(enum_cls, name)
|
|
60
63
|
enum_obj[enum_member] = comment
|
|
61
64
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: avoca
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.15.1
|
|
4
4
|
Summary: @voc@: Quality assessement of measurement data
|
|
5
5
|
Project-URL: Homepage, https://gitlab.com/empa503/atmospheric-measurements/avoca
|
|
6
6
|
Project-URL: Bug Tracker, https://gitlab.com/empa503/atmospheric-measurements/avoca/-/issues
|
|
@@ -4,8 +4,8 @@ avoca/flagging.py,sha256=tg6k_TVHRXiMJCAij_kUS-S2gSshYt7FKvQ0nJdljYs,2328
|
|
|
4
4
|
avoca/flags.py,sha256=wobuZoIJh6dFsdiqqYJLZ_AHe4pcFE9tjuoimNXLjIQ,1428
|
|
5
5
|
avoca/io.py,sha256=67D5x1qkLqWC7wWehyOfX96L4H3-tn9x2V4jMCoIRqA,729
|
|
6
6
|
avoca/logging.py,sha256=BrxgZQRfnkPSoQ0ZXhOzzhIsmbyjKvaJNG55MdM9jmA,86
|
|
7
|
-
avoca/manager.py,sha256=
|
|
8
|
-
avoca/plots.py,sha256=
|
|
7
|
+
avoca/manager.py,sha256=Faf3UyaCV58TMCZz6tWrLcY-W1WUtuh1aMP85yUVlmQ,5336
|
|
8
|
+
avoca/plots.py,sha256=zzoOJystasrKF2ikJLqcT8mlc-f-tu57vksXs-xRXv8,4424
|
|
9
9
|
avoca/requirements.py,sha256=q4z6bJ6iW5jSy10Y0elfE9BoEcAZC2-kUqYi4zA6TGE,563
|
|
10
10
|
avoca/settings.py,sha256=Px-sCGIlRyWI2RBJaGUY0K1V60kOZY9n41eft92bjN4,2112
|
|
11
11
|
avoca/bindings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -14,25 +14,25 @@ avoca/bindings/ebas_flags.py,sha256=N-JpmA6WCFjcYhvt7XjyOZMbR7vCdyPV6uHBlF45UJU,
|
|
|
14
14
|
avoca/bindings/gcwerks-report.conf,sha256=jO0I62DfgzrXXS1FuiW8ds-oc1_j8kpFCO61Fk-erBw,230
|
|
15
15
|
avoca/bindings/gcwerks.py,sha256=a5n9Iot3r_ejnCEdILk4hE2uioONB75Soq5fvSLlDoo,14879
|
|
16
16
|
avoca/bindings/gcwerks_gui.py,sha256=Fj3p8obFq3lWrWW0LlA8WBALP8-U70hvps5vZEt4NaM,9458
|
|
17
|
-
avoca/bindings/nabel.py,sha256=
|
|
18
|
-
avoca/bindings/qa_tool.py,sha256=
|
|
17
|
+
avoca/bindings/nabel.py,sha256=6OzaG1imFhOCVDQTO7YXvPQjbTfo4063w74yEuAVCEk,2991
|
|
18
|
+
avoca/bindings/qa_tool.py,sha256=hqsWUU99mYpkKfeULBoox4M2x7Bk0aYO4Q_8WGvt2og,11628
|
|
19
19
|
avoca/bindings/synspec.py,sha256=W5RnBu-6eetmwjM8iMBe4wNwVNIaVpNW3bwa2ykGM2U,1733
|
|
20
20
|
avoca/qa_class/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
21
|
avoca/qa_class/abstract.py,sha256=CLt-6WFhZhrvKTLVHpdbJYMFM50VPOGiO-GG6IRPWzA,6011
|
|
22
22
|
avoca/qa_class/concs.py,sha256=TcQic69I1Kr40RJgCILTtyjVLn0K6_q6I5Y1Vi3dKwk,813
|
|
23
23
|
avoca/qa_class/generate_classes_doc.py,sha256=osz01SRZ5SrwJXVlmbcainVwVjmealSSIdbzXzUEGKQ,1915
|
|
24
24
|
avoca/qa_class/invalid.py,sha256=PDZHN0RZ8jND3QY09UcbwJYjjT6VqS4a0klO3QYiFig,2650
|
|
25
|
-
avoca/qa_class/rolling.py,sha256=
|
|
25
|
+
avoca/qa_class/rolling.py,sha256=m6KbfMdwSIDcXNTZqdth_I-YgZPnnUf8WAPSQGKBH6w,4324
|
|
26
26
|
avoca/qa_class/rt.py,sha256=Bgv0DSSR-hIJ9kI6AdUkV6sXVS65gBxbASkk4TUHbnQ,5293
|
|
27
27
|
avoca/qa_class/test.py,sha256=Xc88_Vwf3hvPiKKl4ILxZ2N985SY8eujUdnAoQu4mbo,591
|
|
28
28
|
avoca/qa_class/zscore.py,sha256=jDw2UBmf7KBkskGOD5bgFy3RgNYUjc-9tYjSU-3L1ws,16714
|
|
29
29
|
avoca/testing/__init__.py,sha256=CzkugadVit48-eMoMVtojZLHeSKgnmMMen6sGu6Q42Y,108
|
|
30
30
|
avoca/testing/df.py,sha256=UQm6TdTDVRWvRNM5WnSWh6vdvDR1lqLNg0ti-B1L760,1865
|
|
31
31
|
avoca/testing/utils.py,sha256=jVV0mIwLIpr0UBLMk8RjZH5J_dV_b6Gugxzo_WRgWU0,308
|
|
32
|
-
avoca/utils/__init__.py,sha256=
|
|
33
|
-
avoca/utils/flags_doc.py,sha256=
|
|
32
|
+
avoca/utils/__init__.py,sha256=SZc1bHrQyg1DIYnbdUmANtUhnQWlJaMhPrDSWS8oVRY,1408
|
|
33
|
+
avoca/utils/flags_doc.py,sha256=jT1E0GN-B8ws_FyKGE20nlrKrgTHtoyjdo2r8RgYhwU,4294
|
|
34
34
|
avoca/utils/torch_models.py,sha256=53TgOgSPMOOSGYy2cm1EGSK7qQkYMGEOq319KKM_Ir0,1015
|
|
35
|
-
avoca-0.
|
|
36
|
-
avoca-0.
|
|
37
|
-
avoca-0.
|
|
38
|
-
avoca-0.
|
|
35
|
+
avoca-0.15.1.dist-info/METADATA,sha256=_KHPT1eKCbb2WV9ujy1-wLEDVhFuQKqUHIHm_ylHh4w,1570
|
|
36
|
+
avoca-0.15.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
37
|
+
avoca-0.15.1.dist-info/licenses/LICENCE.txt,sha256=4MY53j3v7tEKwjyuriVz9YjB4Dscm2nDMB2CcG9lOmk,1059
|
|
38
|
+
avoca-0.15.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|