avoca 0.15.1__py3-none-any.whl → 0.17.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avoca/bindings/ebas.py +56 -45
- avoca/bindings/ebas_flags.py +6 -0
- avoca/bindings/gcwerks.py +11 -3
- avoca/flags.py +8 -0
- avoca/io.py +5 -0
- avoca/testing/utils.py +1 -1
- {avoca-0.15.1.dist-info → avoca-0.17.0.dist-info}/METADATA +1 -1
- {avoca-0.15.1.dist-info → avoca-0.17.0.dist-info}/RECORD +10 -10
- {avoca-0.15.1.dist-info → avoca-0.17.0.dist-info}/WHEEL +0 -0
- {avoca-0.15.1.dist-info → avoca-0.17.0.dist-info}/licenses/LICENCE.txt +0 -0
avoca/bindings/ebas.py
CHANGED
|
@@ -19,6 +19,7 @@ from nilutility.datetime_helper import DatetimeInterval
|
|
|
19
19
|
|
|
20
20
|
from avoca.bindings.ebas_flags import ebas_flag_to_avoca, flags_to_ebas, nan_flags
|
|
21
21
|
from avoca.flags import QA_Flag
|
|
22
|
+
from avoca.utils import compounds_from_df
|
|
22
23
|
|
|
23
24
|
logger = logging.getLogger(__name__)
|
|
24
25
|
|
|
@@ -31,15 +32,29 @@ ebas_compname_of_var = {
|
|
|
31
32
|
ebas_compname_to_var = {v: k for k, v in ebas_compname_of_var.items()}
|
|
32
33
|
|
|
33
34
|
|
|
34
|
-
# Additional variables that can be in the dataset (not compound dependant)
|
|
35
|
-
additional_vars = [
|
|
36
|
-
"temperature",
|
|
37
|
-
"pressure",
|
|
38
|
-
]
|
|
39
|
-
|
|
40
35
|
titles = {
|
|
41
36
|
"temperature": "T_inlet",
|
|
42
37
|
"pressure": "P_inlet",
|
|
38
|
+
"volume_sample": "svol",
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
unit_of_var = {
|
|
42
|
+
"C": "pmol/mol",
|
|
43
|
+
"conc_calib": "pmol/mol",
|
|
44
|
+
"rt": "s",
|
|
45
|
+
"w": "s",
|
|
46
|
+
"area": "area_unit",
|
|
47
|
+
"temperature": "K",
|
|
48
|
+
"pressure": "hPa",
|
|
49
|
+
"volume_sample": "mL",
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
ebas_varname_of_var = {
|
|
53
|
+
"rt": "rt",
|
|
54
|
+
"w": "pw",
|
|
55
|
+
"area": "pa",
|
|
56
|
+
"conc_calib": "cal",
|
|
57
|
+
"volume_sample": "sample_volume",
|
|
43
58
|
}
|
|
44
59
|
|
|
45
60
|
|
|
@@ -51,6 +66,17 @@ class DataLevel(IntEnum):
|
|
|
51
66
|
QA_CONCS = 2
|
|
52
67
|
|
|
53
68
|
|
|
69
|
+
vars_to_export = {
|
|
70
|
+
DataLevel.AREAS: ["area", "rt", "w", "conc_calib"],
|
|
71
|
+
DataLevel.CONCS: ["C"],
|
|
72
|
+
DataLevel.QA_CONCS: ["C"],
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
# Additional variables that can be in the dataset (not compound dependant)
|
|
76
|
+
additional_vars = {
|
|
77
|
+
DataLevel.AREAS: ["volume_sample", "temperature", "pressure"],
|
|
78
|
+
}
|
|
79
|
+
valid_additional_vars = sum(additional_vars.values(), [])
|
|
54
80
|
concs_data_levels = [DataLevel.CONCS, DataLevel.QA_CONCS]
|
|
55
81
|
|
|
56
82
|
|
|
@@ -64,8 +90,8 @@ def data_level_after_qa(data_level: DataLevel) -> DataLevel:
|
|
|
64
90
|
def set_dataframe(
|
|
65
91
|
nas,
|
|
66
92
|
df_export: pd.DataFrame,
|
|
67
|
-
compounds: dict[str, str],
|
|
68
|
-
data_level: DataLevel,
|
|
93
|
+
compounds: dict[str, str] | None = None,
|
|
94
|
+
data_level: DataLevel = DataLevel.CONCS,
|
|
69
95
|
start_offset: timedelta | None = None,
|
|
70
96
|
end_offset: timedelta | None = None,
|
|
71
97
|
flag_all: list[int] = [],
|
|
@@ -108,28 +134,8 @@ def set_dataframe(
|
|
|
108
134
|
)
|
|
109
135
|
]
|
|
110
136
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
DataLevel.CONCS: ["C"],
|
|
114
|
-
DataLevel.QA_CONCS: ["C"],
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
unit_of_var = {
|
|
118
|
-
"C": "pmol/mol",
|
|
119
|
-
"conc_calib": "pmol/mol",
|
|
120
|
-
"rt": "s",
|
|
121
|
-
"w": "s",
|
|
122
|
-
"area": "area_unit",
|
|
123
|
-
"temperature": "K",
|
|
124
|
-
"pressure": "hPa",
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
ebas_varname_of_var = {
|
|
128
|
-
"rt": "rt",
|
|
129
|
-
"w": "pw",
|
|
130
|
-
"area": "pa",
|
|
131
|
-
"conc_calib": "cal",
|
|
132
|
-
}
|
|
137
|
+
if compounds is None:
|
|
138
|
+
compounds = {c: c for c in compounds_from_df(df_export)}
|
|
133
139
|
|
|
134
140
|
dict_flags_to_ebas = flags_to_ebas.copy()
|
|
135
141
|
|
|
@@ -151,12 +157,12 @@ def set_dataframe(
|
|
|
151
157
|
)
|
|
152
158
|
)
|
|
153
159
|
|
|
154
|
-
for var in additional_vars:
|
|
160
|
+
for var in additional_vars.get(data_level, []):
|
|
155
161
|
var_col = ("-", var)
|
|
156
162
|
if var_col not in df_export.columns:
|
|
157
163
|
continue
|
|
158
164
|
metadata = DataObject()
|
|
159
|
-
metadata.comp_name = var
|
|
165
|
+
metadata.comp_name = ebas_varname_of_var.get(var, var)
|
|
160
166
|
metadata.title = titles.get(var, var)
|
|
161
167
|
metadata.matrix = "instrument"
|
|
162
168
|
metadata.unit = unit_of_var[var]
|
|
@@ -233,9 +239,9 @@ def set_dataframe(
|
|
|
233
239
|
if var == "conc_calib":
|
|
234
240
|
# Set Nominal/measured=Calibration gas concentration
|
|
235
241
|
vnum = len(nas.variables) - 1
|
|
236
|
-
nas.add_var_characteristics(
|
|
237
|
-
|
|
238
|
-
)
|
|
242
|
+
# nas.add_var_characteristics(
|
|
243
|
+
# vnum, "Nominal/measured", "Calibration gas concentration"
|
|
244
|
+
# )
|
|
239
245
|
|
|
240
246
|
metadatas[sub] = metadata
|
|
241
247
|
return metadatas
|
|
@@ -327,7 +333,10 @@ def nas_to_avoca(nas: EbasNasaAmes) -> pd.DataFrame:
|
|
|
327
333
|
clean_for_df[("-", "status")] = calib_ids.astype(int)
|
|
328
334
|
continue
|
|
329
335
|
|
|
330
|
-
if comp_name in additional_vars:
|
|
336
|
+
if comp_name == "sample_volume":
|
|
337
|
+
comp_name = "volume_sample"
|
|
338
|
+
|
|
339
|
+
if comp_name in valid_additional_vars:
|
|
331
340
|
clean_for_df[("-", comp_name)] = np.array(values, dtype=float)
|
|
332
341
|
continue
|
|
333
342
|
|
|
@@ -335,27 +344,27 @@ def nas_to_avoca(nas: EbasNasaAmes) -> pd.DataFrame:
|
|
|
335
344
|
comp_name = comp_name.split("_")
|
|
336
345
|
if len(comp_name) == 1:
|
|
337
346
|
# Can be either concentration measured or calibration
|
|
338
|
-
|
|
347
|
+
compound = comp_name[0]
|
|
339
348
|
title: str = metadata["title"]
|
|
340
349
|
if title.endswith("_cal"):
|
|
341
350
|
variable = "cal"
|
|
342
351
|
else:
|
|
343
352
|
variable = "C"
|
|
344
353
|
elif len(comp_name) == 2:
|
|
345
|
-
|
|
354
|
+
compound, variable = comp_name
|
|
346
355
|
elif len(comp_name) == 3:
|
|
347
|
-
|
|
356
|
+
compound, var_first, var_second = comp_name
|
|
348
357
|
variable = f"{var_first}_{var_second}"
|
|
349
358
|
elif len(comp_name) == 4 and comp_name[-1] == "compounds":
|
|
350
359
|
# Concentration of merged compounds
|
|
351
|
-
|
|
360
|
+
compound = "_".join(comp_name)
|
|
352
361
|
variable = "C"
|
|
353
362
|
else:
|
|
354
363
|
logger.warning(f"passing {comp_name}, could not be understood. Skipping.")
|
|
355
364
|
continue
|
|
356
365
|
|
|
357
|
-
if
|
|
358
|
-
compounds.append(
|
|
366
|
+
if compound not in compounds:
|
|
367
|
+
compounds.append(compound)
|
|
359
368
|
|
|
360
369
|
# Convert the variable name to the avoca format
|
|
361
370
|
if variable == "cal":
|
|
@@ -363,10 +372,12 @@ def nas_to_avoca(nas: EbasNasaAmes) -> pd.DataFrame:
|
|
|
363
372
|
variable = "conc_calib"
|
|
364
373
|
elif variable != "C":
|
|
365
374
|
if variable not in ebas_compname_to_var:
|
|
366
|
-
raise ValueError(
|
|
375
|
+
raise ValueError(
|
|
376
|
+
f"Variable {variable} from {comp_name=} not recognized"
|
|
377
|
+
)
|
|
367
378
|
variable = ebas_compname_to_var[variable]
|
|
368
379
|
|
|
369
|
-
clean_for_df[(
|
|
380
|
+
clean_for_df[(compound, variable)] = np.array(values, dtype=float)
|
|
370
381
|
|
|
371
382
|
flag_serie = pd.Series(
|
|
372
383
|
[
|
|
@@ -375,7 +386,7 @@ def nas_to_avoca(nas: EbasNasaAmes) -> pd.DataFrame:
|
|
|
375
386
|
],
|
|
376
387
|
dtype=int,
|
|
377
388
|
)
|
|
378
|
-
flag_col = (
|
|
389
|
+
flag_col = (compound, "flag")
|
|
379
390
|
if variable == "conc_calib":
|
|
380
391
|
# Calibration will have missing values for air samples
|
|
381
392
|
# so we need to remove the missing values
|
avoca/bindings/ebas_flags.py
CHANGED
|
@@ -9,6 +9,12 @@ flags_to_ebas: dict[QA_Flag, int] = {
|
|
|
9
9
|
QA_Flag.EXTREME_VALUE: 458,
|
|
10
10
|
QA_Flag.CALIBRATION: 683, # I Invalid due to calibration. Used for Level 0.
|
|
11
11
|
QA_Flag.BLANK: 684, # Invalid due to zero/span check. Used for Level 0.
|
|
12
|
+
# Invalid due to laboratory standard measurement. Level 0.
|
|
13
|
+
QA_Flag.LABORATORY_STANDARD: 688,
|
|
14
|
+
# Invalid due to working standard measurement. Level 0.
|
|
15
|
+
QA_Flag.WORKING_STANDARD: 689,
|
|
16
|
+
# Invalid due to target standard measurement. Level 0.
|
|
17
|
+
QA_Flag.TARGET_MEASUREMENT: 690,
|
|
12
18
|
QA_Flag.HEIGHT_INTEGRATION: 0, # Valid
|
|
13
19
|
QA_Flag.UNCORRELATED: 0, # Valid
|
|
14
20
|
QA_Flag.MET_OFFICE_BASELINE: 0, # Valid
|
avoca/bindings/gcwerks.py
CHANGED
|
@@ -90,6 +90,8 @@ flag_values = {
|
|
|
90
90
|
"F": QA_Flag.INVALIDATED_EXT,
|
|
91
91
|
# X: An X flag is an 'un-do' the flag. If there is an automatic flag by GCWerks, but I decide I want that data point still included, I have the option to set an X flag.
|
|
92
92
|
"X": ValidFlag,
|
|
93
|
+
# Nans read from pandas
|
|
94
|
+
pd.NA: QA_Flag.MISSING,
|
|
93
95
|
}
|
|
94
96
|
|
|
95
97
|
# Show the flags and the columns they are applied to
|
|
@@ -203,6 +205,11 @@ def read_gcwerks(
|
|
|
203
205
|
format=datetime_format,
|
|
204
206
|
)
|
|
205
207
|
|
|
208
|
+
if not df[("-", "volume")].isna().all():
|
|
209
|
+
df[("-", "volume_sample")] = df[("-", "volume")]
|
|
210
|
+
# Drop useless columns
|
|
211
|
+
df = df.drop(columns=[("-", "date"), ("-", "time"), ("-", "volume")])
|
|
212
|
+
|
|
206
213
|
substances = []
|
|
207
214
|
|
|
208
215
|
for col in df.columns:
|
|
@@ -224,9 +231,9 @@ def read_gcwerks(
|
|
|
224
231
|
flags: pd.Series = serie_str.str[-1]
|
|
225
232
|
if col[1] in cols_float:
|
|
226
233
|
# Remove the flag value when given
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
234
|
+
mask_flag_allowed = flags.isin(flags_allowed)
|
|
235
|
+
serie_str = serie_str.where(~mask_flag_allowed, serie_str.str[:-1])
|
|
236
|
+
|
|
230
237
|
# Convert the serie to numeric
|
|
231
238
|
df[col] = pd.to_numeric(serie_str, errors="coerce")
|
|
232
239
|
|
|
@@ -359,6 +366,7 @@ def export(
|
|
|
359
366
|
"time",
|
|
360
367
|
"type",
|
|
361
368
|
"sample",
|
|
369
|
+
"volume",
|
|
362
370
|
f"{variables_str}",
|
|
363
371
|
f"> {out_file}",
|
|
364
372
|
)
|
avoca/flags.py
CHANGED
|
@@ -46,6 +46,14 @@ class QA_Flag(Flag):
|
|
|
46
46
|
# Invalid Values
|
|
47
47
|
INVALID_VALUES = auto()
|
|
48
48
|
|
|
49
|
+
# Target measurement
|
|
50
|
+
TARGET_MEASUREMENT = auto()
|
|
51
|
+
# Laboratory standard
|
|
52
|
+
LABORATORY_STANDARD = auto()
|
|
53
|
+
# Working standard
|
|
54
|
+
WORKING_STANDARD = auto()
|
|
55
|
+
|
|
56
|
+
|
|
49
57
|
# Flags that are considered to have missing values
|
|
50
58
|
nan_flags = [
|
|
51
59
|
QA_Flag.MISSING,
|
avoca/io.py
CHANGED
|
@@ -8,6 +8,11 @@ date_format = "%Y-%m-%d %H:%M:%S"
|
|
|
8
8
|
|
|
9
9
|
def to_csv(df: pd.DataFrame, path: Path, **kwargs) -> None:
|
|
10
10
|
"""Export a dataframe to a csv file."""
|
|
11
|
+
|
|
12
|
+
# Put the columns with "-" first
|
|
13
|
+
cols = df.columns.tolist()
|
|
14
|
+
cols_sorted = sorted(cols, key=lambda x: (x[0] != "-", x))
|
|
15
|
+
df = df[cols_sorted]
|
|
11
16
|
df.to_csv(path, index=False, date_format=date_format, **kwargs)
|
|
12
17
|
|
|
13
18
|
|
avoca/testing/utils.py
CHANGED
|
@@ -3,7 +3,7 @@ import pandas as pd
|
|
|
3
3
|
|
|
4
4
|
def make_dt_index(df: pd.DataFrame | pd.Index) -> pd.DataFrame | pd.Index:
|
|
5
5
|
"""Create a datetime index for the dataframe."""
|
|
6
|
-
index = pd.date_range(start="2023-01-01", periods=len(df), freq="h")
|
|
6
|
+
index = pd.date_range(start="2023-01-01", periods=len(df), freq="h", unit="s")
|
|
7
7
|
if isinstance(df, pd.Index):
|
|
8
8
|
return index
|
|
9
9
|
return df.set_index(index)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: avoca
|
|
3
|
-
Version: 0.15.1
|
|
3
|
+
Version: 0.17.0
|
|
4
4
|
Summary: @voc@: Quality assessement of measurement data
|
|
5
5
|
Project-URL: Homepage, https://gitlab.com/empa503/atmospheric-measurements/avoca
|
|
6
6
|
Project-URL: Bug Tracker, https://gitlab.com/empa503/atmospheric-measurements/avoca/-/issues
|
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
avoca/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
avoca/export_nas.py,sha256=B9B2iFSzB3f83nCfe2_vzouRblthK0_dGF8W3o0Kt5Y,155
|
|
3
3
|
avoca/flagging.py,sha256=tg6k_TVHRXiMJCAij_kUS-S2gSshYt7FKvQ0nJdljYs,2328
|
|
4
|
-
avoca/flags.py,sha256=
|
|
5
|
-
avoca/io.py,sha256=
|
|
4
|
+
avoca/flags.py,sha256=9LF-e8bcUdBQmxtoXU3ysx7KRzZiU6bU6nhDLhZtowU,1599
|
|
5
|
+
avoca/io.py,sha256=VMGqSPdtPM5Xu4kugMbr6TaMS9-U6pnObMu2ERKhNxE,891
|
|
6
6
|
avoca/logging.py,sha256=BrxgZQRfnkPSoQ0ZXhOzzhIsmbyjKvaJNG55MdM9jmA,86
|
|
7
7
|
avoca/manager.py,sha256=Faf3UyaCV58TMCZz6tWrLcY-W1WUtuh1aMP85yUVlmQ,5336
|
|
8
8
|
avoca/plots.py,sha256=zzoOJystasrKF2ikJLqcT8mlc-f-tu57vksXs-xRXv8,4424
|
|
9
9
|
avoca/requirements.py,sha256=q4z6bJ6iW5jSy10Y0elfE9BoEcAZC2-kUqYi4zA6TGE,563
|
|
10
10
|
avoca/settings.py,sha256=Px-sCGIlRyWI2RBJaGUY0K1V60kOZY9n41eft92bjN4,2112
|
|
11
11
|
avoca/bindings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
-
avoca/bindings/ebas.py,sha256=
|
|
13
|
-
avoca/bindings/ebas_flags.py,sha256=
|
|
12
|
+
avoca/bindings/ebas.py,sha256=3oWgN3teyEvQ9acpD2767A18IbgxjtbOdmXD79PLVrE,19793
|
|
13
|
+
avoca/bindings/ebas_flags.py,sha256=TEkmOI9Bia0C2KFO5GqLFXNIvcLak5yedlBFCKY5Gqg,2695
|
|
14
14
|
avoca/bindings/gcwerks-report.conf,sha256=jO0I62DfgzrXXS1FuiW8ds-oc1_j8kpFCO61Fk-erBw,230
|
|
15
|
-
avoca/bindings/gcwerks.py,sha256=
|
|
15
|
+
avoca/bindings/gcwerks.py,sha256=2Keff174EUjRwbBRDNKwGFEMr6TxJ2mpsjIN71DjUsQ,15188
|
|
16
16
|
avoca/bindings/gcwerks_gui.py,sha256=Fj3p8obFq3lWrWW0LlA8WBALP8-U70hvps5vZEt4NaM,9458
|
|
17
17
|
avoca/bindings/nabel.py,sha256=6OzaG1imFhOCVDQTO7YXvPQjbTfo4063w74yEuAVCEk,2991
|
|
18
18
|
avoca/bindings/qa_tool.py,sha256=hqsWUU99mYpkKfeULBoox4M2x7Bk0aYO4Q_8WGvt2og,11628
|
|
@@ -28,11 +28,11 @@ avoca/qa_class/test.py,sha256=Xc88_Vwf3hvPiKKl4ILxZ2N985SY8eujUdnAoQu4mbo,591
|
|
|
28
28
|
avoca/qa_class/zscore.py,sha256=jDw2UBmf7KBkskGOD5bgFy3RgNYUjc-9tYjSU-3L1ws,16714
|
|
29
29
|
avoca/testing/__init__.py,sha256=CzkugadVit48-eMoMVtojZLHeSKgnmMMen6sGu6Q42Y,108
|
|
30
30
|
avoca/testing/df.py,sha256=UQm6TdTDVRWvRNM5WnSWh6vdvDR1lqLNg0ti-B1L760,1865
|
|
31
|
-
avoca/testing/utils.py,sha256=
|
|
31
|
+
avoca/testing/utils.py,sha256=w0i-x3xG40JMxhoV_odVV7995VMKpUsHRYVJk2XYJ7I,318
|
|
32
32
|
avoca/utils/__init__.py,sha256=SZc1bHrQyg1DIYnbdUmANtUhnQWlJaMhPrDSWS8oVRY,1408
|
|
33
33
|
avoca/utils/flags_doc.py,sha256=jT1E0GN-B8ws_FyKGE20nlrKrgTHtoyjdo2r8RgYhwU,4294
|
|
34
34
|
avoca/utils/torch_models.py,sha256=53TgOgSPMOOSGYy2cm1EGSK7qQkYMGEOq319KKM_Ir0,1015
|
|
35
|
-
avoca-0.
|
|
36
|
-
avoca-0.
|
|
37
|
-
avoca-0.
|
|
38
|
-
avoca-0.
|
|
35
|
+
avoca-0.17.0.dist-info/METADATA,sha256=1V1osu38cBEsVtCqP4wIbe37PT4LlKv56jqJZk1OI38,1570
|
|
36
|
+
avoca-0.17.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
37
|
+
avoca-0.17.0.dist-info/licenses/LICENCE.txt,sha256=4MY53j3v7tEKwjyuriVz9YjB4Dscm2nDMB2CcG9lOmk,1059
|
|
38
|
+
avoca-0.17.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|