avoca 0.10.4__py3-none-any.whl → 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
avoca/bindings/ebas.py CHANGED
@@ -17,50 +17,11 @@ from ebas.io.file.nasa_ames import EbasNasaAmes
17
17
  from nilutility.datatypes import DataObject
18
18
  from nilutility.datetime_helper import DatetimeInterval
19
19
 
20
+ from avoca.bindings.ebas_flags import ebas_flag_to_avoca, flags_to_ebas
20
21
  from avoca.flags import QA_Flag
21
22
 
22
23
  logger = logging.getLogger(__name__)
23
24
 
24
- # https://projects.nilu.no/ccc/flags/flags.html for more info on what ebas uses
25
- flags_to_ebas: dict[QA_Flag, int] = {
26
- QA_Flag.MISSING: 999, # M Missing measurement, unspecified reason
27
- QA_Flag.ZERO_NEG_CONC_EXT: 999,
28
- QA_Flag.INVALIDATED_EXT: 900, # H Hidden and invalidated by data originator
29
- # V Extremely high value, outside four times standard deviation in a lognormal distribution
30
- QA_Flag.EXTREME_VALUE: 458,
31
- QA_Flag.CALIBRATION: 683, # I Invalid due to calibration. Used for Level 0.
32
- QA_Flag.BLANK: 684, # Invalid due to zero/span check. Used for Level 0.
33
- QA_Flag.HEIGHT_INTEGRATION: 0, # Valid
34
- QA_Flag.UNCORRELATED: 0, # Valid
35
- QA_Flag.MET_OFFICE_BASELINE: 0, # Valid
36
- QA_Flag.BELOW_DETECTION_LIMIT: 147, # B Below detection limit
37
- QA_Flag.POLLUTION: 900,
38
- QA_Flag.SUSPICIOUS_RT: 900,
39
- QA_Flag.INVALID_VALUES: 999, # M Missing measurement, unspecified reason
40
- }
41
-
42
- ebas_flag_to_avoca: dict[int, QA_Flag] = {
43
- ebas_flag: avoca_flag for avoca_flag, ebas_flag in flags_to_ebas.items()
44
- }
45
- # Set some flags with Multiple values to the same value
46
- ebas_flag_to_avoca.pop(0) # 0 is valid in avoca
47
- ebas_flag_to_avoca[999] = QA_Flag.MISSING
48
- ebas_flag_to_avoca[900] = QA_Flag.INVALIDATED_EXT
49
- # Unspecified contamination or local influence, but considered valid
50
- ebas_flag_to_avoca[559] = QA_Flag.POLLUTION
51
- ebas_flag_to_avoca[685] = (
52
- QA_Flag.CALIBRATION
53
- ) # Invalid due to secondary standard gas measurement. Used for Level 0.
54
- ebas_flag_to_avoca[980] = (
55
- QA_Flag.CALIBRATION
56
- ) # Missing due to calibration or zero/span check
57
-
58
- missing_flags = set(QA_Flag) - set(flags_to_ebas.keys())
59
- if missing_flags:
60
- raise RuntimeError(
61
- f"Not all QA flags are mapped to Ebas flags. Missing: {missing_flags}"
62
- )
63
-
64
25
 
65
26
  ebas_compname_of_var = {
66
27
  "rt": "retention_time",
@@ -0,0 +1,59 @@
1
# https://projects.nilu.no/ccc/flags/flags.html for more info on what ebas uses
from avoca.flags import QA_Flag

# Forward mapping: avoca QA flag -> numeric EBAS flag code used on export.
# Several avoca flags intentionally share one EBAS code (0 = valid, 900, 999);
# the reverse mapping below resolves those ambiguities explicitly.
flags_to_ebas: dict[QA_Flag, int] = {
    QA_Flag.MISSING: 999,  # M Missing measurement, unspecified reason
    QA_Flag.ZERO_NEG_CONC_EXT: 999,  # also exported as missing (999)
    QA_Flag.INVALIDATED_EXT: 900,  # H Hidden and invalidated by data originator
    # V Extremely high value, outside four times standard deviation in a lognormal distribution
    QA_Flag.EXTREME_VALUE: 458,
    QA_Flag.CALIBRATION: 683,  # I Invalid due to calibration. Used for Level 0.
    QA_Flag.BLANK: 684,  # Invalid due to zero/span check. Used for Level 0.
    QA_Flag.HEIGHT_INTEGRATION: 0,  # Valid
    QA_Flag.UNCORRELATED: 0,  # Valid
    QA_Flag.MET_OFFICE_BASELINE: 0,  # Valid
    QA_Flag.BELOW_DETECTION_LIMIT: 147,  # B Below detection limit
    QA_Flag.POLLUTION: 900,  # also exported as hidden/invalidated (900)
    QA_Flag.SUSPICIOUS_RT: 900,  # also exported as hidden/invalidated (900)
    QA_Flag.INVALID_VALUES: 999,  # M Missing measurement, unspecified reason
}
20
+
21
# Reverse mapping (EBAS code -> avoca flag), built by inverting flags_to_ebas.
# The forward map is many-to-one, so the comprehension keeps whichever avoca
# flag was inserted last for a duplicated EBAS code; the assignments below then
# pin the preferred avoca flag for those codes explicitly.
ebas_flag_to_avoca: dict[int, QA_Flag] = {
    ebas_flag: avoca_flag for avoca_flag, ebas_flag in flags_to_ebas.items()
}
# Set some flags with Multiple values to the same value
ebas_flag_to_avoca.pop(0)  # 0 is valid in avoca
ebas_flag_to_avoca[999] = QA_Flag.MISSING
ebas_flag_to_avoca[900] = QA_Flag.INVALIDATED_EXT
# Unspecified contamination or local influence, but considered valid
ebas_flag_to_avoca[559] = QA_Flag.POLLUTION
ebas_flag_to_avoca[685] = (
    QA_Flag.CALIBRATION
)  # Invalid due to secondary standard gas measurement. Used for Level 0.
ebas_flag_to_avoca[980] = (
    QA_Flag.CALIBRATION
)  # Missing due to calibration or zero/span check

# Import-time sanity check: every avoca QA flag must have an EBAS code, so a
# new QA_Flag member without a mapping fails loudly instead of at export time.
missing_flags = set(QA_Flag) - set(flags_to_ebas.keys())
if missing_flags:
    raise RuntimeError(
        f"Not all QA flags are mapped to Ebas flags. Missing: {missing_flags}"
    )
42
+
43
# priority of the flag to appear in the output
# Useful when you can select only one flag value
# Ordered from lowest to highest priority: consumers that iterate this list
# and overwrite on each match (e.g. the EBAS flag column written by
# export_EmpaQATool) end up keeping the last matching entry.
flag_order: list[QA_Flag] = [
    QA_Flag.CALIBRATION,
    QA_Flag.BLANK,
    QA_Flag.HEIGHT_INTEGRATION,
    QA_Flag.MET_OFFICE_BASELINE,
    QA_Flag.BELOW_DETECTION_LIMIT,
    QA_Flag.POLLUTION,
    QA_Flag.SUSPICIOUS_RT,
    QA_Flag.UNCORRELATED,
    QA_Flag.EXTREME_VALUE,
    QA_Flag.INVALIDATED_EXT,
    QA_Flag.ZERO_NEG_CONC_EXT,
    QA_Flag.MISSING,
    QA_Flag.INVALID_VALUES,
]
@@ -0,0 +1,209 @@
1
+ """Few modules for importing and exporting from https://voc-qc.nilu.no/
2
+
3
+ Originally taken from tucavoc.
4
+ """
5
+
6
+ import logging
7
+ import warnings
8
+ from datetime import datetime, timedelta
9
+ from pathlib import Path
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+ import pandas.errors
14
+
15
+ from avoca.bindings.ebas_flags import flag_order, flags_to_ebas
16
+ from avoca.flags import QA_Flag
17
+ from avoca.utils import compounds_from_df
18
+
19
+
20
def number_of_digits_required(serie: pd.Series) -> int:
    """Return how many digits are needed to represent the values of `serie`.

    Used to size the all-nines placeholder written for invalid entries.
    An empty, all-NaN or all-zero series needs only 2 digits.
    """
    # TODO: need to check if we need the actual int value, we can put a .9 at the end
    if (pd.isna(serie) | (serie == 0)).all():
        # Only 2 will be required
        return 2
    # Order of magnitude of every strictly positive value.
    magnitudes = np.log10(serie[serie > 0])
    # Drop +inf magnitudes (i.e. values that were +inf in the input).
    finite_magnitudes = magnitudes[magnitudes != np.inf]
    if len(finite_magnitudes) == 0:
        return 2
    # Largest magnitude (at least 0) plus a 2-digit safety margin.
    return int(max(np.max(finite_magnitudes), 0) + 2)
32
+
33
+
34
def export_EmpaQATool(
    df: pd.DataFrame,
    export_path: Path,
    station: str = "XXX",
    revision_date: datetime | None = None,
    dataset: datetime | str | None = None,
    export_names: dict[str, str] | None = None,
    datetime_offsets: tuple[timedelta, timedelta] | None = None,
    substances: list[str] | None = None,
    rounding_decimals: int = 4,
) -> Path:
    """Export to the EmpaQATool format.

    The exported file from the program can then be imported to
    the tool on https://voc-qc.nilu.no/Import
    The specs for that file can be found in
    https://voc-qc.nilu.no/doc/CSVImport_FormatSpecifications.pdf

    This will add the additional data from the dataframe.

    The file generated will be named:
    export_path/[station]_[dataset]_[revision].csv

    .. note:: `df` is modified in place: the ("-", "datetime*") columns may
        be added and the INVALID_VALUES QA flag is set on invalid rows.

    :arg df: Calculation dataframe
    :arg export_path: Path (directory) to export the file
    :arg station: Station name to use in the file name
    :arg revision_date: Revision date as datetime to use in the file name
    :arg dataset: Dataset name as datetime or string to use in the file name
    :arg export_names: Dictionary of substance names to use in the file name
        The keys are the substance names and the values are the names to use in the file.
    :arg datetime_offsets: Tuple of two timedelta to use for the start and end datetime
    :arg substances: List of substances to export. You can also specify group names.
        If not specified, this will use the substances from `df_substances`.
    :arg rounding_decimals: Number of decimals to round the values to.

    :return: Path of the written csv file.
    """

    logger = logging.getLogger(__name__)

    # Create the mutable default per call (avoids the shared-default pitfall).
    if export_names is None:
        export_names = {}

    warnings.filterwarnings(
        action="ignore",
        category=pandas.errors.PerformanceWarning,
        module="pandas",
    )

    # Datetime format expected by the import tool.
    fmt = "%d.%m.%Y %H:%M:%S"

    need_datetime_col = ("-", "datetime_start") not in df.columns and (
        "-",
        "datetime_end",
    ) not in df.columns

    if need_datetime_col:
        if ("-", "datetime") not in df.columns:
            # Fall back to the index as the measurement datetime.
            df[("-", "datetime")] = df.index
        # Check type of the datetime column
        if not pd.api.types.is_datetime64_any_dtype(df[("-", "datetime")]):
            raise ValueError(
                "The datetime column is not of type datetime64. "
                "Please convert it to datetime64. "
                "Or provide the ('-', 'datetime_start') and "
                "('-', 'datetime_end') columns."
            )
        if datetime_offsets is None:
            raise ValueError(
                "No datetime_start or datetime_end column in the dataframe. "
                "Please provide the datetime_offsets to specify."
            )

        df[("-", "datetime_start")] = df[("-", "datetime")] + datetime_offsets[0]
        df[("-", "datetime_end")] = df[("-", "datetime")] + datetime_offsets[1]

    df_out = pd.DataFrame(
        {
            "start": df[("-", "datetime_start")].dt.strftime(fmt),
            "end": df[("-", "datetime_end")].dt.strftime(fmt),
        },
        index=df.index,
    )
    logger.debug(f"df_out: {df_out.head()}")
    if not substances:
        substances = compounds_from_df(df)

    # Helpers for cleaning numeric columns before writing them out.
    def remove_infs(x: pd.Series) -> pd.Series:
        # Infinities cannot be represented in the output; treat them as NaN.
        return x.replace([np.inf, -np.inf], np.nan)

    def is_invalid(x: pd.Series) -> pd.Series:
        # A value is invalid when it is +-inf or NaN.
        return x.isin([np.inf, -np.inf]) | pd.isna(x)

    def clean_col(x: pd.Series) -> pd.Series:
        # Convert to str so we can control the formatting in the csv.
        return remove_infs(x).round(rounding_decimals).astype(str)

    for substance in substances:

        export_name = export_names.get(substance, substance)

        # Concentration column name differs between datasets ("conc" or "C").
        conc_col = (
            (substance, "conc")
            if (substance, "conc") in df.columns
            else (substance, "C")
        )
        u_expanded_col = (substance, "u_expanded")
        u_precision_col = (substance, "u_precision")
        flag_col = (substance, "flag")

        # Rows exported as missing: flagged missing/invalidated, or with a
        # non-finite concentration or uncertainty.
        # Use | (not +) to combine the flag bit values into a mask.
        mask_invalid = (
            (
                df[flag_col] & (QA_Flag.MISSING.value | QA_Flag.INVALIDATED_EXT.value)
            ).astype(bool)
            | is_invalid(df[conc_col])
            | (
                is_invalid(df[u_expanded_col])
                if u_expanded_col in df.columns
                else False
            )
            | (
                is_invalid(df[u_precision_col])
                if u_precision_col in df.columns
                else False
            )
        )

        logger.debug(f"mask_invalid: {mask_invalid}")
        # Flag the invalids. |= (not ^=) so rows already carrying the
        # INVALID_VALUES flag keep it instead of having it toggled off.
        df.loc[mask_invalid, flag_col] |= QA_Flag.INVALID_VALUES.value

        # Convert to str so we can control the formatting
        df_out[f"{export_name}-Value"] = clean_col(df[conc_col])

        # Input the missing values as 9. see issue #7 gitlab.empa.ch
        df_out.loc[mask_invalid, f"{export_name}-Value"] = (
            "9" * number_of_digits_required(df[conc_col])
        )

        if u_expanded_col in df.columns:
            # Convert to str so we can control the formatting
            df_out[f"{export_name}-Accuracy"] = clean_col(df[u_expanded_col])
            # Input the missing values as 9. see issue #7 gitlab.empa.ch
            df_out.loc[mask_invalid, f"{export_name}-Accuracy"] = (
                "9" * number_of_digits_required(df[u_expanded_col])
            )

        if u_precision_col in df.columns:
            # Convert to str so we can control the formatting
            df_out[f"{export_name}-Precision"] = clean_col(df[u_precision_col])

            # Input the missing values as 9. see issue #7 gitlab.empa.ch
            df_out.loc[mask_invalid, f"{export_name}-Precision"] = (
                "9" * number_of_digits_required(df[u_precision_col])
            )

        # Build the EBAS flag column: iterate flags in priority order so the
        # last matching (highest priority) flag wins, then render the numeric
        # code as a fraction (e.g. 999 -> "0.999").
        flag_col_out = f"{export_name}-Flag"
        df_out[flag_col_out] = 0.0
        for flag in flag_order:
            df_out.loc[
                (df[flag_col].values & flag.value).astype(bool), flag_col_out
            ] = flags_to_ebas[flag]
        df_out[flag_col_out] = (df_out[flag_col_out] * 1e-3).map("{:.3f}".format)

    export_path.mkdir(exist_ok=True)

    dt_format = "%Y%m%d"
    if dataset is None:
        # Infer the dataset name from the first start time of the data.
        dataset = datetime.strptime(df_out["start"].iloc[0], fmt).strftime(dt_format)
    elif isinstance(dataset, datetime):
        # Format datetime datasets the same way as the inferred one, so the
        # file name never contains str(datetime) (spaces/colons).
        dataset = dataset.strftime(dt_format)

    if revision_date is None:
        revision = datetime.now().strftime(dt_format)
    else:
        # Same formatting for an explicitly supplied revision datetime.
        revision = revision_date.strftime(dt_format)

    # [station]_[dataset]_[revision]
    file_name = f"{station}_{dataset}_{revision}"

    out_filepath = Path(export_path, file_name).with_suffix(".csv")
    df_out.to_csv(
        out_filepath,
        sep=";",
        index=False,
        encoding="utf-8",
    )
    logger.info(f"Exported to `{out_filepath}`")

    return out_filepath
avoca/testing/df.py CHANGED
@@ -23,6 +23,16 @@ simple_df = pd.DataFrame(
23
23
  ),
24
24
  )
25
25
 
26
# Fixture with non-finite / negative values: compA holds only valid
# concentrations, compB mixes a valid value with inf, NaN and a negative one.
invalids_df = pd.DataFrame(
    {
        ("compA", "C"): [1.0, 1.1, 0.8, 0.9],
        ("compB", "C"): [1.0, np.inf, np.nan, -0.3],
    }
)
35
+
26
36
  compab_multiindex = pd.MultiIndex.from_tuples(
27
37
  [
28
38
  ("compA", "test_var"),
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: avoca
3
- Version: 0.10.4
3
+ Version: 0.11.0
4
4
  Summary: @voc@: Quality assessement of measurement data
5
5
  Project-URL: Homepage, https://gitlab.com/empa503/atmospheric-measurements/avoca
6
6
  Project-URL: Bug Tracker, https://gitlab.com/empa503/atmospheric-measurements/avoca/-/issues
@@ -8,11 +8,13 @@ avoca/manager.py,sha256=ET-ATrSLi2rSV7PjBzwpjj0V_60MFxSIZqQ03aEIbdA,5284
8
8
  avoca/requirements.py,sha256=q4z6bJ6iW5jSy10Y0elfE9BoEcAZC2-kUqYi4zA6TGE,563
9
9
  avoca/settings.py,sha256=Px-sCGIlRyWI2RBJaGUY0K1V60kOZY9n41eft92bjN4,2112
10
10
  avoca/bindings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- avoca/bindings/ebas.py,sha256=U5RhjQuEvYcEQQ17XDj2HOQ_Wr1WX03aR_kL3BVmsO8,18975
11
+ avoca/bindings/ebas.py,sha256=Xe0TkV4fAm0KJGsEUAlkRyHsohL_2DSZFc5pPE41OS0,17217
12
+ avoca/bindings/ebas_flags.py,sha256=ls8cEKPC2QjlkWyqiytyjFfemqM8ot8suys_Qi5Xx1o,2352
12
13
  avoca/bindings/gcwerks-report.conf,sha256=jO0I62DfgzrXXS1FuiW8ds-oc1_j8kpFCO61Fk-erBw,230
13
14
  avoca/bindings/gcwerks.py,sha256=pTwZhSuoD4usER1-JhQJtgj1KUcZR1ZN1loZMCSd3TQ,14651
14
15
  avoca/bindings/gcwerks_gui.py,sha256=Fj3p8obFq3lWrWW0LlA8WBALP8-U70hvps5vZEt4NaM,9458
15
16
  avoca/bindings/nabel.py,sha256=VbC_ARvtso8onILAD8gROt5Y2URdx6NfAqMn4p1mUWU,3020
17
+ avoca/bindings/qa_tool.py,sha256=ZPtQo8dHo6wooIlc9Vzk8y91Qgso-RBtGR_h2TAZQ24,7583
16
18
  avoca/bindings/synspec.py,sha256=W5RnBu-6eetmwjM8iMBe4wNwVNIaVpNW3bwa2ykGM2U,1733
17
19
  avoca/qa_class/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
20
  avoca/qa_class/abstract.py,sha256=4s8GgkeC3WbMnNxygajhawh7TU9v13i-SP0j5b5YOMc,5432
@@ -23,11 +25,11 @@ avoca/qa_class/rt.py,sha256=t927H_o0Kn-VwEkG9TW33MSDW_2of2-pBX_gTMuaXA0,3817
23
25
  avoca/qa_class/test.py,sha256=Xc88_Vwf3hvPiKKl4ILxZ2N985SY8eujUdnAoQu4mbo,591
24
26
  avoca/qa_class/zscore.py,sha256=HqOxV45smhXqcv2XrB7W7plE9RoHzBGVEAbmuwsiv7w,16696
25
27
  avoca/testing/__init__.py,sha256=CzkugadVit48-eMoMVtojZLHeSKgnmMMen6sGu6Q42Y,108
26
- avoca/testing/df.py,sha256=D1ONXe2b6vuM68sTIh318dcRRqodQ5KQioiVMl6OJYo,1592
28
+ avoca/testing/df.py,sha256=Nc0GUYTApZgYyUTMnHMTbSKLiA5ty9Bg7gUGtnoFYMI,1826
27
29
  avoca/utils/__init__.py,sha256=LEA2jJsqwSK2DBzXg00DbPhM1fXXREJ0XxLeuJtKapY,1398
28
30
  avoca/utils/flags_doc.py,sha256=cS7yKpxVh_SA6EdH3lSy9UpcIvhGwzAELRbkXN3CxO8,4168
29
31
  avoca/utils/torch_models.py,sha256=53TgOgSPMOOSGYy2cm1EGSK7qQkYMGEOq319KKM_Ir0,1015
30
- avoca-0.10.4.dist-info/METADATA,sha256=usLBL-Kyms4faFat9-TYcYtw-U-jSqFDgMCQeZCFdjc,1570
31
- avoca-0.10.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
32
- avoca-0.10.4.dist-info/licenses/LICENCE.txt,sha256=4MY53j3v7tEKwjyuriVz9YjB4Dscm2nDMB2CcG9lOmk,1059
33
- avoca-0.10.4.dist-info/RECORD,,
32
+ avoca-0.11.0.dist-info/METADATA,sha256=sYa3FgsjU0xh8NOCmHVr78aVcMCu-hOvGi-b6-H4c00,1570
33
+ avoca-0.11.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
34
+ avoca-0.11.0.dist-info/licenses/LICENCE.txt,sha256=4MY53j3v7tEKwjyuriVz9YjB4Dscm2nDMB2CcG9lOmk,1059
35
+ avoca-0.11.0.dist-info/RECORD,,
File without changes