avoca 0.15.1__py3-none-any.whl → 0.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
avoca/bindings/ebas.py CHANGED
@@ -19,6 +19,7 @@ from nilutility.datetime_helper import DatetimeInterval
19
19
 
20
20
  from avoca.bindings.ebas_flags import ebas_flag_to_avoca, flags_to_ebas, nan_flags
21
21
  from avoca.flags import QA_Flag
22
+ from avoca.utils import compounds_from_df
22
23
 
23
24
  logger = logging.getLogger(__name__)
24
25
 
@@ -31,15 +32,29 @@ ebas_compname_of_var = {
31
32
  ebas_compname_to_var = {v: k for k, v in ebas_compname_of_var.items()}
32
33
 
33
34
 
34
- # Additional variables that can be in the dataset (not compound dependant)
35
- additional_vars = [
36
- "temperature",
37
- "pressure",
38
- ]
39
-
40
35
  titles = {
41
36
  "temperature": "T_inlet",
42
37
  "pressure": "P_inlet",
38
+ "volume_sample": "svol",
39
+ }
40
+
41
+ unit_of_var = {
42
+ "C": "pmol/mol",
43
+ "conc_calib": "pmol/mol",
44
+ "rt": "s",
45
+ "w": "s",
46
+ "area": "area_unit",
47
+ "temperature": "K",
48
+ "pressure": "hPa",
49
+ "volume_sample": "mL",
50
+ }
51
+
52
+ ebas_varname_of_var = {
53
+ "rt": "rt",
54
+ "w": "pw",
55
+ "area": "pa",
56
+ "conc_calib": "cal",
57
+ "volume_sample": "sample_volume",
43
58
  }
44
59
 
45
60
 
@@ -51,6 +66,17 @@ class DataLevel(IntEnum):
51
66
  QA_CONCS = 2
52
67
 
53
68
 
69
+ vars_to_export = {
70
+ DataLevel.AREAS: ["area", "rt", "w", "conc_calib"],
71
+ DataLevel.CONCS: ["C"],
72
+ DataLevel.QA_CONCS: ["C"],
73
+ }
74
+
75
+ # Additional variables that can be in the dataset (not compound dependant)
76
+ additional_vars = {
77
+ DataLevel.AREAS: ["volume_sample", "temperature", "pressure"],
78
+ }
79
+ valid_additional_vars = sum(additional_vars.values(), [])
54
80
  concs_data_levels = [DataLevel.CONCS, DataLevel.QA_CONCS]
55
81
 
56
82
 
@@ -64,8 +90,8 @@ def data_level_after_qa(data_level: DataLevel) -> DataLevel:
64
90
  def set_dataframe(
65
91
  nas,
66
92
  df_export: pd.DataFrame,
67
- compounds: dict[str, str],
68
- data_level: DataLevel,
93
+ compounds: dict[str, str] | None = None,
94
+ data_level: DataLevel = DataLevel.CONCS,
69
95
  start_offset: timedelta | None = None,
70
96
  end_offset: timedelta | None = None,
71
97
  flag_all: list[int] = [],
@@ -108,28 +134,8 @@ def set_dataframe(
108
134
  )
109
135
  ]
110
136
 
111
- vars_to_export = {
112
- DataLevel.AREAS: ["area", "rt", "w", "conc_calib"],
113
- DataLevel.CONCS: ["C"],
114
- DataLevel.QA_CONCS: ["C"],
115
- }
116
-
117
- unit_of_var = {
118
- "C": "pmol/mol",
119
- "conc_calib": "pmol/mol",
120
- "rt": "s",
121
- "w": "s",
122
- "area": "area_unit",
123
- "temperature": "K",
124
- "pressure": "hPa",
125
- }
126
-
127
- ebas_varname_of_var = {
128
- "rt": "rt",
129
- "w": "pw",
130
- "area": "pa",
131
- "conc_calib": "cal",
132
- }
137
+ if compounds is None:
138
+ compounds = {c: c for c in compounds_from_df(df_export)}
133
139
 
134
140
  dict_flags_to_ebas = flags_to_ebas.copy()
135
141
 
@@ -151,12 +157,12 @@ def set_dataframe(
151
157
  )
152
158
  )
153
159
 
154
- for var in additional_vars:
160
+ for var in additional_vars.get(data_level, []):
155
161
  var_col = ("-", var)
156
162
  if var_col not in df_export.columns:
157
163
  continue
158
164
  metadata = DataObject()
159
- metadata.comp_name = var
165
+ metadata.comp_name = ebas_varname_of_var.get(var, var)
160
166
  metadata.title = titles.get(var, var)
161
167
  metadata.matrix = "instrument"
162
168
  metadata.unit = unit_of_var[var]
@@ -233,9 +239,9 @@ def set_dataframe(
233
239
  if var == "conc_calib":
234
240
  # Set Nominal/measured=Calibration gas concentration
235
241
  vnum = len(nas.variables) - 1
236
- nas.add_var_characteristics(
237
- vnum, "Nominal/measured", "Calibration gas concentration"
238
- )
242
+ # nas.add_var_characteristics(
243
+ # vnum, "Nominal/measured", "Calibration gas concentration"
244
+ # )
239
245
 
240
246
  metadatas[sub] = metadata
241
247
  return metadatas
@@ -327,7 +333,10 @@ def nas_to_avoca(nas: EbasNasaAmes) -> pd.DataFrame:
327
333
  clean_for_df[("-", "status")] = calib_ids.astype(int)
328
334
  continue
329
335
 
330
- if comp_name in additional_vars:
336
+ if comp_name == "sample_volume":
337
+ comp_name = "volume_sample"
338
+
339
+ if comp_name in valid_additional_vars:
331
340
  clean_for_df[("-", comp_name)] = np.array(values, dtype=float)
332
341
  continue
333
342
 
@@ -335,27 +344,27 @@ def nas_to_avoca(nas: EbasNasaAmes) -> pd.DataFrame:
335
344
  comp_name = comp_name.split("_")
336
345
  if len(comp_name) == 1:
337
346
  # Can be either concentration measured or calibration
338
- compund = comp_name[0]
347
+ compound = comp_name[0]
339
348
  title: str = metadata["title"]
340
349
  if title.endswith("_cal"):
341
350
  variable = "cal"
342
351
  else:
343
352
  variable = "C"
344
353
  elif len(comp_name) == 2:
345
- compund, variable = comp_name
354
+ compound, variable = comp_name
346
355
  elif len(comp_name) == 3:
347
- compund, var_first, var_second = comp_name
356
+ compound, var_first, var_second = comp_name
348
357
  variable = f"{var_first}_{var_second}"
349
358
  elif len(comp_name) == 4 and comp_name[-1] == "compounds":
350
359
  # Concentration of merged compounds
351
- compund = "_".join(comp_name)
360
+ compound = "_".join(comp_name)
352
361
  variable = "C"
353
362
  else:
354
363
  logger.warning(f"passing {comp_name}, could not be understood. Skipping.")
355
364
  continue
356
365
 
357
- if compund not in compounds:
358
- compounds.append(compund)
366
+ if compound not in compounds:
367
+ compounds.append(compound)
359
368
 
360
369
  # Convert the variable name to the avoca format
361
370
  if variable == "cal":
@@ -363,10 +372,12 @@ def nas_to_avoca(nas: EbasNasaAmes) -> pd.DataFrame:
363
372
  variable = "conc_calib"
364
373
  elif variable != "C":
365
374
  if variable not in ebas_compname_to_var:
366
- raise ValueError(f"Variable {variable} not recognized")
375
+ raise ValueError(
376
+ f"Variable {variable} from {comp_name=} not recognized"
377
+ )
367
378
  variable = ebas_compname_to_var[variable]
368
379
 
369
- clean_for_df[(compund, variable)] = np.array(values, dtype=float)
380
+ clean_for_df[(compound, variable)] = np.array(values, dtype=float)
370
381
 
371
382
  flag_serie = pd.Series(
372
383
  [
@@ -375,7 +386,7 @@ def nas_to_avoca(nas: EbasNasaAmes) -> pd.DataFrame:
375
386
  ],
376
387
  dtype=int,
377
388
  )
378
- flag_col = (compund, "flag")
389
+ flag_col = (compound, "flag")
379
390
  if variable == "conc_calib":
380
391
  # Calibration will have missing values for air samples
381
392
  # so we need to remove the missing values
@@ -9,6 +9,12 @@ flags_to_ebas: dict[QA_Flag, int] = {
9
9
  QA_Flag.EXTREME_VALUE: 458,
10
10
  QA_Flag.CALIBRATION: 683, # I Invalid due to calibration. Used for Level 0.
11
11
  QA_Flag.BLANK: 684, # Invalid due to zero/span check. Used for Level 0.
12
+ # Invalid due to laboratory standard measurement. Level 0.
13
+ QA_Flag.LABORATORY_STANDARD: 688,
14
+ # Invalid due to working standard measurement. Level 0.
15
+ QA_Flag.WORKING_STANDARD: 689,
16
+ # Invalid due to target standard measurement. Level 0.
17
+ QA_Flag.TARGET_MEASUREMENT: 690,
12
18
  QA_Flag.HEIGHT_INTEGRATION: 0, # Valid
13
19
  QA_Flag.UNCORRELATED: 0, # Valid
14
20
  QA_Flag.MET_OFFICE_BASELINE: 0, # Valid
avoca/bindings/gcwerks.py CHANGED
@@ -90,6 +90,8 @@ flag_values = {
90
90
  "F": QA_Flag.INVALIDATED_EXT,
91
91
  # X: An X flag is an 'un-do' the flag. If there is an automatic flag by GCWerks, but I decide I want that data point still included, I have the option to set an X flag.
92
92
  "X": ValidFlag,
93
+ # Nans read from pandas
94
+ pd.NA: QA_Flag.MISSING,
93
95
  }
94
96
 
95
97
  # Show the flags and the columns they are applied to
@@ -203,6 +205,11 @@ def read_gcwerks(
203
205
  format=datetime_format,
204
206
  )
205
207
 
208
+ if not df[("-", "volume")].isna().all():
209
+ df[("-", "volume_sample")] = df[("-", "volume")]
210
+ # Drop useless columns
211
+ df = df.drop(columns=[("-", "date"), ("-", "time"), ("-", "volume")])
212
+
206
213
  substances = []
207
214
 
208
215
  for col in df.columns:
@@ -224,9 +231,9 @@ def read_gcwerks(
224
231
  flags: pd.Series = serie_str.str[-1]
225
232
  if col[1] in cols_float:
226
233
  # Remove the flag value when given
227
- serie_str = serie_str.apply(
228
- lambda x: x[:-1] if x[-1] in flags_allowed else x
229
- )
234
+ mask_flag_allowed = flags.isin(flags_allowed)
235
+ serie_str = serie_str.where(~mask_flag_allowed, serie_str.str[:-1])
236
+
230
237
  # Convert the serie to numeric
231
238
  df[col] = pd.to_numeric(serie_str, errors="coerce")
232
239
 
@@ -359,6 +366,7 @@ def export(
359
366
  "time",
360
367
  "type",
361
368
  "sample",
369
+ "volume",
362
370
  f"{variables_str}",
363
371
  f"> {out_file}",
364
372
  )
avoca/flags.py CHANGED
@@ -46,6 +46,14 @@ class QA_Flag(Flag):
46
46
  # Invalid Values
47
47
  INVALID_VALUES = auto()
48
48
 
49
+ # Target measurement
50
+ TARGET_MEASUREMENT = auto()
51
+ # Laboratory standard
52
+ LABORATORY_STANDARD = auto()
53
+ # Working standard
54
+ WORKING_STANDARD = auto()
55
+
56
+
49
57
  # Flags that are considered to have missing values
50
58
  nan_flags = [
51
59
  QA_Flag.MISSING,
avoca/io.py CHANGED
@@ -8,6 +8,11 @@ date_format = "%Y-%m-%d %H:%M:%S"
8
8
 
9
9
  def to_csv(df: pd.DataFrame, path: Path, **kwargs) -> None:
10
10
  """Export a dataframe to a csv file."""
11
+
12
+ # Put the columns with "-" first
13
+ cols = df.columns.tolist()
14
+ cols_sorted = sorted(cols, key=lambda x: (x[0] != "-", x))
15
+ df = df[cols_sorted]
11
16
  df.to_csv(path, index=False, date_format=date_format, **kwargs)
12
17
 
13
18
 
avoca/testing/utils.py CHANGED
@@ -3,7 +3,7 @@ import pandas as pd
3
3
 
4
4
  def make_dt_index(df: pd.DataFrame | pd.Index) -> pd.DataFrame | pd.Index:
5
5
  """Create a datetime index for the dataframe."""
6
- index = pd.date_range(start="2023-01-01", periods=len(df), freq="h")
6
+ index = pd.date_range(start="2023-01-01", periods=len(df), freq="h", unit="s")
7
7
  if isinstance(df, pd.Index):
8
8
  return index
9
9
  return df.set_index(index)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: avoca
3
- Version: 0.15.1
3
+ Version: 0.17.0
4
4
  Summary: @voc@: Quality assessement of measurement data
5
5
  Project-URL: Homepage, https://gitlab.com/empa503/atmospheric-measurements/avoca
6
6
  Project-URL: Bug Tracker, https://gitlab.com/empa503/atmospheric-measurements/avoca/-/issues
@@ -1,18 +1,18 @@
1
1
  avoca/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  avoca/export_nas.py,sha256=B9B2iFSzB3f83nCfe2_vzouRblthK0_dGF8W3o0Kt5Y,155
3
3
  avoca/flagging.py,sha256=tg6k_TVHRXiMJCAij_kUS-S2gSshYt7FKvQ0nJdljYs,2328
4
- avoca/flags.py,sha256=wobuZoIJh6dFsdiqqYJLZ_AHe4pcFE9tjuoimNXLjIQ,1428
5
- avoca/io.py,sha256=67D5x1qkLqWC7wWehyOfX96L4H3-tn9x2V4jMCoIRqA,729
4
+ avoca/flags.py,sha256=9LF-e8bcUdBQmxtoXU3ysx7KRzZiU6bU6nhDLhZtowU,1599
5
+ avoca/io.py,sha256=VMGqSPdtPM5Xu4kugMbr6TaMS9-U6pnObMu2ERKhNxE,891
6
6
  avoca/logging.py,sha256=BrxgZQRfnkPSoQ0ZXhOzzhIsmbyjKvaJNG55MdM9jmA,86
7
7
  avoca/manager.py,sha256=Faf3UyaCV58TMCZz6tWrLcY-W1WUtuh1aMP85yUVlmQ,5336
8
8
  avoca/plots.py,sha256=zzoOJystasrKF2ikJLqcT8mlc-f-tu57vksXs-xRXv8,4424
9
9
  avoca/requirements.py,sha256=q4z6bJ6iW5jSy10Y0elfE9BoEcAZC2-kUqYi4zA6TGE,563
10
10
  avoca/settings.py,sha256=Px-sCGIlRyWI2RBJaGUY0K1V60kOZY9n41eft92bjN4,2112
11
11
  avoca/bindings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
- avoca/bindings/ebas.py,sha256=vil4u4G6jGJrE12Z7nBvGpJuTAT9QyvbNNyWsWr5UaM,19306
13
- avoca/bindings/ebas_flags.py,sha256=N-JpmA6WCFjcYhvt7XjyOZMbR7vCdyPV6uHBlF45UJU,2397
12
+ avoca/bindings/ebas.py,sha256=3oWgN3teyEvQ9acpD2767A18IbgxjtbOdmXD79PLVrE,19793
13
+ avoca/bindings/ebas_flags.py,sha256=TEkmOI9Bia0C2KFO5GqLFXNIvcLak5yedlBFCKY5Gqg,2695
14
14
  avoca/bindings/gcwerks-report.conf,sha256=jO0I62DfgzrXXS1FuiW8ds-oc1_j8kpFCO61Fk-erBw,230
15
- avoca/bindings/gcwerks.py,sha256=a5n9Iot3r_ejnCEdILk4hE2uioONB75Soq5fvSLlDoo,14879
15
+ avoca/bindings/gcwerks.py,sha256=2Keff174EUjRwbBRDNKwGFEMr6TxJ2mpsjIN71DjUsQ,15188
16
16
  avoca/bindings/gcwerks_gui.py,sha256=Fj3p8obFq3lWrWW0LlA8WBALP8-U70hvps5vZEt4NaM,9458
17
17
  avoca/bindings/nabel.py,sha256=6OzaG1imFhOCVDQTO7YXvPQjbTfo4063w74yEuAVCEk,2991
18
18
  avoca/bindings/qa_tool.py,sha256=hqsWUU99mYpkKfeULBoox4M2x7Bk0aYO4Q_8WGvt2og,11628
@@ -28,11 +28,11 @@ avoca/qa_class/test.py,sha256=Xc88_Vwf3hvPiKKl4ILxZ2N985SY8eujUdnAoQu4mbo,591
28
28
  avoca/qa_class/zscore.py,sha256=jDw2UBmf7KBkskGOD5bgFy3RgNYUjc-9tYjSU-3L1ws,16714
29
29
  avoca/testing/__init__.py,sha256=CzkugadVit48-eMoMVtojZLHeSKgnmMMen6sGu6Q42Y,108
30
30
  avoca/testing/df.py,sha256=UQm6TdTDVRWvRNM5WnSWh6vdvDR1lqLNg0ti-B1L760,1865
31
- avoca/testing/utils.py,sha256=jVV0mIwLIpr0UBLMk8RjZH5J_dV_b6Gugxzo_WRgWU0,308
31
+ avoca/testing/utils.py,sha256=w0i-x3xG40JMxhoV_odVV7995VMKpUsHRYVJk2XYJ7I,318
32
32
  avoca/utils/__init__.py,sha256=SZc1bHrQyg1DIYnbdUmANtUhnQWlJaMhPrDSWS8oVRY,1408
33
33
  avoca/utils/flags_doc.py,sha256=jT1E0GN-B8ws_FyKGE20nlrKrgTHtoyjdo2r8RgYhwU,4294
34
34
  avoca/utils/torch_models.py,sha256=53TgOgSPMOOSGYy2cm1EGSK7qQkYMGEOq319KKM_Ir0,1015
35
- avoca-0.15.1.dist-info/METADATA,sha256=_KHPT1eKCbb2WV9ujy1-wLEDVhFuQKqUHIHm_ylHh4w,1570
36
- avoca-0.15.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
37
- avoca-0.15.1.dist-info/licenses/LICENCE.txt,sha256=4MY53j3v7tEKwjyuriVz9YjB4Dscm2nDMB2CcG9lOmk,1059
38
- avoca-0.15.1.dist-info/RECORD,,
35
+ avoca-0.17.0.dist-info/METADATA,sha256=1V1osu38cBEsVtCqP4wIbe37PT4LlKv56jqJZk1OI38,1570
36
+ avoca-0.17.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
37
+ avoca-0.17.0.dist-info/licenses/LICENCE.txt,sha256=4MY53j3v7tEKwjyuriVz9YjB4Dscm2nDMB2CcG9lOmk,1059
38
+ avoca-0.17.0.dist-info/RECORD,,
File without changes