avoca 0.11.3__py3-none-any.whl → 0.11.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
avoca/bindings/ebas.py CHANGED
@@ -31,6 +31,18 @@ ebas_compname_of_var = {
31
31
  ebas_compname_to_var = {v: k for k, v in ebas_compname_of_var.items()}
32
32
 
33
33
 
34
+ # Additional variables that can be in the dataset (not compound dependent)
35
+ additional_vars = [
36
+ "temperature",
37
+ "pressure",
38
+ ]
39
+
40
+ titles = {
41
+ "temperature": "T_inlet",
42
+ "pressure": "P_inlet",
43
+ }
44
+
45
+
34
46
  class DataLevel(IntEnum):
35
47
  """Values for different type of data used by ebas."""
36
48
 
@@ -57,6 +69,7 @@ def set_dataframe(
57
69
  start_offset: timedelta | None = None,
58
70
  end_offset: timedelta | None = None,
59
71
  flag_all: list[int] = [],
72
+ invalidate_conc_calib: bool = True,
60
73
  ):
61
74
  """Put the data from the export dataframe into the nas object.
62
75
 
@@ -68,7 +81,10 @@ def set_dataframe(
68
81
  :arg data_level: The level of the data to export.
69
82
  :arg start_offset: The offset to add to the start time
70
83
  :arg end_offset: The offset to add to the end time
71
-
84
+ :arg flag_all: List of flags to add to all the data
85
+ :arg invalidate_conc_calib: If True, the concentration calibration
86
+ will be invalidated (flag 980) for all calib samples.
87
+ :returns: A dictionary with the metadata of the compounds exported.
72
88
  """
73
89
 
74
90
  if ("-", "start_datetime") not in df_export.columns:
@@ -104,6 +120,8 @@ def set_dataframe(
104
120
  "rt": "s",
105
121
  "w": "s",
106
122
  "area": "area_unit",
123
+ "temperature": "K",
124
+ "pressure": "hPa",
107
125
  }
108
126
 
109
127
  ebas_varname_of_var = {
@@ -119,6 +137,7 @@ def set_dataframe(
119
137
 
120
138
  # Export calibration status if given by the user
121
139
  status_col = ("-", "status")
140
+ empty_flags = [[]] * len(df_export)
122
141
  if (status_col in df_export.columns) and (data_level not in concs_data_levels):
123
142
  metadata = DataObject()
124
143
  metadata.comp_name = "status"
@@ -126,13 +145,38 @@ def set_dataframe(
126
145
  metadata.matrix = "instrument"
127
146
  metadata.unit = "no unit"
128
147
  values = [val for val in df_export[status_col]]
129
- flags = [[] for _ in df_export[status_col]]
130
148
  nas.variables.append(
131
- DataObject(values_=values, flags=flags, flagcol=True, metadata=metadata)
149
+ DataObject(
150
+ values_=values, flags=empty_flags, flagcol=True, metadata=metadata
151
+ )
132
152
  )
133
153
 
134
- if data_level in concs_data_levels:
154
+ for var in additional_vars:
155
+ var_col = ("-", var)
156
+ if var_col not in df_export.columns:
157
+ continue
158
+ metadata = DataObject()
159
+ metadata.comp_name = var
160
+ metadata.title = titles.get(var, var)
161
+ metadata.matrix = "instrument"
162
+ metadata.unit = unit_of_var[var]
163
+ metadata.cal_scale = ""
164
+ values = [val for val in df_export[var_col]]
165
+ nas.variables.append(
166
+ DataObject(
167
+ values_=values,
168
+ flags=empty_flags,
169
+ flagcol=True,
170
+ metadata=metadata,
171
+ )
172
+ )
173
+
174
+
175
+ this_nan_flags = nan_flags.copy()
176
+
177
+ if data_level in concs_data_levels and invalidate_conc_calib:
135
178
  # Set the flag to the invalid value instead of the valid calibration
179
+ this_nan_flags.append(QA_Flag.CALIBRATION)
136
180
  dict_flags_to_ebas[QA_Flag.CALIBRATION] = 980
137
181
 
138
182
  for sub in compounds:
@@ -144,10 +188,7 @@ def set_dataframe(
144
188
  )
145
189
  for flag in flag_col
146
190
  ]
147
- nan_flag = np.logical_or.reduce([flag_col & flag.value for flag in nan_flags])
148
- if data_level == DataLevel.CONCS:
149
- # Invalidate also the calibration runs
150
- nan_flag |= flag_col & QA_Flag.CALIBRATION.value
191
+ nan_flag = np.logical_or.reduce([flag_col & flag.value for flag in this_nan_flags])
151
192
 
152
193
  for var in vars_to_export[data_level]:
153
194
  ebas_name = compounds[sub]
@@ -273,6 +314,10 @@ def nas_to_avoca(nas: EbasNasaAmes) -> pd.DataFrame:
273
314
  clean_for_df[("-", "status")] = calib_ids.astype(int)
274
315
  continue
275
316
 
317
+ if comp_name in additional_vars:
318
+ clean_for_df[("-", comp_name)] = np.array(values, dtype=float)
319
+ continue
320
+
276
321
  # Split the title on the _
277
322
  comp_name = comp_name.split("_")
278
323
  if len(comp_name) == 1:
@@ -288,6 +333,10 @@ def nas_to_avoca(nas: EbasNasaAmes) -> pd.DataFrame:
288
333
  elif len(comp_name) == 3:
289
334
  compund, var_first, var_second = comp_name
290
335
  variable = f"{var_first}_{var_second}"
336
+ elif len(comp_name) == 4 and comp_name[-1] == "compounds":
337
+ # Concentration of merged compounds
338
+ compund = "_".join(comp_name)
339
+ variable = "C"
291
340
  else:
292
341
  logger.warning(f"passing {comp_name}, could not be understood. Skipping.")
293
342
  continue
@@ -4,13 +4,17 @@ from __future__ import annotations
4
4
 
5
5
  import logging
6
6
  from abc import ABC, abstractmethod
7
- from typing import Sequence
7
+ from typing import TYPE_CHECKING, Sequence
8
8
 
9
9
  import pandas as pd
10
10
 
11
11
  from avoca.flags import QA_Flag
12
12
  from avoca.requirements import PythonPackageRequirement
13
13
 
14
+ if TYPE_CHECKING:
15
+ from matplotlib.axes import Axes
16
+ from matplotlib.figure import Figure
17
+
14
18
 
15
19
  class AbstractQA_Assigner(ABC):
16
20
  """Abstract class for QA assigners.
@@ -74,10 +78,11 @@ class AbstractQA_Assigner(ABC):
74
78
  stopp: pd.Timestamp = pd.Timestamp.max,
75
79
  name: str | None = None,
76
80
  runtypes: list[str] = None,
81
+ log_level: int = logging.INFO,
77
82
  ):
78
83
  """Create a new QA assigner."""
79
84
  self.logger = logging.getLogger(type(self).__name__)
80
- self.logger.setLevel(logging.DEBUG)
85
+ self.logger.setLevel(log_level)
81
86
 
82
87
  self.name = name or type(self).__name__
83
88
 
@@ -161,6 +166,6 @@ class AbstractQA_Assigner(ABC):
161
166
  raise NotImplementedError
162
167
 
163
168
  # Optional method
164
- def plot(self):
169
+ def plot(self) -> tuple[Figure, Sequence[Axes]]:
165
170
  """Plot the QA assigner."""
166
171
  raise NotImplementedError(f"{type(self).__name__} does not have a plot method.")
avoca/qa_class/rt.py CHANGED
@@ -17,15 +17,38 @@ class RetentionTimeChecker(AbstractQA_Assigner):
17
17
  retention times of the measurements.
18
18
  The correlation is usually very high. If one compound has a low correlation
19
19
  with the others, it probably means that it was mis-assigned at some point.
20
+
21
+ :param rt_threshold: The threshold for the retention time deviation.
22
+ Unit is time unit (minutes or seconds, as in the data).
23
+ This will try to fit a linear regression from the average training
24
+ retention times to the measured ones for each sample.
25
+ If after the regression a datapoint is higher than this threshold,
26
+ it will be removed.
27
+ :param rt_relative_max_deviation: The maximum relative deviation allowed
28
+ from the average retention time.
29
+ This is used to remove outliers that are too far from the average.
30
+ if 0.5 is given, it means that the retention time can be 50% higher or lower
31
+ than the average retention time.
20
32
  """
21
33
 
22
34
  runtypes: list[str] = ["air", "std"]
35
+ variable: str = "rt"
23
36
  flag = QA_Flag.SUSPICIOUS_RT
24
37
 
25
- RT_THRESHOLD: float = 2.0
26
-
27
38
  rt_ref: pd.Series
28
39
 
40
+ def __init__(
41
+ self,
42
+ rt_threshold: float = 2.0,
43
+ rt_relative_max_deviation: float = 0.2,
44
+ poly_order: int = 1,
45
+ **kwargs,
46
+ ):
47
+ super().__init__(**kwargs)
48
+ self.rt_threshold = rt_threshold
49
+ self.rt_relative_max_deviation = rt_relative_max_deviation
50
+ self.poly_order = poly_order
51
+
29
52
  def fit(self, df: pd.DataFrame):
30
53
  cols = [(compound, "rt") for compound in self.compounds]
31
54
 
@@ -49,6 +72,7 @@ class RetentionTimeChecker(AbstractQA_Assigner):
49
72
 
50
73
  # Get a dataframe for a mean reference
51
74
  self.rt_ref = df_rt.median(axis="index")
75
+ self.rt_std = df_rt.std(axis="index")
52
76
 
53
77
  def assign(self, df: pd.DataFrame) -> dict[str, pd.Index]:
54
78
  """Assing flags when expected rt values does not match the measured ones."""
@@ -58,27 +82,29 @@ class RetentionTimeChecker(AbstractQA_Assigner):
58
82
  df_rt = df[rt_cols]
59
83
  # Take the reference retention times
60
84
  x = self.rt_ref.loc[rt_cols].to_numpy()
85
+ std = self.rt_std.loc[rt_cols].to_numpy()
61
86
 
62
87
  outliers = {}
63
88
 
64
89
  for t, row in df_rt.iterrows():
65
90
  # Make a lin reg line
66
91
  y = row.to_numpy()
67
- mask_not_nan = ~np.isnan(y)
68
- if np.sum(mask_not_nan) < 3:
69
- self.logger.warning(
70
- f"{self} skipping {t} because there are not enough compounds"
71
- " measured"
72
- )
73
- continue
92
+ # Remove the points that are too far from the reference
93
+ mask_bad = (
94
+ (np.abs(y - x) / x) > self.rt_relative_max_deviation
95
+ ) | np.isnan(y)
74
96
 
75
- params = np.polyfit(x[mask_not_nan], y[mask_not_nan], 1)
76
- f = np.poly1d(params)
77
- y_lin_reg = f(x)
97
+ if np.sum(~mask_bad) > self.poly_order + 2:
78
98
 
79
- # Get the points which are too far from the reg line
80
- mask_bad = np.abs(y - y_lin_reg) > self.RT_THRESHOLD
81
- if np.any(mask_bad):
99
+ params = np.polyfit(x[~mask_bad], y[~mask_bad], self.poly_order)
100
+ f = np.poly1d(params)
101
+ y_lin_reg = f(x)
102
+
103
+ # Get the points which are too far from the reg line
104
+ error = y - y_lin_reg
105
+ mask_bad |= np.abs(error) > self.rt_threshold
106
+
107
+ if any(mask_bad):
82
108
  outliers[t] = mask_bad
83
109
 
84
110
  # Create a dataframe with the flags
@@ -95,12 +121,12 @@ class RetentionTimeChecker(AbstractQA_Assigner):
95
121
 
96
122
  import matplotlib.pyplot as plt
97
123
 
98
- fig, ax = plt.subplots()
124
+ fig, ax = plt.subplots(figsize=(16, 9))
99
125
 
100
126
  assigned = self.assign(self.df_train)
101
127
 
102
128
  for compound in self.compounds:
103
- ax.scatter(
129
+ points = ax.scatter(
104
130
  self.df_train.index,
105
131
  self.df_train[(compound, "rt")],
106
132
  label=compound,
@@ -115,6 +141,15 @@ class RetentionTimeChecker(AbstractQA_Assigner):
115
141
  color="red",
116
142
  marker="x",
117
143
  )
144
+ # Line for the mean retention time
145
+ ax.axhline(
146
+ self.rt_ref[(compound, "rt")],
147
+ color=points.get_facecolor()[0],
148
+ linestyle="--",
149
+ )
150
+
151
+ ax.set_ylabel("Retention time")
152
+ ax.set_xlabel("Sample")
118
153
 
119
154
  ax.legend()
120
155
  plt.show()
avoca/qa_class/zscore.py CHANGED
@@ -128,7 +128,7 @@ class ExtremeValues(AbstractQA_Assigner):
128
128
  )
129
129
 
130
130
  x = self.dt if hasattr(self, "dt") else self.df_train.index
131
- x = pd.Series(x, index=x)
131
+ x = pd.Series(x, index=self.df_train.index)
132
132
 
133
133
  outliers = self.assign(self.df_train)
134
134
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: avoca
3
- Version: 0.11.3
3
+ Version: 0.11.4
4
4
  Summary: @voc@: Quality assessement of measurement data
5
5
  Project-URL: Homepage, https://gitlab.com/empa503/atmospheric-measurements/avoca
6
6
  Project-URL: Bug Tracker, https://gitlab.com/empa503/atmospheric-measurements/avoca/-/issues
@@ -8,7 +8,7 @@ avoca/manager.py,sha256=ET-ATrSLi2rSV7PjBzwpjj0V_60MFxSIZqQ03aEIbdA,5284
8
8
  avoca/requirements.py,sha256=q4z6bJ6iW5jSy10Y0elfE9BoEcAZC2-kUqYi4zA6TGE,563
9
9
  avoca/settings.py,sha256=Px-sCGIlRyWI2RBJaGUY0K1V60kOZY9n41eft92bjN4,2112
10
10
  avoca/bindings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- avoca/bindings/ebas.py,sha256=s9274kwymZs0EO-2UMEUHV1iLgbWv7YR_r2e-O5m0SI,17286
11
+ avoca/bindings/ebas.py,sha256=48cYR-jwc3GMZCVhYYbMVUj1RgFAoQNtQC2kOpA1iAA,18827
12
12
  avoca/bindings/ebas_flags.py,sha256=uzPrd45OoULycCRYWCwHQG1exUDoWSe8JmULOAsEHRs,2537
13
13
  avoca/bindings/gcwerks-report.conf,sha256=jO0I62DfgzrXXS1FuiW8ds-oc1_j8kpFCO61Fk-erBw,230
14
14
  avoca/bindings/gcwerks.py,sha256=a5n9Iot3r_ejnCEdILk4hE2uioONB75Soq5fvSLlDoo,14879
@@ -17,19 +17,19 @@ avoca/bindings/nabel.py,sha256=VbC_ARvtso8onILAD8gROt5Y2URdx6NfAqMn4p1mUWU,3020
17
17
  avoca/bindings/qa_tool.py,sha256=ZPtQo8dHo6wooIlc9Vzk8y91Qgso-RBtGR_h2TAZQ24,7583
18
18
  avoca/bindings/synspec.py,sha256=W5RnBu-6eetmwjM8iMBe4wNwVNIaVpNW3bwa2ykGM2U,1733
19
19
  avoca/qa_class/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
- avoca/qa_class/abstract.py,sha256=4s8GgkeC3WbMnNxygajhawh7TU9v13i-SP0j5b5YOMc,5432
20
+ avoca/qa_class/abstract.py,sha256=KCK9OhKNWlMje-5D0hgMIf-g64D_kRwRsoCZ_R4VuqI,5612
21
21
  avoca/qa_class/concs.py,sha256=TcQic69I1Kr40RJgCILTtyjVLn0K6_q6I5Y1Vi3dKwk,813
22
22
  avoca/qa_class/generate_classes_doc.py,sha256=osz01SRZ5SrwJXVlmbcainVwVjmealSSIdbzXzUEGKQ,1915
23
23
  avoca/qa_class/invalid.py,sha256=PDZHN0RZ8jND3QY09UcbwJYjjT6VqS4a0klO3QYiFig,2650
24
- avoca/qa_class/rt.py,sha256=t927H_o0Kn-VwEkG9TW33MSDW_2of2-pBX_gTMuaXA0,3817
24
+ avoca/qa_class/rt.py,sha256=Bgv0DSSR-hIJ9kI6AdUkV6sXVS65gBxbASkk4TUHbnQ,5293
25
25
  avoca/qa_class/test.py,sha256=Xc88_Vwf3hvPiKKl4ILxZ2N985SY8eujUdnAoQu4mbo,591
26
- avoca/qa_class/zscore.py,sha256=HqOxV45smhXqcv2XrB7W7plE9RoHzBGVEAbmuwsiv7w,16696
26
+ avoca/qa_class/zscore.py,sha256=jDw2UBmf7KBkskGOD5bgFy3RgNYUjc-9tYjSU-3L1ws,16714
27
27
  avoca/testing/__init__.py,sha256=CzkugadVit48-eMoMVtojZLHeSKgnmMMen6sGu6Q42Y,108
28
28
  avoca/testing/df.py,sha256=Nc0GUYTApZgYyUTMnHMTbSKLiA5ty9Bg7gUGtnoFYMI,1826
29
29
  avoca/utils/__init__.py,sha256=LEA2jJsqwSK2DBzXg00DbPhM1fXXREJ0XxLeuJtKapY,1398
30
30
  avoca/utils/flags_doc.py,sha256=cS7yKpxVh_SA6EdH3lSy9UpcIvhGwzAELRbkXN3CxO8,4168
31
31
  avoca/utils/torch_models.py,sha256=53TgOgSPMOOSGYy2cm1EGSK7qQkYMGEOq319KKM_Ir0,1015
32
- avoca-0.11.3.dist-info/METADATA,sha256=NsJF7yn7mtQVgkOnDjo-ARqRa3hzvauGhp9BJ-kmTAk,1570
33
- avoca-0.11.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
34
- avoca-0.11.3.dist-info/licenses/LICENCE.txt,sha256=4MY53j3v7tEKwjyuriVz9YjB4Dscm2nDMB2CcG9lOmk,1059
35
- avoca-0.11.3.dist-info/RECORD,,
32
+ avoca-0.11.4.dist-info/METADATA,sha256=tx6uIcmzGJU-Gf1RKfxW1crR6hhYc5AwDBkVks4iuHU,1570
33
+ avoca-0.11.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
34
+ avoca-0.11.4.dist-info/licenses/LICENCE.txt,sha256=4MY53j3v7tEKwjyuriVz9YjB4Dscm2nDMB2CcG9lOmk,1059
35
+ avoca-0.11.4.dist-info/RECORD,,
File without changes