avoca 0.12.0__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
avoca/bindings/ebas.py CHANGED
@@ -171,7 +171,6 @@ def set_dataframe(
171
171
  )
172
172
  )
173
173
 
174
-
175
174
  this_nan_flags = nan_flags.copy()
176
175
 
177
176
  if data_level in concs_data_levels and invalidate_conc_calib:
@@ -188,7 +187,9 @@ def set_dataframe(
188
187
  )
189
188
  for flag in flag_col
190
189
  ]
191
- nan_flag = np.logical_or.reduce([flag_col & flag.value for flag in this_nan_flags])
190
+ nan_flag = np.logical_or.reduce(
191
+ [flag_col & flag.value for flag in this_nan_flags]
192
+ )
192
193
 
193
194
  for var in vars_to_export[data_level]:
194
195
  ebas_name = compounds[sub]
@@ -199,6 +200,16 @@ def set_dataframe(
199
200
  for val, isnan in zip(serie_to_export, nan_flag)
200
201
  ]
201
202
 
203
+ if var == "conc_calib":
204
+ # Invalidate calibration concentration for non-calibration samples
205
+ this_flags = [
206
+ flags_ebas
207
+ + ([] if (QA_Flag.CALIBRATION.value & flag_avoca) else [980])
208
+ for flags_ebas, flag_avoca in zip(flags, flag_col)
209
+ ]
210
+ else:
211
+ this_flags = flags
212
+
202
213
  metadata = DataObject()
203
214
  metadata.comp_name = (
204
215
  f"{ebas_name}_{ebas_compname_of_var[var]}"
@@ -214,7 +225,9 @@ def set_dataframe(
214
225
  metadata.matrix = "air"
215
226
  # add the variable
216
227
  nas.variables.append(
217
- DataObject(values_=values, flags=flags, flagcol=True, metadata=metadata)
228
+ DataObject(
229
+ values_=values, flags=this_flags, flagcol=True, metadata=metadata
230
+ )
218
231
  )
219
232
 
220
233
  if var == "conc_calib":
@@ -1,4 +1,4 @@
1
- # https://projects.nilu.no/ccc/flags/flags.html for more info on what ebas uses
1
+ # https://ebas-submit.nilu.no/templates/comments/fl_flag for more info on what ebas uses
2
2
  from avoca.flags import QA_Flag, nan_flags
3
3
 
4
4
  flags_to_ebas: dict[QA_Flag, int] = {
avoca/plots.py CHANGED
@@ -69,6 +69,8 @@ def plot_yearly_plotly(
69
69
  df: pd.DataFrame,
70
70
  compound: str,
71
71
  df_new: pd.DataFrame | None = None,
72
+ opacity: float = 0.5,
73
+ size: int = 6,
72
74
  ) -> "plotly.graph_objs._figure.Figure":
73
75
  """Plot yearly data using plotly."""
74
76
  import plotly.express as px
@@ -97,7 +99,28 @@ def plot_yearly_plotly(
97
99
  df_to_plot = df_to_plot.pivot_table(
98
100
  index=df_to_plot.index, columns="year", values="conc"
99
101
  )
100
- fig = px.scatter(df_to_plot)
102
+ fig = go.Figure()
103
+
104
+ hover_template = "Timestamp: %{text}<br>Conc: %{y:.2f} ppt"
105
+
106
+ kwargs = {
107
+ "mode": "markers",
108
+ "opacity": opacity,
109
+ "marker": dict(size=size),
110
+ "hovertemplate": hover_template,
111
+ }
112
+
113
+ for year in df_to_plot.columns:
114
+ fig.add_trace(
115
+ go.Scatter(
116
+ x=df_to_plot.index,
117
+ y=df_to_plot[year],
118
+ name=str(year),
119
+ zorder=-year,
120
+ text=dt[dt.dt.year == year].dt.strftime("%y%m%d.%H%M"),
121
+ **kwargs,
122
+ )
123
+ )
101
124
  x_values = pd.date_range(start="2024-01-01", end="2024-12-31", freq="MS")
102
125
 
103
126
  dt_new = df_new[dt_column]
@@ -105,8 +128,9 @@ def plot_yearly_plotly(
105
128
  go.Scatter(
106
129
  x=dt_new.dt.dayofyear + dt_new.dt.hour / 24.0,
107
130
  y=df_new[(compound, "conc")],
108
- mode="markers",
109
131
  name="New Data",
132
+ text=dt_new.dt.strftime("%y%m%d.%H%M"),
133
+ **kwargs,
110
134
  )
111
135
  )
112
136
  fig.update_layout(
@@ -49,6 +49,7 @@ class AbstractQA_Assigner(ABC):
49
49
  flag: QA_Flag
50
50
  runtypes: list[str] | None
51
51
  required_packages: list[PythonPackageRequirement] | None = None
52
+ require_datetime_index: bool = False
52
53
 
53
54
  # Options that can be set by the user
54
55
  name: str
@@ -142,6 +143,14 @@ class AbstractQA_Assigner(ABC):
142
143
  f"Please check the data and the settings for {self.name}"
143
144
  )
144
145
 
146
+ if self.require_datetime_index:
147
+ if not isinstance(df.index, pd.DatetimeIndex):
148
+ raise ValueError(
149
+ f"Assigner {self} requires a DatetimeIndex but the dataframe"
150
+ " does not have one. \n "
151
+ f"Please check the data and the settings for {self.name}"
152
+ )
153
+
145
154
  @abstractmethod
146
155
  def fit(self, df: pd.DataFrame):
147
156
  """Fit the QA assigner on some data.
@@ -0,0 +1,133 @@
1
+ """Quality assurance based on statistical methods."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import timedelta
6
+ from typing import TYPE_CHECKING
7
+
8
+ import numpy as np
9
+ import pandas as pd
10
+
11
+ from avoca.qa_class.zscore import ExtremeValues
12
+
13
+ if TYPE_CHECKING:
14
+
15
+ from avoca.utils.torch_models import MultipleRegressionModel
16
+
17
+
18
+ class RollingWindow(ExtremeValues):
19
+ """Detect extreme values in rolling windows.
20
+
21
+ The method is based on outliers in a rolling window using the median and standard deviation.
22
+ The training is done directly on the fitted data.
23
+
24
+ :param variable: The variable to check for extreme values.
25
+ :param threshold: The z-score threshold used to flag values.
26
+ :param use_log_normal: If True, the log of the values will be used to calculate the z-score.
27
+ This can be useful if the values are log-normal distributed.
28
+ :param only_greater: If True, only values greater than the threshold will be flagged.
29
+ The values lower than the negative threshold will not be flagged.
30
+ By default, this is True if use_log_normal is True, and False otherwise.
31
+ """
32
+
33
+ require_datetime_index = True
34
+
35
+ rolling_window: timedelta
36
+
37
+ def __init__(
38
+ self,
39
+ *args,
40
+ rolling_window: timedelta = timedelta(days=7),
41
+ threshold: float = 1.5,
42
+ **kwargs,
43
+ ):
44
+ super().__init__(*args, threshold=threshold, **kwargs)
45
+ self.rolling_window = rolling_window
46
+
47
+ def fit(self, df: pd.DataFrame):
48
+
49
+ self.check_columns_or_raise(df, columns=self._stats_columns)
50
+
51
+ self.df_train = df[self._stats_columns]
52
+
53
+ def assign(self, df: pd.DataFrame) -> dict[str, pd.Index]:
54
+ df = df[self._stats_columns]
55
+ df = self._clean_data(df)
56
+ if self.use_log_normal:
57
+ # Replace <=0 with NaN
58
+ df = df.where(df > 0, np.nan)
59
+ df = df.map(lambda x: np.log(x))
60
+
61
+ rolling = df.rolling(window=self.rolling_window)
62
+ means = rolling.median()
63
+ stds = rolling.std()
64
+
65
+ self.rolling_median = means
66
+ self.rolling_std = stds
67
+
68
+ thresholds = means + stds * self.threshold
69
+
70
+ df_fail = df > thresholds
71
+ if not self.only_greater:
72
+ df_fail = df_fail | (df < (means - stds * self.threshold))
73
+
74
+ out_dict = {}
75
+ for compound in self.compounds:
76
+ col = (compound, self.variable)
77
+ this_c_fail = df_fail[col]
78
+ out_dict[compound] = this_c_fail.loc[this_c_fail].index
79
+
80
+ return out_dict
81
+
82
+ def plot(self):
83
+
84
+ import matplotlib.pyplot as plt
85
+
86
+ fig, axes = plt.subplots(
87
+ len(self.compounds), 1, figsize=(6, 3 * len(self.compounds)), sharex=True
88
+ )
89
+
90
+ x = self.dt if hasattr(self, "dt") else self.df_train.index
91
+ x = pd.Series(x, index=self.df_train.index)
92
+
93
+ outliers = self.assign(self.df_train)
94
+
95
+ for i, compound in enumerate(self.compounds):
96
+ ax = axes[i]
97
+ col = (compound, self.variable)
98
+ ax.scatter(
99
+ x,
100
+ self.df_train[col],
101
+ s=1,
102
+ label="darkblue",
103
+ )
104
+ median = self.rolling_median[col]
105
+ std = self.rolling_std[col]
106
+ top, bottom = median + std * self.threshold, median - std * self.threshold
107
+
108
+ ax.fill_between(
109
+ x,
110
+ top,
111
+ bottom,
112
+ color="lightgray",
113
+ label="Rolling threshold",
114
+ alpha=0.5,
115
+ )
116
+
117
+ outlier_indices = outliers[compound]
118
+ ax.scatter(
119
+ x.loc[outlier_indices],
120
+ self.df_train.loc[outlier_indices, col],
121
+ s=10,
122
+ marker="x",
123
+ color="red",
124
+ label="Extreme values",
125
+ )
126
+ ax.set_title(
127
+ f"{compound} +- {self.threshold} std",
128
+ # Under the top line
129
+ y=0.8,
130
+ )
131
+ ax.tick_params(axis="x", rotation=25)
132
+
133
+ return fig, axes
avoca/testing/df.py CHANGED
@@ -10,6 +10,7 @@ import numpy as np
10
10
  import pandas as pd
11
11
 
12
12
  empty_index = pd.Index([], dtype="int64")
13
+ empty_index_dt = pd.DatetimeIndex([])
13
14
 
14
15
  simple_df = pd.DataFrame(
15
16
  np.ones((2, 4)),
avoca/testing/utils.py ADDED
@@ -0,0 +1,9 @@
1
+ import pandas as pd
2
+
3
+
4
+ def make_dt_index(df: pd.DataFrame | pd.Index) -> pd.DataFrame | pd.Index:
5
+ """Create a datetime index for the dataframe."""
6
+ index = pd.date_range(start="2023-01-01", periods=len(df), freq="h")
7
+ if isinstance(df, pd.Index):
8
+ return index
9
+ return df.set_index(index)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: avoca
3
- Version: 0.12.0
3
+ Version: 0.14.0
4
4
  Summary: @voc@: Quality assessement of measurement data
5
5
  Project-URL: Homepage, https://gitlab.com/empa503/atmospheric-measurements/avoca
6
6
  Project-URL: Bug Tracker, https://gitlab.com/empa503/atmospheric-measurements/avoca/-/issues
@@ -5,12 +5,12 @@ avoca/flags.py,sha256=wobuZoIJh6dFsdiqqYJLZ_AHe4pcFE9tjuoimNXLjIQ,1428
5
5
  avoca/io.py,sha256=67D5x1qkLqWC7wWehyOfX96L4H3-tn9x2V4jMCoIRqA,729
6
6
  avoca/logging.py,sha256=BrxgZQRfnkPSoQ0ZXhOzzhIsmbyjKvaJNG55MdM9jmA,86
7
7
  avoca/manager.py,sha256=ET-ATrSLi2rSV7PjBzwpjj0V_60MFxSIZqQ03aEIbdA,5284
8
- avoca/plots.py,sha256=uEo0rTCwQ0iygTaycYPlbtcqNbJpDQd7xjvis686lD4,3567
8
+ avoca/plots.py,sha256=UjfUgbfxd2veMOGHtSvJycru-w3gWsGjOVO__I-zqzQ,4205
9
9
  avoca/requirements.py,sha256=q4z6bJ6iW5jSy10Y0elfE9BoEcAZC2-kUqYi4zA6TGE,563
10
10
  avoca/settings.py,sha256=Px-sCGIlRyWI2RBJaGUY0K1V60kOZY9n41eft92bjN4,2112
11
11
  avoca/bindings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
- avoca/bindings/ebas.py,sha256=48cYR-jwc3GMZCVhYYbMVUj1RgFAoQNtQC2kOpA1iAA,18827
13
- avoca/bindings/ebas_flags.py,sha256=nts47BB74vDlAbecUatXtjeyL3SALLCt3fRl6BfdOS4,2388
12
+ avoca/bindings/ebas.py,sha256=vil4u4G6jGJrE12Z7nBvGpJuTAT9QyvbNNyWsWr5UaM,19306
13
+ avoca/bindings/ebas_flags.py,sha256=N-JpmA6WCFjcYhvt7XjyOZMbR7vCdyPV6uHBlF45UJU,2397
14
14
  avoca/bindings/gcwerks-report.conf,sha256=jO0I62DfgzrXXS1FuiW8ds-oc1_j8kpFCO61Fk-erBw,230
15
15
  avoca/bindings/gcwerks.py,sha256=a5n9Iot3r_ejnCEdILk4hE2uioONB75Soq5fvSLlDoo,14879
16
16
  avoca/bindings/gcwerks_gui.py,sha256=Fj3p8obFq3lWrWW0LlA8WBALP8-U70hvps5vZEt4NaM,9458
@@ -18,19 +18,21 @@ avoca/bindings/nabel.py,sha256=VbC_ARvtso8onILAD8gROt5Y2URdx6NfAqMn4p1mUWU,3020
18
18
  avoca/bindings/qa_tool.py,sha256=ninHe3mrJ8GULxRCkRTZixw-vmNhqu4zwwONd5aXd1Q,9735
19
19
  avoca/bindings/synspec.py,sha256=W5RnBu-6eetmwjM8iMBe4wNwVNIaVpNW3bwa2ykGM2U,1733
20
20
  avoca/qa_class/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
- avoca/qa_class/abstract.py,sha256=KCK9OhKNWlMje-5D0hgMIf-g64D_kRwRsoCZ_R4VuqI,5612
21
+ avoca/qa_class/abstract.py,sha256=CLt-6WFhZhrvKTLVHpdbJYMFM50VPOGiO-GG6IRPWzA,6011
22
22
  avoca/qa_class/concs.py,sha256=TcQic69I1Kr40RJgCILTtyjVLn0K6_q6I5Y1Vi3dKwk,813
23
23
  avoca/qa_class/generate_classes_doc.py,sha256=osz01SRZ5SrwJXVlmbcainVwVjmealSSIdbzXzUEGKQ,1915
24
24
  avoca/qa_class/invalid.py,sha256=PDZHN0RZ8jND3QY09UcbwJYjjT6VqS4a0klO3QYiFig,2650
25
+ avoca/qa_class/rolling.py,sha256=CQ2E0qJ7FxDT4TucItkJRmkqhzMoNSnwtVQQ_HzX9Jk,4059
25
26
  avoca/qa_class/rt.py,sha256=Bgv0DSSR-hIJ9kI6AdUkV6sXVS65gBxbASkk4TUHbnQ,5293
26
27
  avoca/qa_class/test.py,sha256=Xc88_Vwf3hvPiKKl4ILxZ2N985SY8eujUdnAoQu4mbo,591
27
28
  avoca/qa_class/zscore.py,sha256=jDw2UBmf7KBkskGOD5bgFy3RgNYUjc-9tYjSU-3L1ws,16714
28
29
  avoca/testing/__init__.py,sha256=CzkugadVit48-eMoMVtojZLHeSKgnmMMen6sGu6Q42Y,108
29
- avoca/testing/df.py,sha256=Nc0GUYTApZgYyUTMnHMTbSKLiA5ty9Bg7gUGtnoFYMI,1826
30
+ avoca/testing/df.py,sha256=UQm6TdTDVRWvRNM5WnSWh6vdvDR1lqLNg0ti-B1L760,1865
31
+ avoca/testing/utils.py,sha256=jVV0mIwLIpr0UBLMk8RjZH5J_dV_b6Gugxzo_WRgWU0,308
30
32
  avoca/utils/__init__.py,sha256=LEA2jJsqwSK2DBzXg00DbPhM1fXXREJ0XxLeuJtKapY,1398
31
33
  avoca/utils/flags_doc.py,sha256=cS7yKpxVh_SA6EdH3lSy9UpcIvhGwzAELRbkXN3CxO8,4168
32
34
  avoca/utils/torch_models.py,sha256=53TgOgSPMOOSGYy2cm1EGSK7qQkYMGEOq319KKM_Ir0,1015
33
- avoca-0.12.0.dist-info/METADATA,sha256=4xD5mqScWJDeAnbM3vFfKLxaszOLz0GlDPvjE1Ej_vw,1570
34
- avoca-0.12.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
35
- avoca-0.12.0.dist-info/licenses/LICENCE.txt,sha256=4MY53j3v7tEKwjyuriVz9YjB4Dscm2nDMB2CcG9lOmk,1059
36
- avoca-0.12.0.dist-info/RECORD,,
35
+ avoca-0.14.0.dist-info/METADATA,sha256=CayW94kozHUxF8sbKxE0pnWZnS0W5cjkEUKU7_QfgEc,1570
36
+ avoca-0.14.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
37
+ avoca-0.14.0.dist-info/licenses/LICENCE.txt,sha256=4MY53j3v7tEKwjyuriVz9YjB4Dscm2nDMB2CcG9lOmk,1059
38
+ avoca-0.14.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.27.0
2
+ Generator: hatchling 1.28.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any