avoca 0.14.0__py3-none-any.whl → 0.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
avoca/bindings/qa_tool.py CHANGED
@@ -37,10 +37,11 @@ def export_EmpaQATool(
37
37
  station: str = "XXX",
38
38
  revision_date: datetime | None = None,
39
39
  dataset: datetime | str | None = None,
40
- export_names: dict[str, str] = {},
40
+ export_names: dict[str, str] | None = None,
41
41
  datetime_offsets: tuple[timedelta, timedelta] | None = None,
42
42
  substances: list[str] = [],
43
43
  rounding_decimals: int = 4,
44
+ df_substances: pd.DataFrame | None = None,
44
45
  ) -> Path:
45
46
  """Export to the EmpaQATool format.
46
47
 
@@ -64,7 +65,17 @@ def export_EmpaQATool(
64
65
  :arg datetime_offsets: Tuple of two timedelta to use for the start and end datetime
65
66
  :arg substances: List of substances to export. You can also specify group names.
66
67
  If not specified, this will use the substances from `df_substances`.
68
+ If a substance is present here and not in `df_substances`, it will still be exported.
67
69
  :arg rounding_decimals: Number of decimals to round the values to.
70
+ :arg df_substances: DataFrame with substance information.
71
+ If provided, the substances to export will be taken from this dataframe.
72
+ Columns:
73
+ - index: substance name
74
+ - export: bool, whether to export the substance
75
+ - export_name: str, name to use in the export file
76
+
77
+
78
+ :returns: Path to the exported file.
68
79
 
69
80
  """
70
81
 
@@ -113,12 +124,42 @@ def export_EmpaQATool(
113
124
  logger.debug(f"df_out: {df_out.head()}")
114
125
  if not substances:
115
126
  substances = compounds_from_df(df)
127
+ if df_substances is not None and "export" in df_substances.columns:
128
+ # Remove the substances that should not be exported
129
+ substances = [
130
+ s
131
+ for s in substances
132
+ if s not in df_substances.index or df_substances.loc[s, "export"]
133
+ ]
116
134
 
117
135
  remove_infs = lambda x: x.replace([np.inf, -np.inf], np.nan)
118
136
  is_invalid = lambda x: x.isin([np.inf, -np.inf]) | pd.isna(x)
119
137
  clean_col = lambda x: remove_infs(x).round(rounding_decimals).astype(str)
120
138
 
139
+ if export_names is None:
140
+ export_names = {}
141
+
142
+ if df_substances is not None and "export_name" in df_substances.columns:
143
+ # Read export names from the dataframe if provided
144
+ for substance in substances:
145
+ if not substance or substance not in df_substances.index:
146
+ continue
147
+ export_name_df = df_substances.loc[substance, "export_name"]
148
+ if not export_name_df or pd.isna(export_name_df):
149
+ continue
150
+ if substance in export_names and export_names[substance] != export_name_df:
151
+ logger.warning(
152
+ f"Substance {substance} found in both df_substances and"
153
+ " export_names. Using the name from export_names.\n"
154
+ f" - export_names (used): {export_names[substance]}\n"
155
+ f" - df_substances: {export_name_df}"
156
+ )
157
+ continue
158
+ export_names[substance] = export_name_df
159
+
121
160
  for substance in substances:
161
+ if not substance:
162
+ continue
122
163
 
123
164
  export_name = export_names.get(substance, substance)
124
165
 
@@ -234,12 +275,12 @@ def read_empaqatool(file_path: Path, shift: timedelta | None = None) -> pd.DataF
234
275
  dt += shift
235
276
  columns[("-", "datetime")] = dt
236
277
 
237
- # Last column is empty
238
- compounds = [ '-'.join(s[:-1]) for col in df.columns if len(s:=col.split("-")) >= 2]
239
-
240
-
241
- for compound in compounds:
278
+ # Last column is empty
279
+ compounds = [
280
+ "-".join(s[:-1]) for col in df.columns if len(s := col.split("-")) >= 2
281
+ ]
242
282
 
283
+ for compound in compounds:
243
284
 
244
285
  flag_col = f"{compound}-flag"
245
286
  value_col = f"{compound}-value"
@@ -248,8 +289,8 @@ def read_empaqatool(file_path: Path, shift: timedelta | None = None) -> pd.DataF
248
289
 
249
290
  mapping = {
250
291
  "conc": value_col,
251
- "u_expanded":acc_col,
252
- "u_precision":precision_col,
292
+ "u_expanded": acc_col,
293
+ "u_precision": precision_col,
253
294
  }
254
295
 
255
296
  flag_values = (pd.to_numeric(df[flag_col]) * 1e3).astype(int).mod(1000)
@@ -263,10 +304,10 @@ def read_empaqatool(file_path: Path, shift: timedelta | None = None) -> pd.DataF
263
304
  serie = pd.to_numeric(df[value])
264
305
  mask_nan = flags == QA_Flag.MISSING.value
265
306
  serie[mask_nan] = np.nan
266
- columns[(compound, key)] = serie
307
+ columns[(compound, key)] = serie
267
308
 
268
309
  columns[(compound, "flag")] = flags
269
-
310
+
270
311
  mask_nan = columns[(compound, "conc")].isna()
271
312
  columns[(compound, "flag")][mask_nan] |= QA_Flag.MISSING.value
272
313
 
avoca/manager.py CHANGED
@@ -20,6 +20,7 @@ class AssignerManager:
20
20
  _assigners_importpath = {
21
21
  "RetentionTimeChecker": "avoca.qa_class.rt",
22
22
  "ExtremeValues": "avoca.qa_class.zscore",
23
+ "RollingWindow": "avoca.qa_class.rolling",
23
24
  "ExtremeConcentrations": "avoca.qa_class.concs",
24
25
  "XY_Correlations": "avoca.qa_class.zscore",
25
26
  "TestAssigner": "avoca.qa_class.test",
avoca/plots.py CHANGED
@@ -77,28 +77,17 @@ def plot_yearly_plotly(
77
77
  import plotly.graph_objects as go
78
78
 
79
79
  dt_column = ("-", "datetime")
80
- serie = df[(compound, "conc")]
81
- dt = df[dt_column]
80
+
82
81
  if ("-", "type") in df.columns:
83
82
  mask_air = df[("-", "type")] == "air"
84
- serie = serie[mask_air]
85
- dt = dt[mask_air]
86
- if ("-", "type") in df_new.columns:
83
+ df = df[mask_air]
84
+ if df_new is not None and ("-", "type") in df_new.columns:
87
85
  mask_air_new = df_new[("-", "type")] == "air"
88
86
  df_new = df_new[mask_air_new]
89
87
 
88
+ dt = df[dt_column]
90
89
  x = dt.dt.day_of_year + dt.dt.hour / 24.0
91
- df_to_plot = pd.DataFrame(
92
- {
93
- "conc": serie.values,
94
- "year": dt.dt.year.values,
95
- },
96
- index=x.values,
97
- )
98
- # Break down by year, to have year as columns and conc as values
99
- df_to_plot = df_to_plot.pivot_table(
100
- index=df_to_plot.index, columns="year", values="conc"
101
- )
90
+
102
91
  fig = go.Figure()
103
92
 
104
93
  hover_template = "Timestamp: %{text}<br>Conc: %{y:.2f} ppt"
@@ -110,29 +99,44 @@ def plot_yearly_plotly(
110
99
  "hovertemplate": hover_template,
111
100
  }
112
101
 
113
- for year in df_to_plot.columns:
102
+ if (compound, "conc") in df:
103
+ serie = df[(compound, "conc")]
104
+ df_to_plot = pd.DataFrame(
105
+ {
106
+ "conc": serie.values,
107
+ "year": dt.dt.year.values,
108
+ },
109
+ index=x.values,
110
+ )
111
+ # Break down by year, to have year as columns and conc as values
112
+ df_to_plot = df_to_plot.pivot_table(
113
+ index=df_to_plot.index, columns="year", values="conc"
114
+ )
115
+ for year in df_to_plot.columns:
116
+ fig.add_trace(
117
+ go.Scatter(
118
+ x=df_to_plot.index,
119
+ y=df_to_plot[year],
120
+ name=str(year),
121
+ zorder=-year,
122
+ text=dt[dt.dt.year == year].dt.strftime("%y%m%d.%H%M"),
123
+ **kwargs,
124
+ )
125
+ )
126
+
127
+ x_values = pd.date_range(start="2024-01-01", end="2024-12-31", freq="MS")
128
+
129
+ if df_new is not None and (compound, "conc") in df_new:
130
+ dt_new = df_new[dt_column]
114
131
  fig.add_trace(
115
132
  go.Scatter(
116
- x=df_to_plot.index,
117
- y=df_to_plot[year],
118
- name=str(year),
119
- zorder=-year,
120
- text=dt[dt.dt.year == year].dt.strftime("%y%m%d.%H%M"),
133
+ x=dt_new.dt.dayofyear + dt_new.dt.hour / 24.0,
134
+ y=df_new[(compound, "conc")],
135
+ name="New Data",
136
+ text=dt_new.dt.strftime("%y%m%d.%H%M"),
121
137
  **kwargs,
122
138
  )
123
139
  )
124
- x_values = pd.date_range(start="2024-01-01", end="2024-12-31", freq="MS")
125
-
126
- dt_new = df_new[dt_column]
127
- fig.add_trace(
128
- go.Scatter(
129
- x=dt_new.dt.dayofyear + dt_new.dt.hour / 24.0,
130
- y=df_new[(compound, "conc")],
131
- name="New Data",
132
- text=dt_new.dt.strftime("%y%m%d.%H%M"),
133
- **kwargs,
134
- )
135
- )
136
140
  fig.update_layout(
137
141
  xaxis_title="Time of Year",
138
142
  yaxis_title=f"{compound} (ppt)",
avoca/qa_class/rolling.py CHANGED
@@ -28,6 +28,9 @@ class RollingWindow(ExtremeValues):
28
28
  :param only_greater: If True, only values greater than the threshold will be flagged.
29
29
  The values lower than the negative threshold will not be flagged.
30
30
  By default, this is True if use_log_normal is True, and False otherwise.
31
+ :param rolling_window: The size of the rolling window as a `timedelta` object.
32
+ See the `window` parameter in the pandas documentation for more details.
33
+ https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rolling.html#pandas-dataframe-rolling
31
34
  """
32
35
 
33
36
  require_datetime_index = True
avoca/utils/__init__.py CHANGED
@@ -13,7 +13,7 @@ def compounds_from_df(df: pd.DataFrame) -> list[str]:
13
13
  Returns:
14
14
  The compounds in the dataframe.
15
15
  """
16
- return [c for c in df.columns.get_level_values(0).unique() if c != "-"]
16
+ return [c for c in df.columns.get_level_values(0).unique() if c not in ["-", ""]]
17
17
 
18
18
 
19
19
  def runtypes_from_df(df: pd.DataFrame) -> list[str]:
avoca/utils/flags_doc.py CHANGED
@@ -56,6 +56,9 @@ def parse_enum_comments(filepath: Path, enum_class_name: str) -> dict[Enum, str]
56
56
  exec(code, module)
57
57
  enum_cls = module[enum_class_name]
58
58
  for name, comment in comment_dict.items():
59
+ if not hasattr(enum_cls, name):
60
+ # Probably somewhere else in the file
61
+ continue
59
62
  enum_member = getattr(enum_cls, name)
60
63
  enum_obj[enum_member] = comment
61
64
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: avoca
3
- Version: 0.14.0
3
+ Version: 0.15.0
4
4
  Summary: @voc@: Quality assessement of measurement data
5
5
  Project-URL: Homepage, https://gitlab.com/empa503/atmospheric-measurements/avoca
6
6
  Project-URL: Bug Tracker, https://gitlab.com/empa503/atmospheric-measurements/avoca/-/issues
@@ -4,8 +4,8 @@ avoca/flagging.py,sha256=tg6k_TVHRXiMJCAij_kUS-S2gSshYt7FKvQ0nJdljYs,2328
4
4
  avoca/flags.py,sha256=wobuZoIJh6dFsdiqqYJLZ_AHe4pcFE9tjuoimNXLjIQ,1428
5
5
  avoca/io.py,sha256=67D5x1qkLqWC7wWehyOfX96L4H3-tn9x2V4jMCoIRqA,729
6
6
  avoca/logging.py,sha256=BrxgZQRfnkPSoQ0ZXhOzzhIsmbyjKvaJNG55MdM9jmA,86
7
- avoca/manager.py,sha256=ET-ATrSLi2rSV7PjBzwpjj0V_60MFxSIZqQ03aEIbdA,5284
8
- avoca/plots.py,sha256=UjfUgbfxd2veMOGHtSvJycru-w3gWsGjOVO__I-zqzQ,4205
7
+ avoca/manager.py,sha256=Faf3UyaCV58TMCZz6tWrLcY-W1WUtuh1aMP85yUVlmQ,5336
8
+ avoca/plots.py,sha256=zzoOJystasrKF2ikJLqcT8mlc-f-tu57vksXs-xRXv8,4424
9
9
  avoca/requirements.py,sha256=q4z6bJ6iW5jSy10Y0elfE9BoEcAZC2-kUqYi4zA6TGE,563
10
10
  avoca/settings.py,sha256=Px-sCGIlRyWI2RBJaGUY0K1V60kOZY9n41eft92bjN4,2112
11
11
  avoca/bindings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -15,24 +15,24 @@ avoca/bindings/gcwerks-report.conf,sha256=jO0I62DfgzrXXS1FuiW8ds-oc1_j8kpFCO61Fk
15
15
  avoca/bindings/gcwerks.py,sha256=a5n9Iot3r_ejnCEdILk4hE2uioONB75Soq5fvSLlDoo,14879
16
16
  avoca/bindings/gcwerks_gui.py,sha256=Fj3p8obFq3lWrWW0LlA8WBALP8-U70hvps5vZEt4NaM,9458
17
17
  avoca/bindings/nabel.py,sha256=VbC_ARvtso8onILAD8gROt5Y2URdx6NfAqMn4p1mUWU,3020
18
- avoca/bindings/qa_tool.py,sha256=ninHe3mrJ8GULxRCkRTZixw-vmNhqu4zwwONd5aXd1Q,9735
18
+ avoca/bindings/qa_tool.py,sha256=hqsWUU99mYpkKfeULBoox4M2x7Bk0aYO4Q_8WGvt2og,11628
19
19
  avoca/bindings/synspec.py,sha256=W5RnBu-6eetmwjM8iMBe4wNwVNIaVpNW3bwa2ykGM2U,1733
20
20
  avoca/qa_class/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
21
  avoca/qa_class/abstract.py,sha256=CLt-6WFhZhrvKTLVHpdbJYMFM50VPOGiO-GG6IRPWzA,6011
22
22
  avoca/qa_class/concs.py,sha256=TcQic69I1Kr40RJgCILTtyjVLn0K6_q6I5Y1Vi3dKwk,813
23
23
  avoca/qa_class/generate_classes_doc.py,sha256=osz01SRZ5SrwJXVlmbcainVwVjmealSSIdbzXzUEGKQ,1915
24
24
  avoca/qa_class/invalid.py,sha256=PDZHN0RZ8jND3QY09UcbwJYjjT6VqS4a0klO3QYiFig,2650
25
- avoca/qa_class/rolling.py,sha256=CQ2E0qJ7FxDT4TucItkJRmkqhzMoNSnwtVQQ_HzX9Jk,4059
25
+ avoca/qa_class/rolling.py,sha256=m6KbfMdwSIDcXNTZqdth_I-YgZPnnUf8WAPSQGKBH6w,4324
26
26
  avoca/qa_class/rt.py,sha256=Bgv0DSSR-hIJ9kI6AdUkV6sXVS65gBxbASkk4TUHbnQ,5293
27
27
  avoca/qa_class/test.py,sha256=Xc88_Vwf3hvPiKKl4ILxZ2N985SY8eujUdnAoQu4mbo,591
28
28
  avoca/qa_class/zscore.py,sha256=jDw2UBmf7KBkskGOD5bgFy3RgNYUjc-9tYjSU-3L1ws,16714
29
29
  avoca/testing/__init__.py,sha256=CzkugadVit48-eMoMVtojZLHeSKgnmMMen6sGu6Q42Y,108
30
30
  avoca/testing/df.py,sha256=UQm6TdTDVRWvRNM5WnSWh6vdvDR1lqLNg0ti-B1L760,1865
31
31
  avoca/testing/utils.py,sha256=jVV0mIwLIpr0UBLMk8RjZH5J_dV_b6Gugxzo_WRgWU0,308
32
- avoca/utils/__init__.py,sha256=LEA2jJsqwSK2DBzXg00DbPhM1fXXREJ0XxLeuJtKapY,1398
33
- avoca/utils/flags_doc.py,sha256=cS7yKpxVh_SA6EdH3lSy9UpcIvhGwzAELRbkXN3CxO8,4168
32
+ avoca/utils/__init__.py,sha256=SZc1bHrQyg1DIYnbdUmANtUhnQWlJaMhPrDSWS8oVRY,1408
33
+ avoca/utils/flags_doc.py,sha256=jT1E0GN-B8ws_FyKGE20nlrKrgTHtoyjdo2r8RgYhwU,4294
34
34
  avoca/utils/torch_models.py,sha256=53TgOgSPMOOSGYy2cm1EGSK7qQkYMGEOq319KKM_Ir0,1015
35
- avoca-0.14.0.dist-info/METADATA,sha256=CayW94kozHUxF8sbKxE0pnWZnS0W5cjkEUKU7_QfgEc,1570
36
- avoca-0.14.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
37
- avoca-0.14.0.dist-info/licenses/LICENCE.txt,sha256=4MY53j3v7tEKwjyuriVz9YjB4Dscm2nDMB2CcG9lOmk,1059
38
- avoca-0.14.0.dist-info/RECORD,,
35
+ avoca-0.15.0.dist-info/METADATA,sha256=aJUaStu4uKLKeTarDTVyPTIflFiZ90OrcoktZKGRXWs,1570
36
+ avoca-0.15.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
37
+ avoca-0.15.0.dist-info/licenses/LICENCE.txt,sha256=4MY53j3v7tEKwjyuriVz9YjB4Dscm2nDMB2CcG9lOmk,1059
38
+ avoca-0.15.0.dist-info/RECORD,,
File without changes