avoca 0.14.0__tar.gz → 0.15.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. {avoca-0.14.0 → avoca-0.15.1}/.gitignore +1 -0
  2. {avoca-0.14.0 → avoca-0.15.1}/PKG-INFO +1 -1
  3. {avoca-0.14.0 → avoca-0.15.1}/avoca/bindings/nabel.py +2 -3
  4. {avoca-0.14.0 → avoca-0.15.1}/avoca/bindings/qa_tool.py +51 -10
  5. {avoca-0.14.0 → avoca-0.15.1}/avoca/manager.py +1 -0
  6. {avoca-0.14.0 → avoca-0.15.1}/avoca/plots.py +38 -34
  7. {avoca-0.14.0 → avoca-0.15.1}/avoca/qa_class/rolling.py +3 -0
  8. {avoca-0.14.0 → avoca-0.15.1}/avoca/utils/__init__.py +1 -1
  9. {avoca-0.14.0 → avoca-0.15.1}/avoca/utils/flags_doc.py +3 -0
  10. {avoca-0.14.0 → avoca-0.15.1}/pyproject.toml +1 -1
  11. avoca-0.15.1/tests/bindings/test_qatool.py +216 -0
  12. {avoca-0.14.0 → avoca-0.15.1}/tests/test_assigners.py +7 -0
  13. avoca-0.14.0/tests/bindings/test_qatool.py +0 -49
  14. {avoca-0.14.0 → avoca-0.15.1}/.gitlab-ci.yml +0 -0
  15. {avoca-0.14.0 → avoca-0.15.1}/.readthedocs.yaml +0 -0
  16. {avoca-0.14.0 → avoca-0.15.1}/.vscode/settings.json +0 -0
  17. {avoca-0.14.0 → avoca-0.15.1}/LICENCE.txt +0 -0
  18. {avoca-0.14.0 → avoca-0.15.1}/README.md +0 -0
  19. {avoca-0.14.0 → avoca-0.15.1}/avoca/__init__.py +0 -0
  20. {avoca-0.14.0 → avoca-0.15.1}/avoca/bindings/__init__.py +0 -0
  21. {avoca-0.14.0 → avoca-0.15.1}/avoca/bindings/ebas.py +0 -0
  22. {avoca-0.14.0 → avoca-0.15.1}/avoca/bindings/ebas_flags.py +0 -0
  23. {avoca-0.14.0 → avoca-0.15.1}/avoca/bindings/gcwerks-report.conf +0 -0
  24. {avoca-0.14.0 → avoca-0.15.1}/avoca/bindings/gcwerks.py +0 -0
  25. {avoca-0.14.0 → avoca-0.15.1}/avoca/bindings/gcwerks_gui.py +0 -0
  26. {avoca-0.14.0 → avoca-0.15.1}/avoca/bindings/synspec.py +0 -0
  27. {avoca-0.14.0 → avoca-0.15.1}/avoca/export_nas.py +0 -0
  28. {avoca-0.14.0 → avoca-0.15.1}/avoca/flagging.py +0 -0
  29. {avoca-0.14.0 → avoca-0.15.1}/avoca/flags.py +0 -0
  30. {avoca-0.14.0 → avoca-0.15.1}/avoca/io.py +0 -0
  31. {avoca-0.14.0 → avoca-0.15.1}/avoca/logging.py +0 -0
  32. {avoca-0.14.0 → avoca-0.15.1}/avoca/qa_class/__init__.py +0 -0
  33. {avoca-0.14.0 → avoca-0.15.1}/avoca/qa_class/abstract.py +0 -0
  34. {avoca-0.14.0 → avoca-0.15.1}/avoca/qa_class/concs.py +0 -0
  35. {avoca-0.14.0 → avoca-0.15.1}/avoca/qa_class/generate_classes_doc.py +0 -0
  36. {avoca-0.14.0 → avoca-0.15.1}/avoca/qa_class/invalid.py +0 -0
  37. {avoca-0.14.0 → avoca-0.15.1}/avoca/qa_class/rt.py +0 -0
  38. {avoca-0.14.0 → avoca-0.15.1}/avoca/qa_class/test.py +0 -0
  39. {avoca-0.14.0 → avoca-0.15.1}/avoca/qa_class/zscore.py +0 -0
  40. {avoca-0.14.0 → avoca-0.15.1}/avoca/requirements.py +0 -0
  41. {avoca-0.14.0 → avoca-0.15.1}/avoca/settings.py +0 -0
  42. {avoca-0.14.0 → avoca-0.15.1}/avoca/testing/__init__.py +0 -0
  43. {avoca-0.14.0 → avoca-0.15.1}/avoca/testing/df.py +0 -0
  44. {avoca-0.14.0 → avoca-0.15.1}/avoca/testing/utils.py +0 -0
  45. {avoca-0.14.0 → avoca-0.15.1}/avoca/utils/torch_models.py +0 -0
  46. {avoca-0.14.0 → avoca-0.15.1}/data/.avoca/config.yaml +0 -0
  47. {avoca-0.14.0 → avoca-0.15.1}/data/CH0001G.20240219123300.20240307132229.online_gc.NMHC.air.16d.61mn.CH01L_Agilent_GC-MS-MEDUSA_Medusa-12_JFJ.CH01L_gc_ms.lev0.nas +0 -0
  48. {avoca-0.14.0 → avoca-0.15.1}/data/tests/missing_area_cols.csv +0 -0
  49. {avoca-0.14.0 → avoca-0.15.1}/data/voc_jan2jun_2023.csv +0 -0
  50. {avoca-0.14.0 → avoca-0.15.1}/docs/Makefile +0 -0
  51. {avoca-0.14.0 → avoca-0.15.1}/docs/make.bat +0 -0
  52. {avoca-0.14.0 → avoca-0.15.1}/docs/source/bindings/ebas.md +0 -0
  53. {avoca-0.14.0 → avoca-0.15.1}/docs/source/bindings/gcwerks.md +0 -0
  54. {avoca-0.14.0 → avoca-0.15.1}/docs/source/bindings/index.rst +0 -0
  55. {avoca-0.14.0 → avoca-0.15.1}/docs/source/bindings/qa_tool.md +0 -0
  56. {avoca-0.14.0 → avoca-0.15.1}/docs/source/conf.py +0 -0
  57. {avoca-0.14.0 → avoca-0.15.1}/docs/source/index.rst +0 -0
  58. {avoca-0.14.0 → avoca-0.15.1}/docs/source/quickstart.ipynb +0 -0
  59. {avoca-0.14.0 → avoca-0.15.1}/examples/config.yaml +0 -0
  60. {avoca-0.14.0 → avoca-0.15.1}/examples/convert_synspec_to_gcwerks.py +0 -0
  61. {avoca-0.14.0 → avoca-0.15.1}/examples/data_qa.ipynb +0 -0
  62. {avoca-0.14.0 → avoca-0.15.1}/examples/data_qa_gcwerks.ipynb +0 -0
  63. {avoca-0.14.0 → avoca-0.15.1}/examples/export_gc_werks.py +0 -0
  64. {avoca-0.14.0 → avoca-0.15.1}/examples/export_gc_werks_secondary_peaks.py +0 -0
  65. {avoca-0.14.0 → avoca-0.15.1}/examples/get_tanks.ipynb +0 -0
  66. {avoca-0.14.0 → avoca-0.15.1}/examples/read_nas.ipynb +0 -0
  67. {avoca-0.14.0 → avoca-0.15.1}/tests/bindings/gcwerks.dat +0 -0
  68. {avoca-0.14.0 → avoca-0.15.1}/tests/bindings/test_gcwerks.py +0 -0
  69. {avoca-0.14.0 → avoca-0.15.1}/tests/test_flagging.py +0 -0
  70. {avoca-0.14.0 → avoca-0.15.1}/tests/test_io.py +0 -0
  71. {avoca-0.14.0 → avoca-0.15.1}/tests/test_manager.py +0 -0
@@ -13,3 +13,4 @@ dist/
13
13
 
14
14
  # Generated by pytests
15
15
  simple_df.csv
16
+ data/tests/export_empa_qa_tool/*.csv
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: avoca
3
- Version: 0.14.0
3
+ Version: 0.15.1
4
4
  Summary: @voc@: Quality assessment of measurement data
5
5
  Project-URL: Homepage, https://gitlab.com/empa503/atmospheric-measurements/avoca
6
6
  Project-URL: Bug Tracker, https://gitlab.com/empa503/atmospheric-measurements/avoca/-/issues
@@ -4,7 +4,6 @@ import logging
4
4
  from pathlib import Path
5
5
  import pandas as pd
6
6
 
7
-
8
7
  logger = logging.getLogger(__name__)
9
8
 
10
9
 
@@ -54,8 +53,8 @@ def add_nabel_data(df: pd.DataFrame, df_nabel: pd.DataFrame) -> pd.DataFrame:
54
53
 
55
54
  df_out = df.copy()
56
55
 
57
- col_dt_start = ("StartEndOffsets", "datetime_start")
58
- col_dt_end = ("StartEndOffsets", "datetime_end")
56
+ col_dt_start = ("-", "datetime_start")
57
+ col_dt_end = ("-", "datetime_end")
59
58
 
60
59
  if col_dt_start not in df.columns or col_dt_end not in df.columns:
61
60
  raise ValueError(
@@ -37,10 +37,11 @@ def export_EmpaQATool(
37
37
  station: str = "XXX",
38
38
  revision_date: datetime | None = None,
39
39
  dataset: datetime | str | None = None,
40
- export_names: dict[str, str] = {},
40
+ export_names: dict[str, str] | None = None,
41
41
  datetime_offsets: tuple[timedelta, timedelta] | None = None,
42
42
  substances: list[str] = [],
43
43
  rounding_decimals: int = 4,
44
+ df_substances: pd.DataFrame | None = None,
44
45
  ) -> Path:
45
46
  """Export to the EmpaQATool format.
46
47
 
@@ -64,7 +65,17 @@ def export_EmpaQATool(
64
65
  :arg datetime_offsets: Tuple of two timedelta to use for the start and end datetime
65
66
  :arg substances: List of substances to export. You can also specify group names.
66
67
  If not specified, this will use the substances from `df_substances`.
68
+ If a substance is present here and not in `df_substances`, it will still be exported.
67
69
  :arg rounding_decimals: Number of decimals to round the values to.
70
+ :arg df_substances: DataFrame with substance information.
71
+ If provided, the substances to export will be taken from this dataframe.
72
+ Columns:
73
+ - index: substance name
74
+ - export: bool, whether to export the substance
75
+ - export_name: str, name to use in the export file
76
+
77
+
78
+ :returns: Path to the exported file.
68
79
 
69
80
  """
70
81
 
@@ -113,12 +124,42 @@ def export_EmpaQATool(
113
124
  logger.debug(f"df_out: {df_out.head()}")
114
125
  if not substances:
115
126
  substances = compounds_from_df(df)
127
+ if df_substances is not None and "export" in df_substances.columns:
128
+ # Remove the substances that should not be exported
129
+ substances = [
130
+ s
131
+ for s in substances
132
+ if s not in df_substances.index or df_substances.loc[s, "export"]
133
+ ]
116
134
 
117
135
  remove_infs = lambda x: x.replace([np.inf, -np.inf], np.nan)
118
136
  is_invalid = lambda x: x.isin([np.inf, -np.inf]) | pd.isna(x)
119
137
  clean_col = lambda x: remove_infs(x).round(rounding_decimals).astype(str)
120
138
 
139
+ if export_names is None:
140
+ export_names = {}
141
+
142
+ if df_substances is not None and "export_name" in df_substances.columns:
143
+ # Read export names from the dataframe if provided
144
+ for substance in substances:
145
+ if not substance or substance not in df_substances.index:
146
+ continue
147
+ export_name_df = df_substances.loc[substance, "export_name"]
148
+ if not export_name_df or pd.isna(export_name_df):
149
+ continue
150
+ if substance in export_names and export_names[substance] != export_name_df:
151
+ logger.warning(
152
+ f"Substance {substance} found in both df_substances and"
153
+ " export_names. Using the name from export_names.\n"
154
+ f" - export_names (used): {export_names[substance]}\n"
155
+ f" - df_substances: {export_name_df}"
156
+ )
157
+ continue
158
+ export_names[substance] = export_name_df
159
+
121
160
  for substance in substances:
161
+ if not substance:
162
+ continue
122
163
 
123
164
  export_name = export_names.get(substance, substance)
124
165
 
@@ -234,12 +275,12 @@ def read_empaqatool(file_path: Path, shift: timedelta | None = None) -> pd.DataF
234
275
  dt += shift
235
276
  columns[("-", "datetime")] = dt
236
277
 
237
- # Last column is empty
238
- compounds = [ '-'.join(s[:-1]) for col in df.columns if len(s:=col.split("-")) >= 2]
239
-
240
-
241
- for compound in compounds:
278
+ # Last column is empty
279
+ compounds = [
280
+ "-".join(s[:-1]) for col in df.columns if len(s := col.split("-")) >= 2
281
+ ]
242
282
 
283
+ for compound in compounds:
243
284
 
244
285
  flag_col = f"{compound}-flag"
245
286
  value_col = f"{compound}-value"
@@ -248,8 +289,8 @@ def read_empaqatool(file_path: Path, shift: timedelta | None = None) -> pd.DataF
248
289
 
249
290
  mapping = {
250
291
  "conc": value_col,
251
- "u_expanded":acc_col,
252
- "u_precision":precision_col,
292
+ "u_expanded": acc_col,
293
+ "u_precision": precision_col,
253
294
  }
254
295
 
255
296
  flag_values = (pd.to_numeric(df[flag_col]) * 1e3).astype(int).mod(1000)
@@ -263,10 +304,10 @@ def read_empaqatool(file_path: Path, shift: timedelta | None = None) -> pd.DataF
263
304
  serie = pd.to_numeric(df[value])
264
305
  mask_nan = flags == QA_Flag.MISSING.value
265
306
  serie[mask_nan] = np.nan
266
- columns[(compound, key)] = serie
307
+ columns[(compound, key)] = serie
267
308
 
268
309
  columns[(compound, "flag")] = flags
269
-
310
+
270
311
  mask_nan = columns[(compound, "conc")].isna()
271
312
  columns[(compound, "flag")][mask_nan] |= QA_Flag.MISSING.value
272
313
 
@@ -20,6 +20,7 @@ class AssignerManager:
20
20
  _assigners_importpath = {
21
21
  "RetentionTimeChecker": "avoca.qa_class.rt",
22
22
  "ExtremeValues": "avoca.qa_class.zscore",
23
+ "RollingWindow": "avoca.qa_class.rolling",
23
24
  "ExtremeConcentrations": "avoca.qa_class.concs",
24
25
  "XY_Correlations": "avoca.qa_class.zscore",
25
26
  "TestAssigner": "avoca.qa_class.test",
@@ -77,28 +77,17 @@ def plot_yearly_plotly(
77
77
  import plotly.graph_objects as go
78
78
 
79
79
  dt_column = ("-", "datetime")
80
- serie = df[(compound, "conc")]
81
- dt = df[dt_column]
80
+
82
81
  if ("-", "type") in df.columns:
83
82
  mask_air = df[("-", "type")] == "air"
84
- serie = serie[mask_air]
85
- dt = dt[mask_air]
86
- if ("-", "type") in df_new.columns:
83
+ df = df[mask_air]
84
+ if df_new is not None and ("-", "type") in df_new.columns:
87
85
  mask_air_new = df_new[("-", "type")] == "air"
88
86
  df_new = df_new[mask_air_new]
89
87
 
88
+ dt = df[dt_column]
90
89
  x = dt.dt.day_of_year + dt.dt.hour / 24.0
91
- df_to_plot = pd.DataFrame(
92
- {
93
- "conc": serie.values,
94
- "year": dt.dt.year.values,
95
- },
96
- index=x.values,
97
- )
98
- # Break down by year, to have year as columns and conc as values
99
- df_to_plot = df_to_plot.pivot_table(
100
- index=df_to_plot.index, columns="year", values="conc"
101
- )
90
+
102
91
  fig = go.Figure()
103
92
 
104
93
  hover_template = "Timestamp: %{text}<br>Conc: %{y:.2f} ppt"
@@ -110,29 +99,44 @@ def plot_yearly_plotly(
110
99
  "hovertemplate": hover_template,
111
100
  }
112
101
 
113
- for year in df_to_plot.columns:
102
+ if (compound, "conc") in df:
103
+ serie = df[(compound, "conc")]
104
+ df_to_plot = pd.DataFrame(
105
+ {
106
+ "conc": serie.values,
107
+ "year": dt.dt.year.values,
108
+ },
109
+ index=x.values,
110
+ )
111
+ # Break down by year, to have year as columns and conc as values
112
+ df_to_plot = df_to_plot.pivot_table(
113
+ index=df_to_plot.index, columns="year", values="conc"
114
+ )
115
+ for year in df_to_plot.columns:
116
+ fig.add_trace(
117
+ go.Scatter(
118
+ x=df_to_plot.index,
119
+ y=df_to_plot[year],
120
+ name=str(year),
121
+ zorder=-year,
122
+ text=dt[dt.dt.year == year].dt.strftime("%y%m%d.%H%M"),
123
+ **kwargs,
124
+ )
125
+ )
126
+
127
+ x_values = pd.date_range(start="2024-01-01", end="2024-12-31", freq="MS")
128
+
129
+ if df_new is not None and (compound, "conc") in df_new:
130
+ dt_new = df_new[dt_column]
114
131
  fig.add_trace(
115
132
  go.Scatter(
116
- x=df_to_plot.index,
117
- y=df_to_plot[year],
118
- name=str(year),
119
- zorder=-year,
120
- text=dt[dt.dt.year == year].dt.strftime("%y%m%d.%H%M"),
133
+ x=dt_new.dt.dayofyear + dt_new.dt.hour / 24.0,
134
+ y=df_new[(compound, "conc")],
135
+ name="New Data",
136
+ text=dt_new.dt.strftime("%y%m%d.%H%M"),
121
137
  **kwargs,
122
138
  )
123
139
  )
124
- x_values = pd.date_range(start="2024-01-01", end="2024-12-31", freq="MS")
125
-
126
- dt_new = df_new[dt_column]
127
- fig.add_trace(
128
- go.Scatter(
129
- x=dt_new.dt.dayofyear + dt_new.dt.hour / 24.0,
130
- y=df_new[(compound, "conc")],
131
- name="New Data",
132
- text=dt_new.dt.strftime("%y%m%d.%H%M"),
133
- **kwargs,
134
- )
135
- )
136
140
  fig.update_layout(
137
141
  xaxis_title="Time of Year",
138
142
  yaxis_title=f"{compound} (ppt)",
@@ -28,6 +28,9 @@ class RollingWindow(ExtremeValues):
28
28
  :param only_greater: If True, only values greater than the threshold will be flagged.
29
29
  The values lower than the negative threshold will not be flagged.
30
30
  By default, this is True if use_log_normal is True, and False otherwise.
31
+ :param rolling_window: The size of the rolling window as a `timedelta` object.
32
+ See the `window` parameter in the pandas documentation for more details.
33
+ https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rolling.html#pandas-dataframe-rolling
31
34
  """
32
35
 
33
36
  require_datetime_index = True
@@ -13,7 +13,7 @@ def compounds_from_df(df: pd.DataFrame) -> list[str]:
13
13
  Returns:
14
14
  The compounds in the dataframe.
15
15
  """
16
- return [c for c in df.columns.get_level_values(0).unique() if c != "-"]
16
+ return [c for c in df.columns.get_level_values(0).unique() if c not in ["-", ""]]
17
17
 
18
18
 
19
19
  def runtypes_from_df(df: pd.DataFrame) -> list[str]:
@@ -56,6 +56,9 @@ def parse_enum_comments(filepath: Path, enum_class_name: str) -> dict[Enum, str]
56
56
  exec(code, module)
57
57
  enum_cls = module[enum_class_name]
58
58
  for name, comment in comment_dict.items():
59
+ if not hasattr(enum_cls, name):
60
+ # Probably somewhere else in the file
61
+ continue
59
62
  enum_member = getattr(enum_cls, name)
60
63
  enum_obj[enum_member] = comment
61
64
 
@@ -5,7 +5,7 @@ build-backend = "hatchling.build"
5
5
 
6
6
  [project]
7
7
  name = "avoca"
8
- version = "0.14.0"
8
+ version = "0.15.1"
9
9
  authors = [
10
10
  { name="Lionel Constantin", email="lionel.constantin@empa.ch" },
11
11
  ]
@@ -0,0 +1,216 @@
1
+ from datetime import timedelta
2
+ from pathlib import Path
3
+
4
+ import pandas as pd
5
+ import pytest
6
+
7
+ from avoca.bindings.qa_tool import export_EmpaQATool
8
+ from avoca.testing import testdata_dir
9
+ from avoca.testing.df import invalids_df, simple_df
10
+
11
+ export_path = testdata_dir / "export_empa_qa_tool"
12
+
13
+
14
+ @pytest.mark.parametrize(
15
+ "df, name",
16
+ [
17
+ (simple_df, "simple"),
18
+ (invalids_df, "invalids"),
19
+ ],
20
+ )
21
+ def test_export_EmpaQATool(df, name):
22
+ """Test the export_EmpaQATool function."""
23
+
24
+ # Create a test dataframe
25
+ df = df.copy()
26
+ df[("compA", "flag")] = 0
27
+ df[("compB", "flag")] = 0
28
+
29
+ df[("-", "datetime")] = pd.date_range(start="2025-01-01", periods=len(df), freq="h")
30
+
31
+ # Export the dataframe to a file
32
+ export_file = export_EmpaQATool(
33
+ df,
34
+ export_path,
35
+ datetime_offsets=(timedelta(minutes=-5), timedelta(minutes=0)),
36
+ station=name,
37
+ )
38
+
39
+ # Check that the file is created
40
+ assert Path(export_file).is_file()
41
+
42
+ # Read the file and check that the data is correct
43
+ df_exported = pd.read_csv(
44
+ export_file,
45
+ sep=";",
46
+ )
47
+ assert len(df_exported) == len(df)
48
+ # Check that the 'compB-Value' column is of float dtype
49
+ assert pd.api.types.is_float_dtype(df_exported["compB-Value"])
50
+ assert not pd.isna(df_exported["compB-Value"]).any(), "NAN values must be 999..."
51
+
52
+
53
+ def _prepare_df_for_export(df: pd.DataFrame) -> pd.DataFrame:
54
+ """Prepare a dataframe for export testing."""
55
+ df = df.copy()
56
+ df[("compA", "flag")] = 0
57
+ df[("compB", "flag")] = 0
58
+ df[("-", "datetime")] = pd.date_range(start="2025-01-01", periods=len(df), freq="h")
59
+ df[("-", "datetime_start")] = df[("-", "datetime")] - timedelta(minutes=5)
60
+ df[("-", "datetime_end")] = df[("-", "datetime")] + timedelta(minutes=0)
61
+ return df
62
+
63
+
64
+ def test_export_names_dict():
65
+ """test that export names from dict are used correctly."""
66
+
67
+ out_file = export_EmpaQATool(
68
+ _prepare_df_for_export(simple_df),
69
+ export_path,
70
+ export_names={"compA": "CustomCompA", "compB": "CustomCompB"},
71
+ station="TEST_DICT",
72
+ )
73
+
74
+ df_exported = pd.read_csv(
75
+ out_file,
76
+ sep=";",
77
+ )
78
+
79
+ assert "CustomCompA-Value" in df_exported.columns
80
+ assert "CustomCompB-Value" in df_exported.columns
81
+ assert "compA-Value" not in df_exported.columns
82
+ assert "compB-Value" not in df_exported.columns
83
+
84
+
85
+ def test_export_names_df():
86
+ """test that export names from dict are used correctly."""
87
+
88
+ out_file = export_EmpaQATool(
89
+ _prepare_df_for_export(simple_df),
90
+ export_path,
91
+ station="TEST_NAMES_DF",
92
+ df_substances=pd.DataFrame(
93
+ {
94
+ "substance": ["compA", "compB"],
95
+ "export_name": ["CustomCompA", "CustomCompB"],
96
+ }
97
+ ).set_index("substance"),
98
+ )
99
+
100
+ df_exported = pd.read_csv(
101
+ out_file,
102
+ sep=";",
103
+ )
104
+
105
+ assert "CustomCompA-Value" in df_exported.columns
106
+ assert "CustomCompB-Value" in df_exported.columns
107
+ assert "compA-Value" not in df_exported.columns
108
+ assert "compB-Value" not in df_exported.columns
109
+
110
+
111
+ def test_both_export_names_warns(caplog):
112
+ """test that export names from dict are used correctly."""
113
+
114
+ with caplog.at_level("WARNING"):
115
+ out_file = export_EmpaQATool(
116
+ _prepare_df_for_export(simple_df),
117
+ export_path,
118
+ station="TEST_BOTH_WARN",
119
+ export_names={"compA": "CustomCompA", "compB": "CustomCompB"},
120
+ df_substances=pd.DataFrame(
121
+ {
122
+ "substance": ["compA", "compB"],
123
+ "export_name": ["WrongCompA", "CustomCompB"],
124
+ }
125
+ ).set_index("substance"),
126
+ )
127
+
128
+ assert (
129
+ "Substance compA found in both df_substances and export_names." in caplog.text
130
+ )
131
+
132
+ df_exported = pd.read_csv(
133
+ out_file,
134
+ sep=";",
135
+ )
136
+
137
+ assert "CustomCompA-Value" in df_exported.columns
138
+ assert "CustomCompB-Value" in df_exported.columns
139
+ assert "compA-Value" not in df_exported.columns
140
+ assert "WrongCompA-Value" not in df_exported.columns
141
+ assert "compB-Value" not in df_exported.columns
142
+
143
+
144
+ def test_export_no_export_substances():
145
+ """test that substances with export=False in df_substances are not exported."""
146
+
147
+ out_file = export_EmpaQATool(
148
+ _prepare_df_for_export(simple_df),
149
+ export_path,
150
+ station="TEST_NO_EXPORT_SUBSTANCES",
151
+ df_substances=pd.DataFrame(
152
+ {
153
+ "substance": ["compA", "compB"],
154
+ "export": [True, False],
155
+ }
156
+ ).set_index("substance"),
157
+ )
158
+
159
+ df_exported = pd.read_csv(
160
+ out_file,
161
+ sep=";",
162
+ )
163
+
164
+ assert "compA-Value" in df_exported.columns
165
+ assert "compB-Value" not in df_exported.columns
166
+
167
+
168
+ def test_export_if_not_in_df_substances():
169
+ """test that substances not in df_substances are exported."""
170
+
171
+ out_file = export_EmpaQATool(
172
+ _prepare_df_for_export(simple_df),
173
+ export_path,
174
+ station="TEST_IF_NOT_IN_DF_SUBSTANCES",
175
+ df_substances=pd.DataFrame(
176
+ {
177
+ "substance": ["compA"],
178
+ "export": [True],
179
+ }
180
+ ).set_index("substance"),
181
+ )
182
+
183
+ df_exported = pd.read_csv(
184
+ out_file,
185
+ sep=";",
186
+ )
187
+
188
+ assert "compA-Value" in df_exported.columns
189
+ assert "compB-Value" in df_exported.columns
190
+
191
+
192
+ def test_export_and_rename_in_df_substances():
193
+ """test that export names from dict are used correctly."""
194
+
195
+ out_file = export_EmpaQATool(
196
+ _prepare_df_for_export(simple_df),
197
+ export_path,
198
+ station="TEST_EXPORT_AND_RENAME_IN_DF_SUBSTANCES",
199
+ df_substances=pd.DataFrame(
200
+ {
201
+ "substance": ["compA", "compB"],
202
+ "export_name": ["CustomCompA", "CustomCompB"],
203
+ "export": [True, False],
204
+ }
205
+ ).set_index("substance"),
206
+ )
207
+
208
+ df_exported = pd.read_csv(
209
+ out_file,
210
+ sep=";",
211
+ )
212
+
213
+ assert "CustomCompA-Value" in df_exported.columns
214
+ assert "compA-Value" not in df_exported.columns
215
+ assert "compB-Value" not in df_exported.columns
216
+ assert "CustomCompB-Value" not in df_exported.columns
@@ -3,6 +3,7 @@
3
3
  import pandas as pd
4
4
  import pytest
5
5
 
6
+ from avoca.manager import AssignerManager
6
7
  from avoca.qa_class.abstract import AbstractQA_Assigner
7
8
  from avoca.qa_class.invalid import InvalidValues
8
9
  from avoca.qa_class.zscore import ExtremeValues, XY_Correlations
@@ -44,6 +45,12 @@ def assigner(
44
45
  return assigner_type(variable="test_var", compounds=["compA", "compB"], **kwargs)
45
46
 
46
47
 
48
+ def test_is_in_documentation(assigner: AbstractQA_Assigner):
49
+ """Test the assigner will appear in the documentation."""
50
+
51
+ assert type(assigner).__name__ in AssignerManager._assigners_importpath
52
+
53
+
47
54
  def test_simple(assigner: AbstractQA_Assigner):
48
55
 
49
56
  df_one_extreme = df_test.df_one_extreme
@@ -1,49 +0,0 @@
1
- from datetime import timedelta
2
- from pathlib import Path
3
-
4
- import pandas as pd
5
- import pytest
6
-
7
- from avoca.bindings.qa_tool import export_EmpaQATool
8
- from avoca.testing import testdata_dir
9
- from avoca.testing.df import invalids_df, simple_df
10
-
11
-
12
- @pytest.mark.parametrize(
13
- "df, name",
14
- [
15
- (simple_df, "simple"),
16
- (invalids_df, "invalids"),
17
- ],
18
- )
19
- def test_export_EmpaQATool(df, name):
20
- """Test the export_EmpaQATool function."""
21
-
22
- # Create a test dataframe
23
- df = df.copy()
24
- df[("compA", "flag")] = 0
25
- df[("compB", "flag")] = 0
26
-
27
- df[("-", "datetime")] = pd.date_range(start="2025-01-01", periods=len(df), freq="h")
28
-
29
- # Export the dataframe to a file
30
- export_path = testdata_dir / "export_empa_qa_tool"
31
- export_file = export_EmpaQATool(
32
- df,
33
- export_path,
34
- datetime_offsets=(timedelta(minutes=-5), timedelta(minutes=0)),
35
- station=name,
36
- )
37
-
38
- # Check that the file is created
39
- assert Path(export_file).is_file()
40
-
41
- # Read the file and check that the data is correct
42
- df_exported = pd.read_csv(
43
- export_file,
44
- sep=";",
45
- )
46
- assert len(df_exported) == len(df)
47
- # Check that the 'compB-Value' column is of float dtype
48
- assert pd.api.types.is_float_dtype(df_exported["compB-Value"])
49
- assert not pd.isna(df_exported["compB-Value"]).any(), "NAN values must be 999..."
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes