avoca 0.14.0__tar.gz → 0.15.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. {avoca-0.14.0 → avoca-0.15.1}/.gitignore +1 -0
  2. {avoca-0.14.0 → avoca-0.15.1}/PKG-INFO +1 -1
  3. {avoca-0.14.0 → avoca-0.15.1}/avoca/bindings/nabel.py +2 -3
  4. {avoca-0.14.0 → avoca-0.15.1}/avoca/bindings/qa_tool.py +51 -10
  5. {avoca-0.14.0 → avoca-0.15.1}/avoca/manager.py +1 -0
  6. {avoca-0.14.0 → avoca-0.15.1}/avoca/plots.py +38 -34
  7. {avoca-0.14.0 → avoca-0.15.1}/avoca/qa_class/rolling.py +3 -0
  8. {avoca-0.14.0 → avoca-0.15.1}/avoca/utils/__init__.py +1 -1
  9. {avoca-0.14.0 → avoca-0.15.1}/avoca/utils/flags_doc.py +3 -0
  10. {avoca-0.14.0 → avoca-0.15.1}/pyproject.toml +1 -1
  11. avoca-0.15.1/tests/bindings/test_qatool.py +216 -0
  12. {avoca-0.14.0 → avoca-0.15.1}/tests/test_assigners.py +7 -0
  13. avoca-0.14.0/tests/bindings/test_qatool.py +0 -49
  14. {avoca-0.14.0 → avoca-0.15.1}/.gitlab-ci.yml +0 -0
  15. {avoca-0.14.0 → avoca-0.15.1}/.readthedocs.yaml +0 -0
  16. {avoca-0.14.0 → avoca-0.15.1}/.vscode/settings.json +0 -0
  17. {avoca-0.14.0 → avoca-0.15.1}/LICENCE.txt +0 -0
  18. {avoca-0.14.0 → avoca-0.15.1}/README.md +0 -0
  19. {avoca-0.14.0 → avoca-0.15.1}/avoca/__init__.py +0 -0
  20. {avoca-0.14.0 → avoca-0.15.1}/avoca/bindings/__init__.py +0 -0
  21. {avoca-0.14.0 → avoca-0.15.1}/avoca/bindings/ebas.py +0 -0
  22. {avoca-0.14.0 → avoca-0.15.1}/avoca/bindings/ebas_flags.py +0 -0
  23. {avoca-0.14.0 → avoca-0.15.1}/avoca/bindings/gcwerks-report.conf +0 -0
  24. {avoca-0.14.0 → avoca-0.15.1}/avoca/bindings/gcwerks.py +0 -0
  25. {avoca-0.14.0 → avoca-0.15.1}/avoca/bindings/gcwerks_gui.py +0 -0
  26. {avoca-0.14.0 → avoca-0.15.1}/avoca/bindings/synspec.py +0 -0
  27. {avoca-0.14.0 → avoca-0.15.1}/avoca/export_nas.py +0 -0
  28. {avoca-0.14.0 → avoca-0.15.1}/avoca/flagging.py +0 -0
  29. {avoca-0.14.0 → avoca-0.15.1}/avoca/flags.py +0 -0
  30. {avoca-0.14.0 → avoca-0.15.1}/avoca/io.py +0 -0
  31. {avoca-0.14.0 → avoca-0.15.1}/avoca/logging.py +0 -0
  32. {avoca-0.14.0 → avoca-0.15.1}/avoca/qa_class/__init__.py +0 -0
  33. {avoca-0.14.0 → avoca-0.15.1}/avoca/qa_class/abstract.py +0 -0
  34. {avoca-0.14.0 → avoca-0.15.1}/avoca/qa_class/concs.py +0 -0
  35. {avoca-0.14.0 → avoca-0.15.1}/avoca/qa_class/generate_classes_doc.py +0 -0
  36. {avoca-0.14.0 → avoca-0.15.1}/avoca/qa_class/invalid.py +0 -0
  37. {avoca-0.14.0 → avoca-0.15.1}/avoca/qa_class/rt.py +0 -0
  38. {avoca-0.14.0 → avoca-0.15.1}/avoca/qa_class/test.py +0 -0
  39. {avoca-0.14.0 → avoca-0.15.1}/avoca/qa_class/zscore.py +0 -0
  40. {avoca-0.14.0 → avoca-0.15.1}/avoca/requirements.py +0 -0
  41. {avoca-0.14.0 → avoca-0.15.1}/avoca/settings.py +0 -0
  42. {avoca-0.14.0 → avoca-0.15.1}/avoca/testing/__init__.py +0 -0
  43. {avoca-0.14.0 → avoca-0.15.1}/avoca/testing/df.py +0 -0
  44. {avoca-0.14.0 → avoca-0.15.1}/avoca/testing/utils.py +0 -0
  45. {avoca-0.14.0 → avoca-0.15.1}/avoca/utils/torch_models.py +0 -0
  46. {avoca-0.14.0 → avoca-0.15.1}/data/.avoca/config.yaml +0 -0
  47. {avoca-0.14.0 → avoca-0.15.1}/data/CH0001G.20240219123300.20240307132229.online_gc.NMHC.air.16d.61mn.CH01L_Agilent_GC-MS-MEDUSA_Medusa-12_JFJ.CH01L_gc_ms.lev0.nas +0 -0
  48. {avoca-0.14.0 → avoca-0.15.1}/data/tests/missing_area_cols.csv +0 -0
  49. {avoca-0.14.0 → avoca-0.15.1}/data/voc_jan2jun_2023.csv +0 -0
  50. {avoca-0.14.0 → avoca-0.15.1}/docs/Makefile +0 -0
  51. {avoca-0.14.0 → avoca-0.15.1}/docs/make.bat +0 -0
  52. {avoca-0.14.0 → avoca-0.15.1}/docs/source/bindings/ebas.md +0 -0
  53. {avoca-0.14.0 → avoca-0.15.1}/docs/source/bindings/gcwerks.md +0 -0
  54. {avoca-0.14.0 → avoca-0.15.1}/docs/source/bindings/index.rst +0 -0
  55. {avoca-0.14.0 → avoca-0.15.1}/docs/source/bindings/qa_tool.md +0 -0
  56. {avoca-0.14.0 → avoca-0.15.1}/docs/source/conf.py +0 -0
  57. {avoca-0.14.0 → avoca-0.15.1}/docs/source/index.rst +0 -0
  58. {avoca-0.14.0 → avoca-0.15.1}/docs/source/quickstart.ipynb +0 -0
  59. {avoca-0.14.0 → avoca-0.15.1}/examples/config.yaml +0 -0
  60. {avoca-0.14.0 → avoca-0.15.1}/examples/convert_synspec_to_gcwerks.py +0 -0
  61. {avoca-0.14.0 → avoca-0.15.1}/examples/data_qa.ipynb +0 -0
  62. {avoca-0.14.0 → avoca-0.15.1}/examples/data_qa_gcwerks.ipynb +0 -0
  63. {avoca-0.14.0 → avoca-0.15.1}/examples/export_gc_werks.py +0 -0
  64. {avoca-0.14.0 → avoca-0.15.1}/examples/export_gc_werks_secondary_peaks.py +0 -0
  65. {avoca-0.14.0 → avoca-0.15.1}/examples/get_tanks.ipynb +0 -0
  66. {avoca-0.14.0 → avoca-0.15.1}/examples/read_nas.ipynb +0 -0
  67. {avoca-0.14.0 → avoca-0.15.1}/tests/bindings/gcwerks.dat +0 -0
  68. {avoca-0.14.0 → avoca-0.15.1}/tests/bindings/test_gcwerks.py +0 -0
  69. {avoca-0.14.0 → avoca-0.15.1}/tests/test_flagging.py +0 -0
  70. {avoca-0.14.0 → avoca-0.15.1}/tests/test_io.py +0 -0
  71. {avoca-0.14.0 → avoca-0.15.1}/tests/test_manager.py +0 -0
@@ -13,3 +13,4 @@ dist/
13
13
 
14
14
  # Generated by pytests
15
15
  simple_df.csv
16
+ data/tests/export_empa_qa_tool/*.csv
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: avoca
3
- Version: 0.14.0
3
+ Version: 0.15.1
4
4
  Summary: @voc@: Quality assessment of measurement data
5
5
  Project-URL: Homepage, https://gitlab.com/empa503/atmospheric-measurements/avoca
6
6
  Project-URL: Bug Tracker, https://gitlab.com/empa503/atmospheric-measurements/avoca/-/issues
@@ -4,7 +4,6 @@ import logging
4
4
  from pathlib import Path
5
5
  import pandas as pd
6
6
 
7
-
8
7
  logger = logging.getLogger(__name__)
9
8
 
10
9
 
@@ -54,8 +53,8 @@ def add_nabel_data(df: pd.DataFrame, df_nabel: pd.DataFrame) -> pd.DataFrame:
54
53
 
55
54
  df_out = df.copy()
56
55
 
57
- col_dt_start = ("StartEndOffsets", "datetime_start")
58
- col_dt_end = ("StartEndOffsets", "datetime_end")
56
+ col_dt_start = ("-", "datetime_start")
57
+ col_dt_end = ("-", "datetime_end")
59
58
 
60
59
  if col_dt_start not in df.columns or col_dt_end not in df.columns:
61
60
  raise ValueError(
@@ -37,10 +37,11 @@ def export_EmpaQATool(
37
37
  station: str = "XXX",
38
38
  revision_date: datetime | None = None,
39
39
  dataset: datetime | str | None = None,
40
- export_names: dict[str, str] = {},
40
+ export_names: dict[str, str] | None = None,
41
41
  datetime_offsets: tuple[timedelta, timedelta] | None = None,
42
42
  substances: list[str] = [],
43
43
  rounding_decimals: int = 4,
44
+ df_substances: pd.DataFrame | None = None,
44
45
  ) -> Path:
45
46
  """Export to the EmpaQATool format.
46
47
 
@@ -64,7 +65,17 @@ def export_EmpaQATool(
64
65
  :arg datetime_offsets: Tuple of two timedelta to use for the start and end datetime
65
66
  :arg substances: List of substances to export. You can also specify group names.
66
67
  If not specified, this will use the substances from `df_substances`.
68
+ If a substance is present here and not in `df_substances`, it will still be exported.
67
69
  :arg rounding_decimals: Number of decimals to round the values to.
70
+ :arg df_substances: DataFrame with substance information.
71
+ If provided, the substances to export will be taken from this dataframe.
72
+ Columns:
73
+ - index: substance name
74
+ - export: bool, whether to export the substance
75
+ - export_name: str, name to use in the export file
76
+
77
+
78
+ :returns: Path to the exported file.
68
79
 
69
80
  """
70
81
 
@@ -113,12 +124,42 @@ def export_EmpaQATool(
113
124
  logger.debug(f"df_out: {df_out.head()}")
114
125
  if not substances:
115
126
  substances = compounds_from_df(df)
127
+ if df_substances is not None and "export" in df_substances.columns:
128
+ # Remove the substances that should not be exported
129
+ substances = [
130
+ s
131
+ for s in substances
132
+ if s not in df_substances.index or df_substances.loc[s, "export"]
133
+ ]
116
134
 
117
135
  remove_infs = lambda x: x.replace([np.inf, -np.inf], np.nan)
118
136
  is_invalid = lambda x: x.isin([np.inf, -np.inf]) | pd.isna(x)
119
137
  clean_col = lambda x: remove_infs(x).round(rounding_decimals).astype(str)
120
138
 
139
+ if export_names is None:
140
+ export_names = {}
141
+
142
+ if df_substances is not None and "export_name" in df_substances.columns:
143
+ # Read export names from the dataframe if provided
144
+ for substance in substances:
145
+ if not substance or substance not in df_substances.index:
146
+ continue
147
+ export_name_df = df_substances.loc[substance, "export_name"]
148
+ if not export_name_df or pd.isna(export_name_df):
149
+ continue
150
+ if substance in export_names and export_names[substance] != export_name_df:
151
+ logger.warning(
152
+ f"Substance {substance} found in both df_substances and"
153
+ " export_names. Using the name from export_names.\n"
154
+ f" - export_names (used): {export_names[substance]}\n"
155
+ f" - df_substances: {export_name_df}"
156
+ )
157
+ continue
158
+ export_names[substance] = export_name_df
159
+
121
160
  for substance in substances:
161
+ if not substance:
162
+ continue
122
163
 
123
164
  export_name = export_names.get(substance, substance)
124
165
 
@@ -234,12 +275,12 @@ def read_empaqatool(file_path: Path, shift: timedelta | None = None) -> pd.DataF
234
275
  dt += shift
235
276
  columns[("-", "datetime")] = dt
236
277
 
237
- # Last column is empty
238
- compounds = [ '-'.join(s[:-1]) for col in df.columns if len(s:=col.split("-")) >= 2]
239
-
240
-
241
- for compound in compounds:
278
+ # Last column is empty
279
+ compounds = [
280
+ "-".join(s[:-1]) for col in df.columns if len(s := col.split("-")) >= 2
281
+ ]
242
282
 
283
+ for compound in compounds:
243
284
 
244
285
  flag_col = f"{compound}-flag"
245
286
  value_col = f"{compound}-value"
@@ -248,8 +289,8 @@ def read_empaqatool(file_path: Path, shift: timedelta | None = None) -> pd.DataF
248
289
 
249
290
  mapping = {
250
291
  "conc": value_col,
251
- "u_expanded":acc_col,
252
- "u_precision":precision_col,
292
+ "u_expanded": acc_col,
293
+ "u_precision": precision_col,
253
294
  }
254
295
 
255
296
  flag_values = (pd.to_numeric(df[flag_col]) * 1e3).astype(int).mod(1000)
@@ -263,10 +304,10 @@ def read_empaqatool(file_path: Path, shift: timedelta | None = None) -> pd.DataF
263
304
  serie = pd.to_numeric(df[value])
264
305
  mask_nan = flags == QA_Flag.MISSING.value
265
306
  serie[mask_nan] = np.nan
266
- columns[(compound, key)] = serie
307
+ columns[(compound, key)] = serie
267
308
 
268
309
  columns[(compound, "flag")] = flags
269
-
310
+
270
311
  mask_nan = columns[(compound, "conc")].isna()
271
312
  columns[(compound, "flag")][mask_nan] |= QA_Flag.MISSING.value
272
313
 
@@ -20,6 +20,7 @@ class AssignerManager:
20
20
  _assigners_importpath = {
21
21
  "RetentionTimeChecker": "avoca.qa_class.rt",
22
22
  "ExtremeValues": "avoca.qa_class.zscore",
23
+ "RollingWindow": "avoca.qa_class.rolling",
23
24
  "ExtremeConcentrations": "avoca.qa_class.concs",
24
25
  "XY_Correlations": "avoca.qa_class.zscore",
25
26
  "TestAssigner": "avoca.qa_class.test",
@@ -77,28 +77,17 @@ def plot_yearly_plotly(
77
77
  import plotly.graph_objects as go
78
78
 
79
79
  dt_column = ("-", "datetime")
80
- serie = df[(compound, "conc")]
81
- dt = df[dt_column]
80
+
82
81
  if ("-", "type") in df.columns:
83
82
  mask_air = df[("-", "type")] == "air"
84
- serie = serie[mask_air]
85
- dt = dt[mask_air]
86
- if ("-", "type") in df_new.columns:
83
+ df = df[mask_air]
84
+ if df_new is not None and ("-", "type") in df_new.columns:
87
85
  mask_air_new = df_new[("-", "type")] == "air"
88
86
  df_new = df_new[mask_air_new]
89
87
 
88
+ dt = df[dt_column]
90
89
  x = dt.dt.day_of_year + dt.dt.hour / 24.0
91
- df_to_plot = pd.DataFrame(
92
- {
93
- "conc": serie.values,
94
- "year": dt.dt.year.values,
95
- },
96
- index=x.values,
97
- )
98
- # Break down by year, to have year as columns and conc as values
99
- df_to_plot = df_to_plot.pivot_table(
100
- index=df_to_plot.index, columns="year", values="conc"
101
- )
90
+
102
91
  fig = go.Figure()
103
92
 
104
93
  hover_template = "Timestamp: %{text}<br>Conc: %{y:.2f} ppt"
@@ -110,29 +99,44 @@ def plot_yearly_plotly(
110
99
  "hovertemplate": hover_template,
111
100
  }
112
101
 
113
- for year in df_to_plot.columns:
102
+ if (compound, "conc") in df:
103
+ serie = df[(compound, "conc")]
104
+ df_to_plot = pd.DataFrame(
105
+ {
106
+ "conc": serie.values,
107
+ "year": dt.dt.year.values,
108
+ },
109
+ index=x.values,
110
+ )
111
+ # Break down by year, to have year as columns and conc as values
112
+ df_to_plot = df_to_plot.pivot_table(
113
+ index=df_to_plot.index, columns="year", values="conc"
114
+ )
115
+ for year in df_to_plot.columns:
116
+ fig.add_trace(
117
+ go.Scatter(
118
+ x=df_to_plot.index,
119
+ y=df_to_plot[year],
120
+ name=str(year),
121
+ zorder=-year,
122
+ text=dt[dt.dt.year == year].dt.strftime("%y%m%d.%H%M"),
123
+ **kwargs,
124
+ )
125
+ )
126
+
127
+ x_values = pd.date_range(start="2024-01-01", end="2024-12-31", freq="MS")
128
+
129
+ if df_new is not None and (compound, "conc") in df_new:
130
+ dt_new = df_new[dt_column]
114
131
  fig.add_trace(
115
132
  go.Scatter(
116
- x=df_to_plot.index,
117
- y=df_to_plot[year],
118
- name=str(year),
119
- zorder=-year,
120
- text=dt[dt.dt.year == year].dt.strftime("%y%m%d.%H%M"),
133
+ x=dt_new.dt.dayofyear + dt_new.dt.hour / 24.0,
134
+ y=df_new[(compound, "conc")],
135
+ name="New Data",
136
+ text=dt_new.dt.strftime("%y%m%d.%H%M"),
121
137
  **kwargs,
122
138
  )
123
139
  )
124
- x_values = pd.date_range(start="2024-01-01", end="2024-12-31", freq="MS")
125
-
126
- dt_new = df_new[dt_column]
127
- fig.add_trace(
128
- go.Scatter(
129
- x=dt_new.dt.dayofyear + dt_new.dt.hour / 24.0,
130
- y=df_new[(compound, "conc")],
131
- name="New Data",
132
- text=dt_new.dt.strftime("%y%m%d.%H%M"),
133
- **kwargs,
134
- )
135
- )
136
140
  fig.update_layout(
137
141
  xaxis_title="Time of Year",
138
142
  yaxis_title=f"{compound} (ppt)",
@@ -28,6 +28,9 @@ class RollingWindow(ExtremeValues):
28
28
  :param only_greater: If True, only values greater than the threshold will be flagged.
29
29
  The values lower than the negative threshold will not be flagged.
30
30
  By default, this is True if use_log_normal is True, and False otherwise.
31
+ :param rolling_window: The size of the rolling window as a `timedelta` object.
32
+ See the `window` parameter in the pandas documentation for more details.
33
+ https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rolling.html#pandas-dataframe-rolling
31
34
  """
32
35
 
33
36
  require_datetime_index = True
@@ -13,7 +13,7 @@ def compounds_from_df(df: pd.DataFrame) -> list[str]:
13
13
  Returns:
14
14
  The compounds in the dataframe.
15
15
  """
16
- return [c for c in df.columns.get_level_values(0).unique() if c != "-"]
16
+ return [c for c in df.columns.get_level_values(0).unique() if c not in ["-", ""]]
17
17
 
18
18
 
19
19
  def runtypes_from_df(df: pd.DataFrame) -> list[str]:
@@ -56,6 +56,9 @@ def parse_enum_comments(filepath: Path, enum_class_name: str) -> dict[Enum, str]
56
56
  exec(code, module)
57
57
  enum_cls = module[enum_class_name]
58
58
  for name, comment in comment_dict.items():
59
+ if not hasattr(enum_cls, name):
60
+ # Probably somewhere else in the file
61
+ continue
59
62
  enum_member = getattr(enum_cls, name)
60
63
  enum_obj[enum_member] = comment
61
64
 
@@ -5,7 +5,7 @@ build-backend = "hatchling.build"
5
5
 
6
6
  [project]
7
7
  name = "avoca"
8
- version = "0.14.0"
8
+ version = "0.15.1"
9
9
  authors = [
10
10
  { name="Lionel Constantin", email="lionel.constantin@empa.ch" },
11
11
  ]
@@ -0,0 +1,216 @@
1
+ from datetime import timedelta
2
+ from pathlib import Path
3
+
4
+ import pandas as pd
5
+ import pytest
6
+
7
+ from avoca.bindings.qa_tool import export_EmpaQATool
8
+ from avoca.testing import testdata_dir
9
+ from avoca.testing.df import invalids_df, simple_df
10
+
11
+ export_path = testdata_dir / "export_empa_qa_tool"
12
+
13
+
14
+ @pytest.mark.parametrize(
15
+ "df, name",
16
+ [
17
+ (simple_df, "simple"),
18
+ (invalids_df, "invalids"),
19
+ ],
20
+ )
21
+ def test_export_EmpaQATool(df, name):
22
+ """Test the export_EmpaQATool function."""
23
+
24
+ # Create a test dataframe
25
+ df = df.copy()
26
+ df[("compA", "flag")] = 0
27
+ df[("compB", "flag")] = 0
28
+
29
+ df[("-", "datetime")] = pd.date_range(start="2025-01-01", periods=len(df), freq="h")
30
+
31
+ # Export the dataframe to a file
32
+ export_file = export_EmpaQATool(
33
+ df,
34
+ export_path,
35
+ datetime_offsets=(timedelta(minutes=-5), timedelta(minutes=0)),
36
+ station=name,
37
+ )
38
+
39
+ # Check that the file is created
40
+ assert Path(export_file).is_file()
41
+
42
+ # Read the file and check that the data is correct
43
+ df_exported = pd.read_csv(
44
+ export_file,
45
+ sep=";",
46
+ )
47
+ assert len(df_exported) == len(df)
48
+ # Check that the 'compB-Value' column is of float dtype
49
+ assert pd.api.types.is_float_dtype(df_exported["compB-Value"])
50
+ assert not pd.isna(df_exported["compB-Value"]).any(), "NAN values must be 999..."
51
+
52
+
53
+ def _prepare_df_for_export(df: pd.DataFrame) -> pd.DataFrame:
54
+ """Prepare a dataframe for export testing."""
55
+ df = df.copy()
56
+ df[("compA", "flag")] = 0
57
+ df[("compB", "flag")] = 0
58
+ df[("-", "datetime")] = pd.date_range(start="2025-01-01", periods=len(df), freq="h")
59
+ df[("-", "datetime_start")] = df[("-", "datetime")] - timedelta(minutes=5)
60
+ df[("-", "datetime_end")] = df[("-", "datetime")] + timedelta(minutes=0)
61
+ return df
62
+
63
+
64
+ def test_export_names_dict():
65
+ """test that export names from dict are used correctly."""
66
+
67
+ out_file = export_EmpaQATool(
68
+ _prepare_df_for_export(simple_df),
69
+ export_path,
70
+ export_names={"compA": "CustomCompA", "compB": "CustomCompB"},
71
+ station="TEST_DICT",
72
+ )
73
+
74
+ df_exported = pd.read_csv(
75
+ out_file,
76
+ sep=";",
77
+ )
78
+
79
+ assert "CustomCompA-Value" in df_exported.columns
80
+ assert "CustomCompB-Value" in df_exported.columns
81
+ assert "compA-Value" not in df_exported.columns
82
+ assert "compB-Value" not in df_exported.columns
83
+
84
+
85
+ def test_export_names_df():
86
+ """test that export names from dict are used correctly."""
87
+
88
+ out_file = export_EmpaQATool(
89
+ _prepare_df_for_export(simple_df),
90
+ export_path,
91
+ station="TEST_NAMES_DF",
92
+ df_substances=pd.DataFrame(
93
+ {
94
+ "substance": ["compA", "compB"],
95
+ "export_name": ["CustomCompA", "CustomCompB"],
96
+ }
97
+ ).set_index("substance"),
98
+ )
99
+
100
+ df_exported = pd.read_csv(
101
+ out_file,
102
+ sep=";",
103
+ )
104
+
105
+ assert "CustomCompA-Value" in df_exported.columns
106
+ assert "CustomCompB-Value" in df_exported.columns
107
+ assert "compA-Value" not in df_exported.columns
108
+ assert "compB-Value" not in df_exported.columns
109
+
110
+
111
+ def test_both_export_names_warns(caplog):
112
+ """test that export names from dict are used correctly."""
113
+
114
+ with caplog.at_level("WARNING"):
115
+ out_file = export_EmpaQATool(
116
+ _prepare_df_for_export(simple_df),
117
+ export_path,
118
+ station="TEST_BOTH_WARN",
119
+ export_names={"compA": "CustomCompA", "compB": "CustomCompB"},
120
+ df_substances=pd.DataFrame(
121
+ {
122
+ "substance": ["compA", "compB"],
123
+ "export_name": ["WrongCompA", "CustomCompB"],
124
+ }
125
+ ).set_index("substance"),
126
+ )
127
+
128
+ assert (
129
+ "Substance compA found in both df_substances and export_names." in caplog.text
130
+ )
131
+
132
+ df_exported = pd.read_csv(
133
+ out_file,
134
+ sep=";",
135
+ )
136
+
137
+ assert "CustomCompA-Value" in df_exported.columns
138
+ assert "CustomCompB-Value" in df_exported.columns
139
+ assert "compA-Value" not in df_exported.columns
140
+ assert "WrongCompA-Value" not in df_exported.columns
141
+ assert "compB-Value" not in df_exported.columns
142
+
143
+
144
+ def test_export_no_export_substances():
145
+ """test that substances with export=False in df_substances are not exported."""
146
+
147
+ out_file = export_EmpaQATool(
148
+ _prepare_df_for_export(simple_df),
149
+ export_path,
150
+ station="TEST_NO_EXPORT_SUBSTANCES",
151
+ df_substances=pd.DataFrame(
152
+ {
153
+ "substance": ["compA", "compB"],
154
+ "export": [True, False],
155
+ }
156
+ ).set_index("substance"),
157
+ )
158
+
159
+ df_exported = pd.read_csv(
160
+ out_file,
161
+ sep=";",
162
+ )
163
+
164
+ assert "compA-Value" in df_exported.columns
165
+ assert "compB-Value" not in df_exported.columns
166
+
167
+
168
+ def test_export_if_not_in_df_substances():
169
+ """test that substances not in df_substances are exported."""
170
+
171
+ out_file = export_EmpaQATool(
172
+ _prepare_df_for_export(simple_df),
173
+ export_path,
174
+ station="TEST_IF_NOT_IN_DF_SUBSTANCES",
175
+ df_substances=pd.DataFrame(
176
+ {
177
+ "substance": ["compA"],
178
+ "export": [True],
179
+ }
180
+ ).set_index("substance"),
181
+ )
182
+
183
+ df_exported = pd.read_csv(
184
+ out_file,
185
+ sep=";",
186
+ )
187
+
188
+ assert "compA-Value" in df_exported.columns
189
+ assert "compB-Value" in df_exported.columns
190
+
191
+
192
+ def test_export_and_rename_in_df_substances():
193
+ """test that export names from dict are used correctly."""
194
+
195
+ out_file = export_EmpaQATool(
196
+ _prepare_df_for_export(simple_df),
197
+ export_path,
198
+ station="TEST_EXPORT_AND_RENAME_IN_DF_SUBSTANCES",
199
+ df_substances=pd.DataFrame(
200
+ {
201
+ "substance": ["compA", "compB"],
202
+ "export_name": ["CustomCompA", "CustomCompB"],
203
+ "export": [True, False],
204
+ }
205
+ ).set_index("substance"),
206
+ )
207
+
208
+ df_exported = pd.read_csv(
209
+ out_file,
210
+ sep=";",
211
+ )
212
+
213
+ assert "CustomCompA-Value" in df_exported.columns
214
+ assert "compA-Value" not in df_exported.columns
215
+ assert "compB-Value" not in df_exported.columns
216
+ assert "CustomCompB-Value" not in df_exported.columns
@@ -3,6 +3,7 @@
3
3
  import pandas as pd
4
4
  import pytest
5
5
 
6
+ from avoca.manager import AssignerManager
6
7
  from avoca.qa_class.abstract import AbstractQA_Assigner
7
8
  from avoca.qa_class.invalid import InvalidValues
8
9
  from avoca.qa_class.zscore import ExtremeValues, XY_Correlations
@@ -44,6 +45,12 @@ def assigner(
44
45
  return assigner_type(variable="test_var", compounds=["compA", "compB"], **kwargs)
45
46
 
46
47
 
48
+ def test_is_in_documentation(assigner: AbstractQA_Assigner):
49
+ """Test the assigner will appear in the documentation."""
50
+
51
+ assert type(assigner).__name__ in AssignerManager._assigners_importpath
52
+
53
+
47
54
  def test_simple(assigner: AbstractQA_Assigner):
48
55
 
49
56
  df_one_extreme = df_test.df_one_extreme
@@ -1,49 +0,0 @@
1
- from datetime import timedelta
2
- from pathlib import Path
3
-
4
- import pandas as pd
5
- import pytest
6
-
7
- from avoca.bindings.qa_tool import export_EmpaQATool
8
- from avoca.testing import testdata_dir
9
- from avoca.testing.df import invalids_df, simple_df
10
-
11
-
12
- @pytest.mark.parametrize(
13
- "df, name",
14
- [
15
- (simple_df, "simple"),
16
- (invalids_df, "invalids"),
17
- ],
18
- )
19
- def test_export_EmpaQATool(df, name):
20
- """Test the export_EmpaQATool function."""
21
-
22
- # Create a test dataframe
23
- df = df.copy()
24
- df[("compA", "flag")] = 0
25
- df[("compB", "flag")] = 0
26
-
27
- df[("-", "datetime")] = pd.date_range(start="2025-01-01", periods=len(df), freq="h")
28
-
29
- # Export the dataframe to a file
30
- export_path = testdata_dir / "export_empa_qa_tool"
31
- export_file = export_EmpaQATool(
32
- df,
33
- export_path,
34
- datetime_offsets=(timedelta(minutes=-5), timedelta(minutes=0)),
35
- station=name,
36
- )
37
-
38
- # Check that the file is created
39
- assert Path(export_file).is_file()
40
-
41
- # Read the file and check that the data is correct
42
- df_exported = pd.read_csv(
43
- export_file,
44
- sep=";",
45
- )
46
- assert len(df_exported) == len(df)
47
- # Check that the 'compB-Value' column is of float dtype
48
- assert pd.api.types.is_float_dtype(df_exported["compB-Value"])
49
- assert not pd.isna(df_exported["compB-Value"]).any(), "NAN values must be 999..."
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes