PyPI - avoca - Versions diffs - 0.12.0__py3-none-any.whl → 0.14.0__py3-none-any.whl - Mend

avoca 0.12.0__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

avoca/bindings/ebas.py +16 -3
avoca/bindings/ebas_flags.py +1 -1
avoca/plots.py +26 -2
avoca/qa_class/abstract.py +9 -0
avoca/qa_class/rolling.py +133 -0
avoca/testing/df.py +1 -0
avoca/testing/utils.py +9 -0
{avoca-0.12.0.dist-info → avoca-0.14.0.dist-info}/METADATA +1 -1
{avoca-0.12.0.dist-info → avoca-0.14.0.dist-info}/RECORD +11 -9
{avoca-0.12.0.dist-info → avoca-0.14.0.dist-info}/WHEEL +1 -1
{avoca-0.12.0.dist-info → avoca-0.14.0.dist-info}/licenses/LICENCE.txt +0 -0

avoca/bindings/ebas.py CHANGED Viewed

@@ -171,7 +171,6 @@ def set_dataframe(
             )
         )
     this_nan_flags = nan_flags.copy()
     if data_level in concs_data_levels and invalidate_conc_calib:
@@ -188,7 +187,9 @@ def set_dataframe(
             )
             for flag in flag_col
         ]
-        nan_flag = np.logical_or.reduce([flag_col & flag.value for flag in this_nan_flags])
+        nan_flag = np.logical_or.reduce(
+            [flag_col & flag.value for flag in this_nan_flags]
+        )
         for var in vars_to_export[data_level]:
             ebas_name = compounds[sub]
@@ -199,6 +200,16 @@ def set_dataframe(
                 for val, isnan in zip(serie_to_export, nan_flag)
             ]
+            if var == "conc_calib":
+                # Invalidate calibration concentration for non-calibration samples
+                this_flags = [
+                    flags_ebas
+                    + ([] if (QA_Flag.CALIBRATION.value & flag_avoca) else [980])
+                    for flags_ebas, flag_avoca in zip(flags, flag_col)
+                ]
+            else:
+                this_flags = flags
             metadata = DataObject()
             metadata.comp_name = (
                 f"{ebas_name}_{ebas_compname_of_var[var]}"
@@ -214,7 +225,9 @@ def set_dataframe(
             metadata.matrix = "air"
             # add the variable
             nas.variables.append(
-                DataObject(values_=values, flags=flags, flagcol=True, metadata=metadata)
+                DataObject(
+                    values_=values, flags=this_flags, flagcol=True, metadata=metadata
+                )
             )
             if var == "conc_calib":

avoca/bindings/ebas_flags.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# https://projects.nilu.no/ccc/flags/flags.html for more info on what ebas uses
+# https://ebas-submit.nilu.no/templates/comments/fl_flag for more info on what ebas uses
 from avoca.flags import QA_Flag, nan_flags
 flags_to_ebas: dict[QA_Flag, int] = {

avoca/plots.py CHANGED Viewed

@@ -69,6 +69,8 @@ def plot_yearly_plotly(
     df: pd.DataFrame,
     compound: str,
     df_new: pd.DataFrame | None = None,
+    opacity: float = 0.5,
+    size: int = 6,
 ) -> "plotly.graph_objs._figure.Figure":
     """Plot yearly data using plotly."""
     import plotly.express as px
@@ -97,7 +99,28 @@ def plot_yearly_plotly(
     df_to_plot = df_to_plot.pivot_table(
         index=df_to_plot.index, columns="year", values="conc"
     )
-    fig = px.scatter(df_to_plot)
+    fig = go.Figure()
+    hover_template = "Timestamp: %{text}<br>Conc: %{y:.2f} ppt"
+    kwargs = {
+        "mode": "markers",
+        "opacity": opacity,
+        "marker": dict(size=size),
+        "hovertemplate": hover_template,
+    }
+    for year in df_to_plot.columns:
+        fig.add_trace(
+            go.Scatter(
+                x=df_to_plot.index,
+                y=df_to_plot[year],
+                name=str(year),
+                zorder=-year,
+                text=dt[dt.dt.year == year].dt.strftime("%y%m%d.%H%M"),
+                **kwargs,
+            )
+        )
     x_values = pd.date_range(start="2024-01-01", end="2024-12-31", freq="MS")
     dt_new = df_new[dt_column]
@@ -105,8 +128,9 @@ def plot_yearly_plotly(
         go.Scatter(
             x=dt_new.dt.dayofyear + dt_new.dt.hour / 24.0,
             y=df_new[(compound, "conc")],
-            mode="markers",
             name="New Data",
+            text=dt_new.dt.strftime("%y%m%d.%H%M"),
+            **kwargs,
         )
     )
     fig.update_layout(

avoca/qa_class/abstract.py CHANGED Viewed

@@ -49,6 +49,7 @@ class AbstractQA_Assigner(ABC):
     flag: QA_Flag
     runtypes: list[str] | None
     required_packages: list[PythonPackageRequirement] | None = None
+    require_datetime_index: bool = False
     # Options that can be set by the user
     name: str
@@ -142,6 +143,14 @@ class AbstractQA_Assigner(ABC):
                 f"Please check the data and the settings for {self.name}"
             )
+        if self.require_datetime_index:
+            if not isinstance(df.index, pd.DatetimeIndex):
+                raise ValueError(
+                    f"Assigner {self} requires a DatetimeIndex but the dataframe"
+                    " does not have one. \n "
+                    f"Please check the data and the settings for {self.name}"
+                )
     @abstractmethod
     def fit(self, df: pd.DataFrame):
         """Fit the QA assigner on some data.

avoca/qa_class/rolling.py ADDED Viewed

@@ -0,0 +1,133 @@
+"""Quality assurance based on statistical methods."""
+from __future__ import annotations
+from datetime import timedelta
+from typing import TYPE_CHECKING
+import numpy as np
+import pandas as pd
+from avoca.qa_class.zscore import ExtremeValues
+if TYPE_CHECKING:
+    from avoca.utils.torch_models import MultipleRegressionModel
+class RollingWindow(ExtremeValues):
+    """Detect in rolling windows.
+    The method is based on outliers in a rolling window using the median and standard deviation.
+    The training is done directly on the fitted data.
+    :param variable: The variable to check for extreme values.
+    :param threshold: The threshold for the z-score. To flag values.
+    :param use_log_normal: If True, the log of the values will be used to calculate the z-score.
+        This can be useful if the values are log-normal distributed.
+    :param only_greater: If True, only values greater than the threshold will be flagged.
+        The values lower than the negative threshold will not be flagged.
+        By default, this is True if use_log_normal is True, and False otherwise.
+    """
+    require_datetime_index = True
+    rolling_window: timedelta
+    def __init__(
+        self,
+        *args,
+        rolling_window: timedelta = timedelta(days=7),
+        threshold: float = 1.5,
+        **kwargs,
+    ):
+        super().__init__(*args, threshold=threshold, **kwargs)
+        self.rolling_window = rolling_window
+    def fit(self, df: pd.DataFrame):
+        self.check_columns_or_raise(df, columns=self._stats_columns)
+        self.df_train = df[self._stats_columns]
+    def assign(self, df: pd.DataFrame) -> dict[str, pd.Index]:
+        df = df[self._stats_columns]
+        df = self._clean_data(df)
+        if self.use_log_normal:
+            # Replace <=0 with NaN
+            df = df.where(df > 0, np.nan)
+            df = df.map(lambda x: np.log(x))
+        rolling = df.rolling(window=self.rolling_window)
+        means = rolling.median()
+        stds = rolling.std()
+        self.rolling_median = means
+        self.rolling_std = stds
+        thresholds = means + stds * self.threshold
+        df_fail = df > thresholds
+        if not self.only_greater:
+            df_fail = df_fail | (df < (means - stds * self.threshold))
+        out_dict = {}
+        for compound in self.compounds:
+            col = (compound, self.variable)
+            this_c_fail = df_fail[col]
+            out_dict[compound] = this_c_fail.loc[this_c_fail].index
+        return out_dict
+    def plot(self):
+        import matplotlib.pyplot as plt
+        fig, axes = plt.subplots(
+            len(self.compounds), 1, figsize=(6, 3 * len(self.compounds)), sharex=True
+        )
+        x = self.dt if hasattr(self, "dt") else self.df_train.index
+        x = pd.Series(x, index=self.df_train.index)
+        outliers = self.assign(self.df_train)
+        for i, compound in enumerate(self.compounds):
+            ax = axes[i]
+            col = (compound, self.variable)
+            ax.scatter(
+                x,
+                self.df_train[col],
+                s=1,
+                label="darkblue",
+            )
+            median = self.rolling_median[col]
+            std = self.rolling_std[col]
+            top, bottom = median + std * self.threshold, median - std * self.threshold
+            ax.fill_between(
+                x,
+                top,
+                bottom,
+                color="lightgray",
+                label="Rolling threshold",
+                alpha=0.5,
+            )
+            outlier_indices = outliers[compound]
+            ax.scatter(
+                x.loc[outlier_indices],
+                self.df_train.loc[outlier_indices, col],
+                s=10,
+                marker="x",
+                color="red",
+                label="Extreme values",
+            )
+            ax.set_title(
+                f"{compound} +- {self.threshold} std",
+                # Under teh top line
+                y=0.8,
+            )
+            ax.tick_params(axis="x", rotation=25)
+        return fig, axes

avoca/testing/df.py CHANGED Viewed

@@ -10,6 +10,7 @@ import numpy as np
 import pandas as pd
 empty_index = pd.Index([], dtype="int64")
+empty_index_dt = pd.DatetimeIndex([])
 simple_df = pd.DataFrame(
     np.ones((2, 4)),

avoca/testing/utils.py ADDED Viewed

@@ -0,0 +1,9 @@
+import pandas as pd
+def make_dt_index(df: pd.DataFrame | pd.Index) -> pd.DataFrame | pd.Index:
+    """Create a datetime index for the dataframe."""
+    index = pd.date_range(start="2023-01-01", periods=len(df), freq="h")
+    if isinstance(df, pd.Index):
+        return index
+    return df.set_index(index)

{avoca-0.12.0.dist-info → avoca-0.14.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: avoca
-Version: 0.12.0
+Version: 0.14.0
 Summary: @voc@: Quality assessement of measurement data
 Project-URL: Homepage, https://gitlab.com/empa503/atmospheric-measurements/avoca
 Project-URL: Bug Tracker, https://gitlab.com/empa503/atmospheric-measurements/avoca/-/issues

{avoca-0.12.0.dist-info → avoca-0.14.0.dist-info}/RECORD RENAMED Viewed

@@ -5,12 +5,12 @@ avoca/flags.py,sha256=wobuZoIJh6dFsdiqqYJLZ_AHe4pcFE9tjuoimNXLjIQ,1428
 avoca/io.py,sha256=67D5x1qkLqWC7wWehyOfX96L4H3-tn9x2V4jMCoIRqA,729
 avoca/logging.py,sha256=BrxgZQRfnkPSoQ0ZXhOzzhIsmbyjKvaJNG55MdM9jmA,86
 avoca/manager.py,sha256=ET-ATrSLi2rSV7PjBzwpjj0V_60MFxSIZqQ03aEIbdA,5284
-avoca/plots.py,sha256=uEo0rTCwQ0iygTaycYPlbtcqNbJpDQd7xjvis686lD4,3567
+avoca/plots.py,sha256=UjfUgbfxd2veMOGHtSvJycru-w3gWsGjOVO__I-zqzQ,4205
 avoca/requirements.py,sha256=q4z6bJ6iW5jSy10Y0elfE9BoEcAZC2-kUqYi4zA6TGE,563
 avoca/settings.py,sha256=Px-sCGIlRyWI2RBJaGUY0K1V60kOZY9n41eft92bjN4,2112
 avoca/bindings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-avoca/bindings/ebas.py,sha256=48cYR-jwc3GMZCVhYYbMVUj1RgFAoQNtQC2kOpA1iAA,18827
-avoca/bindings/ebas_flags.py,sha256=nts47BB74vDlAbecUatXtjeyL3SALLCt3fRl6BfdOS4,2388
+avoca/bindings/ebas.py,sha256=vil4u4G6jGJrE12Z7nBvGpJuTAT9QyvbNNyWsWr5UaM,19306
+avoca/bindings/ebas_flags.py,sha256=N-JpmA6WCFjcYhvt7XjyOZMbR7vCdyPV6uHBlF45UJU,2397
 avoca/bindings/gcwerks-report.conf,sha256=jO0I62DfgzrXXS1FuiW8ds-oc1_j8kpFCO61Fk-erBw,230
 avoca/bindings/gcwerks.py,sha256=a5n9Iot3r_ejnCEdILk4hE2uioONB75Soq5fvSLlDoo,14879
 avoca/bindings/gcwerks_gui.py,sha256=Fj3p8obFq3lWrWW0LlA8WBALP8-U70hvps5vZEt4NaM,9458
@@ -18,19 +18,21 @@ avoca/bindings/nabel.py,sha256=VbC_ARvtso8onILAD8gROt5Y2URdx6NfAqMn4p1mUWU,3020
 avoca/bindings/qa_tool.py,sha256=ninHe3mrJ8GULxRCkRTZixw-vmNhqu4zwwONd5aXd1Q,9735
 avoca/bindings/synspec.py,sha256=W5RnBu-6eetmwjM8iMBe4wNwVNIaVpNW3bwa2ykGM2U,1733
 avoca/qa_class/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-avoca/qa_class/abstract.py,sha256=KCK9OhKNWlMje-5D0hgMIf-g64D_kRwRsoCZ_R4VuqI,5612
+avoca/qa_class/abstract.py,sha256=CLt-6WFhZhrvKTLVHpdbJYMFM50VPOGiO-GG6IRPWzA,6011
 avoca/qa_class/concs.py,sha256=TcQic69I1Kr40RJgCILTtyjVLn0K6_q6I5Y1Vi3dKwk,813
 avoca/qa_class/generate_classes_doc.py,sha256=osz01SRZ5SrwJXVlmbcainVwVjmealSSIdbzXzUEGKQ,1915
 avoca/qa_class/invalid.py,sha256=PDZHN0RZ8jND3QY09UcbwJYjjT6VqS4a0klO3QYiFig,2650
+avoca/qa_class/rolling.py,sha256=CQ2E0qJ7FxDT4TucItkJRmkqhzMoNSnwtVQQ_HzX9Jk,4059
 avoca/qa_class/rt.py,sha256=Bgv0DSSR-hIJ9kI6AdUkV6sXVS65gBxbASkk4TUHbnQ,5293
 avoca/qa_class/test.py,sha256=Xc88_Vwf3hvPiKKl4ILxZ2N985SY8eujUdnAoQu4mbo,591
 avoca/qa_class/zscore.py,sha256=jDw2UBmf7KBkskGOD5bgFy3RgNYUjc-9tYjSU-3L1ws,16714
 avoca/testing/__init__.py,sha256=CzkugadVit48-eMoMVtojZLHeSKgnmMMen6sGu6Q42Y,108
-avoca/testing/df.py,sha256=Nc0GUYTApZgYyUTMnHMTbSKLiA5ty9Bg7gUGtnoFYMI,1826
+avoca/testing/df.py,sha256=UQm6TdTDVRWvRNM5WnSWh6vdvDR1lqLNg0ti-B1L760,1865
+avoca/testing/utils.py,sha256=jVV0mIwLIpr0UBLMk8RjZH5J_dV_b6Gugxzo_WRgWU0,308
 avoca/utils/__init__.py,sha256=LEA2jJsqwSK2DBzXg00DbPhM1fXXREJ0XxLeuJtKapY,1398
 avoca/utils/flags_doc.py,sha256=cS7yKpxVh_SA6EdH3lSy9UpcIvhGwzAELRbkXN3CxO8,4168
 avoca/utils/torch_models.py,sha256=53TgOgSPMOOSGYy2cm1EGSK7qQkYMGEOq319KKM_Ir0,1015
-avoca-0.12.0.dist-info/METADATA,sha256=4xD5mqScWJDeAnbM3vFfKLxaszOLz0GlDPvjE1Ej_vw,1570
-avoca-0.12.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-avoca-0.12.0.dist-info/licenses/LICENCE.txt,sha256=4MY53j3v7tEKwjyuriVz9YjB4Dscm2nDMB2CcG9lOmk,1059
-avoca-0.12.0.dist-info/RECORD,,
+avoca-0.14.0.dist-info/METADATA,sha256=CayW94kozHUxF8sbKxE0pnWZnS0W5cjkEUKU7_QfgEc,1570
+avoca-0.14.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+avoca-0.14.0.dist-info/licenses/LICENCE.txt,sha256=4MY53j3v7tEKwjyuriVz9YjB4Dscm2nDMB2CcG9lOmk,1059
+avoca-0.14.0.dist-info/RECORD,,

{avoca-0.12.0.dist-info → avoca-0.14.0.dist-info}/WHEEL RENAMED Viewed

@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: hatchling 1.27.0
+Generator: hatchling 1.28.0
 Root-Is-Purelib: true
 Tag: py3-none-any

{avoca-0.12.0.dist-info → avoca-0.14.0.dist-info}/licenses/LICENCE.txt RENAMED Viewed

File without changes

avoca 0.12.0__py3-none-any.whl → 0.14.0__py3-none-any.whl

avoca 0.12.0__py3-none-any.whl → 0.14.0__py3-none-any.whl