PyPI - avoca - Versions diffs - 0.11.3__py3-none-any.whl → 0.11.4__py3-none-any.whl - Mend

avoca 0.11.3__py3-none-any.whl → 0.11.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

avoca/bindings/ebas.py +57 -8
avoca/qa_class/abstract.py +8 -3
avoca/qa_class/rt.py +52 -17
avoca/qa_class/zscore.py +1 -1
{avoca-0.11.3.dist-info → avoca-0.11.4.dist-info}/METADATA +1 -1
{avoca-0.11.3.dist-info → avoca-0.11.4.dist-info}/RECORD +8 -8
{avoca-0.11.3.dist-info → avoca-0.11.4.dist-info}/WHEEL +0 -0
{avoca-0.11.3.dist-info → avoca-0.11.4.dist-info}/licenses/LICENCE.txt +0 -0

avoca/bindings/ebas.py CHANGED Viewed

@@ -31,6 +31,18 @@ ebas_compname_of_var = {
 ebas_compname_to_var = {v: k for k, v in ebas_compname_of_var.items()}
+# Additional variables that can be in the dataset (not compound dependant)
+additional_vars = [
+    "temperature",
+    "pressure",
+]
+titles = {
+    "temperature": "T_inlet",
+    "pressure": "P_inlet",
+}
 class DataLevel(IntEnum):
     """Values for different type of data used by ebas."""
@@ -57,6 +69,7 @@ def set_dataframe(
     start_offset: timedelta | None = None,
     end_offset: timedelta | None = None,
     flag_all: list[int] = [],
+    invalidate_conc_calib: bool = True,
 ):
     """Put the data from the export dataframe into the nas object.
@@ -68,7 +81,10 @@ def set_dataframe(
     :arg data_level: The level of the data to export.
     :arg start_offset: The offset to add to the start time
     :arg end_offset: The offset to add to the end time
+    :arg flag_all: List of flags to add to all the data
+    :arg invalidate_conc_calib: If True, the concentration calibration
+        will be invalidated (flag 980) for all calib samples.
+    :returns: A dictionary with the metadata of the compounds exported.
     """
     if ("-", "start_datetime") not in df_export.columns:
@@ -104,6 +120,8 @@ def set_dataframe(
         "rt": "s",
         "w": "s",
         "area": "area_unit",
+        "temperature": "K",
+        "pressure": "hPa",
     }
     ebas_varname_of_var = {
@@ -119,6 +137,7 @@ def set_dataframe(
     # Export calibration status if given by the user
     status_col = ("-", "status")
+    empty_flags = [[]] * len(df_export)
     if (status_col in df_export.columns) and (data_level not in concs_data_levels):
         metadata = DataObject()
         metadata.comp_name = "status"
@@ -126,13 +145,38 @@ def set_dataframe(
         metadata.matrix = "instrument"
         metadata.unit = "no unit"
         values = [val for val in df_export[status_col]]
-        flags = [[] for _ in df_export[status_col]]
         nas.variables.append(
-            DataObject(values_=values, flags=flags, flagcol=True, metadata=metadata)
+            DataObject(
+                values_=values, flags=empty_flags, flagcol=True, metadata=metadata
+            )
         )
-    if data_level in concs_data_levels:
+    for var in additional_vars:
+        var_col = ("-", var)
+        if var_col not in df_export.columns:
+            continue
+        metadata = DataObject()
+        metadata.comp_name = var
+        metadata.title = titles.get(var, var)
+        metadata.matrix = "instrument"
+        metadata.unit = unit_of_var[var]
+        metadata.cal_scale = ""
+        values = [val for val in df_export[var_col]]
+        nas.variables.append(
+            DataObject(
+                values_=values,
+                flags=empty_flags,
+                flagcol=True,
+                metadata=metadata,
+            )
+        )
+    this_nan_flags = nan_flags.copy()
+    if data_level in concs_data_levels and invalidate_conc_calib:
         # Set the flag to the invalid value instead of the valid calibration
+        this_nan_flags.append(QA_Flag.CALIBRATION)
         dict_flags_to_ebas[QA_Flag.CALIBRATION] = 980
     for sub in compounds:
@@ -144,10 +188,7 @@ def set_dataframe(
             )
             for flag in flag_col
         ]
-        nan_flag = np.logical_or.reduce([flag_col & flag.value for flag in nan_flags])
-        if data_level == DataLevel.CONCS:
-            # Invalidate also the calibration runs
-            nan_flag |= flag_col & QA_Flag.CALIBRATION.value
+        nan_flag = np.logical_or.reduce([flag_col & flag.value for flag in this_nan_flags])
         for var in vars_to_export[data_level]:
             ebas_name = compounds[sub]
@@ -273,6 +314,10 @@ def nas_to_avoca(nas: EbasNasaAmes) -> pd.DataFrame:
             clean_for_df[("-", "status")] = calib_ids.astype(int)
             continue
+        if comp_name in additional_vars:
+            clean_for_df[("-", comp_name)] = np.array(values, dtype=float)
+            continue
         # Split the title on the _
         comp_name = comp_name.split("_")
         if len(comp_name) == 1:
@@ -288,6 +333,10 @@ def nas_to_avoca(nas: EbasNasaAmes) -> pd.DataFrame:
         elif len(comp_name) == 3:
             compund, var_first, var_second = comp_name
             variable = f"{var_first}_{var_second}"
+        elif len(comp_name) == 4 and comp_name[-1] == "compounds":
+            # Concentration of merged compounds
+            compund = "_".join(comp_name)
+            variable = "C"
         else:
             logger.warning(f"passing {comp_name}, could not be understood. Skipping.")
             continue

avoca/qa_class/abstract.py CHANGED Viewed

@@ -4,13 +4,17 @@ from __future__ import annotations
 import logging
 from abc import ABC, abstractmethod
-from typing import Sequence
+from typing import TYPE_CHECKING, Sequence
 import pandas as pd
 from avoca.flags import QA_Flag
 from avoca.requirements import PythonPackageRequirement
+if TYPE_CHECKING:
+    from matplotlib.axes import Axes
+    from matplotlib.figure import Figure
 class AbstractQA_Assigner(ABC):
     """Abstract class for QA assigners.
@@ -74,10 +78,11 @@ class AbstractQA_Assigner(ABC):
         stopp: pd.Timestamp = pd.Timestamp.max,
         name: str | None = None,
         runtypes: list[str] = None,
+        log_level: int = logging.INFO,
     ):
         """Create a new QA assigner."""
         self.logger = logging.getLogger(type(self).__name__)
-        self.logger.setLevel(logging.DEBUG)
+        self.logger.setLevel(log_level)
         self.name = name or type(self).__name__
@@ -161,6 +166,6 @@ class AbstractQA_Assigner(ABC):
         raise NotImplementedError
     # Optional method
-    def plot(self):
+    def plot(self) -> tuple[Figure, Sequence[Axes]]:
         """Plot the QA assigner."""
         raise NotImplementedError(f"{type(self).__name__} does not have a plot method.")

avoca/qa_class/rt.py CHANGED Viewed

@@ -17,15 +17,38 @@ class RetentionTimeChecker(AbstractQA_Assigner):
     retention times of the measurements.
     The correlation is usually very high. If one compound has a low correlation
     with the others, it probably means that is was miss-assigned at some points.
+    :param rt_threshold: The threshold for the retention time deviation.
+        Unit is time unit (minutes or seconds, as in the data).
+        This will try to fit a linear regression from the average training
+        retention times to the measured ones for each sample.
+        If after the regression a datapoint is higher than this threshold,
+        it will be removed.
+    :param rt_relative_max_deviation: The maximum relative deviation allowed
+        from the average retention time.
+        This is used to remove outliers that are too far from the average.
+        if 0.5 is given, it means that the retention time can be 50% higher or lower
+        than the average retention time.
     """
     runtypes: list[str] = ["air", "std"]
+    variable: str = "rt"
     flag = QA_Flag.SUSPICIOUS_RT
-    RT_THRESHOLD: float = 2.0
     rt_ref: pd.Series
+    def __init__(
+        self,
+        rt_threshold: float = 2.0,
+        rt_relative_max_deviation: float = 0.2,
+        poly_order: int = 1,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.rt_threshold = rt_threshold
+        self.rt_relative_max_deviation = rt_relative_max_deviation
+        self.poly_order = poly_order
     def fit(self, df: pd.DataFrame):
         cols = [(compound, "rt") for compound in self.compounds]
@@ -49,6 +72,7 @@ class RetentionTimeChecker(AbstractQA_Assigner):
         # Get a dataframe for a mean reference
         self.rt_ref = df_rt.median(axis="index")
+        self.rt_std = df_rt.std(axis="index")
     def assign(self, df: pd.DataFrame) -> dict[str, pd.Index]:
         """Assing flags when expected rt values does not match the measured ones."""
@@ -58,27 +82,29 @@ class RetentionTimeChecker(AbstractQA_Assigner):
         df_rt = df[rt_cols]
         # Take the reference retention times
         x = self.rt_ref.loc[rt_cols].to_numpy()
+        std = self.rt_std.loc[rt_cols].to_numpy()
         outliers = {}
         for t, row in df_rt.iterrows():
             # Make a lin reg line
             y = row.to_numpy()
-            mask_not_nan = ~np.isnan(y)
-            if np.sum(mask_not_nan) < 3:
-                self.logger.warning(
-                    f"{self} skipping {t} because there are not enough compounds"
-                    " measured"
-                )
-                continue
+            # Remove the points that are too far from the reference
+            mask_bad = (
+                (np.abs(y - x) / x) > self.rt_relative_max_deviation
+            ) | np.isnan(y)
-            params = np.polyfit(x[mask_not_nan], y[mask_not_nan], 1)
-            f = np.poly1d(params)
-            y_lin_reg = f(x)
+            if np.sum(~mask_bad) > self.poly_order + 2:
-            # Get the points which are too far from the reg line
-            mask_bad = np.abs(y - y_lin_reg) > self.RT_THRESHOLD
-            if np.any(mask_bad):
+                params = np.polyfit(x[~mask_bad], y[~mask_bad], self.poly_order)
+                f = np.poly1d(params)
+                y_lin_reg = f(x)
+                # Get the points which are too far from the reg line
+                error = y - y_lin_reg
+                mask_bad |= np.abs(error) > self.rt_threshold
+            if any(mask_bad):
                 outliers[t] = mask_bad
         # Create a dataframe with the flags
@@ -95,12 +121,12 @@ class RetentionTimeChecker(AbstractQA_Assigner):
         import matplotlib.pyplot as plt
-        fig, ax = plt.subplots()
+        fig, ax = plt.subplots(figsize=(16, 9))
         assigned = self.assign(self.df_train)
         for compound in self.compounds:
-            ax.scatter(
+            points = ax.scatter(
                 self.df_train.index,
                 self.df_train[(compound, "rt")],
                 label=compound,
@@ -115,6 +141,15 @@ class RetentionTimeChecker(AbstractQA_Assigner):
                     color="red",
                     marker="x",
                 )
+            # Line for the mean retention time
+            ax.axhline(
+                self.rt_ref[(compound, "rt")],
+                color=points.get_facecolor()[0],
+                linestyle="--",
+            )
+        ax.set_ylabel("Retention time")
+        ax.set_xlabel("Sample")
         ax.legend()
         plt.show()

avoca/qa_class/zscore.py CHANGED Viewed

@@ -128,7 +128,7 @@ class ExtremeValues(AbstractQA_Assigner):
         )
         x = self.dt if hasattr(self, "dt") else self.df_train.index
-        x = pd.Series(x, index=x)
+        x = pd.Series(x, index=self.df_train.index)
         outliers = self.assign(self.df_train)

{avoca-0.11.3.dist-info → avoca-0.11.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: avoca
-Version: 0.11.3
+Version: 0.11.4
 Summary: @voc@: Quality assessement of measurement data
 Project-URL: Homepage, https://gitlab.com/empa503/atmospheric-measurements/avoca
 Project-URL: Bug Tracker, https://gitlab.com/empa503/atmospheric-measurements/avoca/-/issues

{avoca-0.11.3.dist-info → avoca-0.11.4.dist-info}/RECORD RENAMED Viewed

@@ -8,7 +8,7 @@ avoca/manager.py,sha256=ET-ATrSLi2rSV7PjBzwpjj0V_60MFxSIZqQ03aEIbdA,5284
 avoca/requirements.py,sha256=q4z6bJ6iW5jSy10Y0elfE9BoEcAZC2-kUqYi4zA6TGE,563
 avoca/settings.py,sha256=Px-sCGIlRyWI2RBJaGUY0K1V60kOZY9n41eft92bjN4,2112
 avoca/bindings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-avoca/bindings/ebas.py,sha256=s9274kwymZs0EO-2UMEUHV1iLgbWv7YR_r2e-O5m0SI,17286
+avoca/bindings/ebas.py,sha256=48cYR-jwc3GMZCVhYYbMVUj1RgFAoQNtQC2kOpA1iAA,18827
 avoca/bindings/ebas_flags.py,sha256=uzPrd45OoULycCRYWCwHQG1exUDoWSe8JmULOAsEHRs,2537
 avoca/bindings/gcwerks-report.conf,sha256=jO0I62DfgzrXXS1FuiW8ds-oc1_j8kpFCO61Fk-erBw,230
 avoca/bindings/gcwerks.py,sha256=a5n9Iot3r_ejnCEdILk4hE2uioONB75Soq5fvSLlDoo,14879
@@ -17,19 +17,19 @@ avoca/bindings/nabel.py,sha256=VbC_ARvtso8onILAD8gROt5Y2URdx6NfAqMn4p1mUWU,3020
 avoca/bindings/qa_tool.py,sha256=ZPtQo8dHo6wooIlc9Vzk8y91Qgso-RBtGR_h2TAZQ24,7583
 avoca/bindings/synspec.py,sha256=W5RnBu-6eetmwjM8iMBe4wNwVNIaVpNW3bwa2ykGM2U,1733
 avoca/qa_class/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-avoca/qa_class/abstract.py,sha256=4s8GgkeC3WbMnNxygajhawh7TU9v13i-SP0j5b5YOMc,5432
+avoca/qa_class/abstract.py,sha256=KCK9OhKNWlMje-5D0hgMIf-g64D_kRwRsoCZ_R4VuqI,5612
 avoca/qa_class/concs.py,sha256=TcQic69I1Kr40RJgCILTtyjVLn0K6_q6I5Y1Vi3dKwk,813
 avoca/qa_class/generate_classes_doc.py,sha256=osz01SRZ5SrwJXVlmbcainVwVjmealSSIdbzXzUEGKQ,1915
 avoca/qa_class/invalid.py,sha256=PDZHN0RZ8jND3QY09UcbwJYjjT6VqS4a0klO3QYiFig,2650
-avoca/qa_class/rt.py,sha256=t927H_o0Kn-VwEkG9TW33MSDW_2of2-pBX_gTMuaXA0,3817
+avoca/qa_class/rt.py,sha256=Bgv0DSSR-hIJ9kI6AdUkV6sXVS65gBxbASkk4TUHbnQ,5293
 avoca/qa_class/test.py,sha256=Xc88_Vwf3hvPiKKl4ILxZ2N985SY8eujUdnAoQu4mbo,591
-avoca/qa_class/zscore.py,sha256=HqOxV45smhXqcv2XrB7W7plE9RoHzBGVEAbmuwsiv7w,16696
+avoca/qa_class/zscore.py,sha256=jDw2UBmf7KBkskGOD5bgFy3RgNYUjc-9tYjSU-3L1ws,16714
 avoca/testing/__init__.py,sha256=CzkugadVit48-eMoMVtojZLHeSKgnmMMen6sGu6Q42Y,108
 avoca/testing/df.py,sha256=Nc0GUYTApZgYyUTMnHMTbSKLiA5ty9Bg7gUGtnoFYMI,1826
 avoca/utils/__init__.py,sha256=LEA2jJsqwSK2DBzXg00DbPhM1fXXREJ0XxLeuJtKapY,1398
 avoca/utils/flags_doc.py,sha256=cS7yKpxVh_SA6EdH3lSy9UpcIvhGwzAELRbkXN3CxO8,4168
 avoca/utils/torch_models.py,sha256=53TgOgSPMOOSGYy2cm1EGSK7qQkYMGEOq319KKM_Ir0,1015
-avoca-0.11.3.dist-info/METADATA,sha256=NsJF7yn7mtQVgkOnDjo-ARqRa3hzvauGhp9BJ-kmTAk,1570
-avoca-0.11.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-avoca-0.11.3.dist-info/licenses/LICENCE.txt,sha256=4MY53j3v7tEKwjyuriVz9YjB4Dscm2nDMB2CcG9lOmk,1059
-avoca-0.11.3.dist-info/RECORD,,
+avoca-0.11.4.dist-info/METADATA,sha256=tx6uIcmzGJU-Gf1RKfxW1crR6hhYc5AwDBkVks4iuHU,1570
+avoca-0.11.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+avoca-0.11.4.dist-info/licenses/LICENCE.txt,sha256=4MY53j3v7tEKwjyuriVz9YjB4Dscm2nDMB2CcG9lOmk,1059
+avoca-0.11.4.dist-info/RECORD,,

{avoca-0.11.3.dist-info → avoca-0.11.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{avoca-0.11.3.dist-info → avoca-0.11.4.dist-info}/licenses/LICENCE.txt RENAMED Viewed

File without changes

avoca 0.11.3__py3-none-any.whl → 0.11.4__py3-none-any.whl

avoca 0.11.3__py3-none-any.whl → 0.11.4__py3-none-any.whl