avoca 0.10.4__py3-none-any.whl → 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
avoca/bindings/ebas.py CHANGED
@@ -17,50 +17,11 @@ from ebas.io.file.nasa_ames import EbasNasaAmes
17
17
  from nilutility.datatypes import DataObject
18
18
  from nilutility.datetime_helper import DatetimeInterval
19
19
 
20
+ from avoca.bindings.ebas_flags import ebas_flag_to_avoca, flags_to_ebas
20
21
  from avoca.flags import QA_Flag
21
22
 
22
23
  logger = logging.getLogger(__name__)
23
24
 
24
- # https://projects.nilu.no/ccc/flags/flags.html for more info on what ebas uses
25
- flags_to_ebas: dict[QA_Flag, int] = {
26
- QA_Flag.MISSING: 999, # M Missing measurement, unspecified reason
27
- QA_Flag.ZERO_NEG_CONC_EXT: 999,
28
- QA_Flag.INVALIDATED_EXT: 900, # H Hidden and invalidated by data originator
29
- # V Extremely high value, outside four times standard deviation in a lognormal distribution
30
- QA_Flag.EXTREME_VALUE: 458,
31
- QA_Flag.CALIBRATION: 683, # I Invalid due to calibration. Used for Level 0.
32
- QA_Flag.BLANK: 684, # Invalid due to zero/span check. Used for Level 0.
33
- QA_Flag.HEIGHT_INTEGRATION: 0, # Valid
34
- QA_Flag.UNCORRELATED: 0, # Valid
35
- QA_Flag.MET_OFFICE_BASELINE: 0, # Valid
36
- QA_Flag.BELOW_DETECTION_LIMIT: 147, # B Below detection limit
37
- QA_Flag.POLLUTION: 900,
38
- QA_Flag.SUSPICIOUS_RT: 900,
39
- QA_Flag.INVALID_VALUES: 999, # M Missing measurement, unspecified reason
40
- }
41
-
42
- ebas_flag_to_avoca: dict[int, QA_Flag] = {
43
- ebas_flag: avoca_flag for avoca_flag, ebas_flag in flags_to_ebas.items()
44
- }
45
- # Set some flags with Multiple values to the same value
46
- ebas_flag_to_avoca.pop(0) # 0 is valid in avoca
47
- ebas_flag_to_avoca[999] = QA_Flag.MISSING
48
- ebas_flag_to_avoca[900] = QA_Flag.INVALIDATED_EXT
49
- # Unspecified contamination or local influence, but considered valid
50
- ebas_flag_to_avoca[559] = QA_Flag.POLLUTION
51
- ebas_flag_to_avoca[685] = (
52
- QA_Flag.CALIBRATION
53
- ) # Invalid due to secondary standard gas measurement. Used for Level 0.
54
- ebas_flag_to_avoca[980] = (
55
- QA_Flag.CALIBRATION
56
- ) # Missing due to calibration or zero/span check
57
-
58
- missing_flags = set(QA_Flag) - set(flags_to_ebas.keys())
59
- if missing_flags:
60
- raise RuntimeError(
61
- f"Not all QA flags are mapped to Ebas flags. Missing: {missing_flags}"
62
- )
63
-
64
25
 
65
26
  ebas_compname_of_var = {
66
27
  "rt": "retention_time",
@@ -0,0 +1,59 @@
1
# https://projects.nilu.no/ccc/flags/flags.html for more info on what ebas uses
from avoca.flags import QA_Flag

# Forward mapping: avoca QA flag -> numeric EBAS flag code used on export.
# Several avoca flags intentionally share one EBAS code (0 = valid, 900, 999);
# the reverse mapping below resolves those ambiguities explicitly.
flags_to_ebas: dict[QA_Flag, int] = {
    QA_Flag.MISSING: 999,  # M Missing measurement, unspecified reason
    QA_Flag.ZERO_NEG_CONC_EXT: 999,  # also exported as missing (999)
    QA_Flag.INVALIDATED_EXT: 900,  # H Hidden and invalidated by data originator
    # V Extremely high value, outside four times standard deviation in a lognormal distribution
    QA_Flag.EXTREME_VALUE: 458,
    QA_Flag.CALIBRATION: 683,  # I Invalid due to calibration. Used for Level 0.
    QA_Flag.BLANK: 684,  # Invalid due to zero/span check. Used for Level 0.
    QA_Flag.HEIGHT_INTEGRATION: 0,  # Valid
    QA_Flag.UNCORRELATED: 0,  # Valid
    QA_Flag.MET_OFFICE_BASELINE: 0,  # Valid
    QA_Flag.BELOW_DETECTION_LIMIT: 147,  # B Below detection limit
    QA_Flag.POLLUTION: 900,  # also exported as hidden/invalidated (900)
    QA_Flag.SUSPICIOUS_RT: 900,  # also exported as hidden/invalidated (900)
    QA_Flag.INVALID_VALUES: 999,  # M Missing measurement, unspecified reason
}
20
+
21
# Reverse mapping (EBAS code -> avoca flag), built by inverting flags_to_ebas.
# The forward map is many-to-one, so the comprehension keeps whichever avoca
# flag was inserted last for a duplicated EBAS code; the assignments below then
# pin the preferred avoca flag for those codes explicitly.
ebas_flag_to_avoca: dict[int, QA_Flag] = {
    ebas_flag: avoca_flag for avoca_flag, ebas_flag in flags_to_ebas.items()
}
# Set some flags with Multiple values to the same value
ebas_flag_to_avoca.pop(0)  # 0 is valid in avoca
ebas_flag_to_avoca[999] = QA_Flag.MISSING
ebas_flag_to_avoca[900] = QA_Flag.INVALIDATED_EXT
# Unspecified contamination or local influence, but considered valid
ebas_flag_to_avoca[559] = QA_Flag.POLLUTION
ebas_flag_to_avoca[685] = (
    QA_Flag.CALIBRATION
)  # Invalid due to secondary standard gas measurement. Used for Level 0.
ebas_flag_to_avoca[980] = (
    QA_Flag.CALIBRATION
)  # Missing due to calibration or zero/span check

# Import-time sanity check: every avoca QA flag must have an EBAS code, so a
# new QA_Flag member without a mapping fails loudly instead of at export time.
missing_flags = set(QA_Flag) - set(flags_to_ebas.keys())
if missing_flags:
    raise RuntimeError(
        f"Not all QA flags are mapped to Ebas flags. Missing: {missing_flags}"
    )
42
+
43
# priority of the flag to appear in the output
# Useful when you can select only one flag value
# Ordered from lowest to highest priority: consumers that iterate this list
# and overwrite on each match (e.g. the EBAS flag column written by
# export_EmpaQATool) end up keeping the last matching entry.
flag_order: list[QA_Flag] = [
    QA_Flag.CALIBRATION,
    QA_Flag.BLANK,
    QA_Flag.HEIGHT_INTEGRATION,
    QA_Flag.MET_OFFICE_BASELINE,
    QA_Flag.BELOW_DETECTION_LIMIT,
    QA_Flag.POLLUTION,
    QA_Flag.SUSPICIOUS_RT,
    QA_Flag.UNCORRELATED,
    QA_Flag.EXTREME_VALUE,
    QA_Flag.INVALIDATED_EXT,
    QA_Flag.ZERO_NEG_CONC_EXT,
    QA_Flag.MISSING,
    QA_Flag.INVALID_VALUES,
]
@@ -0,0 +1,209 @@
1
+ """Few modules for importing and exporting from https://voc-qc.nilu.no/
2
+
3
+ Originally taken from tucavoc.
4
+ """
5
+
6
+ import logging
7
+ import warnings
8
+ from datetime import datetime, timedelta
9
+ from pathlib import Path
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+ import pandas.errors
14
+
15
+ from avoca.bindings.ebas_flags import flag_order, flags_to_ebas
16
+ from avoca.flags import QA_Flag
17
+ from avoca.utils import compounds_from_df
18
+
19
+
20
def number_of_digits_required(serie: pd.Series) -> int:
    """Return how many digits are needed to represent the values of `serie`.

    Used to size the all-nines placeholder written for invalid entries.
    An empty, all-NaN or all-zero series needs only 2 digits.
    """
    # TODO: need to check if we need the actual int value, we can put a .9 at the end
    if (pd.isna(serie) | (serie == 0)).all():
        # Only 2 will be required
        return 2
    # Order of magnitude of every strictly positive value.
    magnitudes = np.log10(serie[serie > 0])
    # Drop +inf magnitudes (i.e. values that were +inf in the input).
    finite_magnitudes = magnitudes[magnitudes != np.inf]
    if len(finite_magnitudes) == 0:
        return 2
    # Largest magnitude (at least 0) plus a 2-digit safety margin.
    return int(max(np.max(finite_magnitudes), 0) + 2)
32
+
33
+
34
def export_EmpaQATool(
    df: pd.DataFrame,
    export_path: Path,
    station: str = "XXX",
    revision_date: datetime | None = None,
    dataset: datetime | str | None = None,
    export_names: dict[str, str] | None = None,
    datetime_offsets: tuple[timedelta, timedelta] | None = None,
    substances: list[str] | None = None,
    rounding_decimals: int = 4,
) -> Path:
    """Export to the EmpaQATool format.

    The exported file from the program can then be imported to
    the tool on https://voc-qc.nilu.no/Import
    The specs for that file can be found in
    https://voc-qc.nilu.no/doc/CSVImport_FormatSpecifications.pdf

    This will add the additional data from the dataframe.

    The file generated will be named:
    export_path/[station]_[dataset]_[revision].csv

    .. note:: `df` is modified in place: the ("-", "datetime*") columns may
        be added and the INVALID_VALUES QA flag is set on invalid rows.

    :arg df: Calculation dataframe
    :arg export_path: Path (directory) to export the file
    :arg station: Station name to use in the file name
    :arg revision_date: Revision date as datetime to use in the file name
    :arg dataset: Dataset name as datetime or string to use in the file name
    :arg export_names: Dictionary of substance names to use in the file name
        The keys are the substance names and the values are the names to use in the file.
    :arg datetime_offsets: Tuple of two timedelta to use for the start and end datetime
    :arg substances: List of substances to export. You can also specify group names.
        If not specified, this will use the substances from `df_substances`.
    :arg rounding_decimals: Number of decimals to round the values to.

    :return: Path of the written csv file.
    """

    logger = logging.getLogger(__name__)

    # Create the mutable default per call (avoids the shared-default pitfall).
    if export_names is None:
        export_names = {}

    warnings.filterwarnings(
        action="ignore",
        category=pandas.errors.PerformanceWarning,
        module="pandas",
    )

    # Datetime format expected by the import tool.
    fmt = "%d.%m.%Y %H:%M:%S"

    need_datetime_col = ("-", "datetime_start") not in df.columns and (
        "-",
        "datetime_end",
    ) not in df.columns

    if need_datetime_col:
        if ("-", "datetime") not in df.columns:
            # Fall back to the index as the measurement datetime.
            df[("-", "datetime")] = df.index
        # Check type of the datetime column
        if not pd.api.types.is_datetime64_any_dtype(df[("-", "datetime")]):
            raise ValueError(
                "The datetime column is not of type datetime64. "
                "Please convert it to datetime64. "
                "Or provide the ('-', 'datetime_start') and "
                "('-', 'datetime_end') columns."
            )
        if datetime_offsets is None:
            raise ValueError(
                "No datetime_start or datetime_end column in the dataframe. "
                "Please provide the datetime_offsets to specify."
            )

        df[("-", "datetime_start")] = df[("-", "datetime")] + datetime_offsets[0]
        df[("-", "datetime_end")] = df[("-", "datetime")] + datetime_offsets[1]

    df_out = pd.DataFrame(
        {
            "start": df[("-", "datetime_start")].dt.strftime(fmt),
            "end": df[("-", "datetime_end")].dt.strftime(fmt),
        },
        index=df.index,
    )
    logger.debug(f"df_out: {df_out.head()}")
    if not substances:
        substances = compounds_from_df(df)

    # Helpers for cleaning numeric columns before writing them out.
    def remove_infs(x: pd.Series) -> pd.Series:
        # Infinities cannot be represented in the output; treat them as NaN.
        return x.replace([np.inf, -np.inf], np.nan)

    def is_invalid(x: pd.Series) -> pd.Series:
        # A value is invalid when it is +-inf or NaN.
        return x.isin([np.inf, -np.inf]) | pd.isna(x)

    def clean_col(x: pd.Series) -> pd.Series:
        # Convert to str so we can control the formatting in the csv.
        return remove_infs(x).round(rounding_decimals).astype(str)

    for substance in substances:

        export_name = export_names.get(substance, substance)

        # Concentration column name differs between datasets ("conc" or "C").
        conc_col = (
            (substance, "conc")
            if (substance, "conc") in df.columns
            else (substance, "C")
        )
        u_expanded_col = (substance, "u_expanded")
        u_precision_col = (substance, "u_precision")
        flag_col = (substance, "flag")

        # Rows exported as missing: flagged missing/invalidated, or with a
        # non-finite concentration or uncertainty.
        # Use | (not +) to combine the flag bit values into a mask.
        mask_invalid = (
            (
                df[flag_col] & (QA_Flag.MISSING.value | QA_Flag.INVALIDATED_EXT.value)
            ).astype(bool)
            | is_invalid(df[conc_col])
            | (
                is_invalid(df[u_expanded_col])
                if u_expanded_col in df.columns
                else False
            )
            | (
                is_invalid(df[u_precision_col])
                if u_precision_col in df.columns
                else False
            )
        )

        logger.debug(f"mask_invalid: {mask_invalid}")
        # Flag the invalids. |= (not ^=) so rows already carrying the
        # INVALID_VALUES flag keep it instead of having it toggled off.
        df.loc[mask_invalid, flag_col] |= QA_Flag.INVALID_VALUES.value

        # Convert to str so we can control the formatting
        df_out[f"{export_name}-Value"] = clean_col(df[conc_col])

        # Input the missing values as 9. see issue #7 gitlab.empa.ch
        df_out.loc[mask_invalid, f"{export_name}-Value"] = (
            "9" * number_of_digits_required(df[conc_col])
        )

        if u_expanded_col in df.columns:
            # Convert to str so we can control the formatting
            df_out[f"{export_name}-Accuracy"] = clean_col(df[u_expanded_col])
            # Input the missing values as 9. see issue #7 gitlab.empa.ch
            df_out.loc[mask_invalid, f"{export_name}-Accuracy"] = (
                "9" * number_of_digits_required(df[u_expanded_col])
            )

        if u_precision_col in df.columns:
            # Convert to str so we can control the formatting
            df_out[f"{export_name}-Precision"] = clean_col(df[u_precision_col])

            # Input the missing values as 9. see issue #7 gitlab.empa.ch
            df_out.loc[mask_invalid, f"{export_name}-Precision"] = (
                "9" * number_of_digits_required(df[u_precision_col])
            )

        # Build the EBAS flag column: iterate flags in priority order so the
        # last matching (highest priority) flag wins, then render the numeric
        # code as a fraction (e.g. 999 -> "0.999").
        flag_col_out = f"{export_name}-Flag"
        df_out[flag_col_out] = 0.0
        for flag in flag_order:
            df_out.loc[
                (df[flag_col].values & flag.value).astype(bool), flag_col_out
            ] = flags_to_ebas[flag]
        df_out[flag_col_out] = (df_out[flag_col_out] * 1e-3).map("{:.3f}".format)

    export_path.mkdir(exist_ok=True)

    dt_format = "%Y%m%d"
    if dataset is None:
        # Infer the dataset name from the first start time of the data.
        dataset = datetime.strptime(df_out["start"].iloc[0], fmt).strftime(dt_format)
    elif isinstance(dataset, datetime):
        # Format datetime datasets the same way as the inferred one, so the
        # file name never contains str(datetime) (spaces/colons).
        dataset = dataset.strftime(dt_format)

    if revision_date is None:
        revision = datetime.now().strftime(dt_format)
    else:
        # Same formatting for an explicitly supplied revision datetime.
        revision = revision_date.strftime(dt_format)

    # [station]_[dataset]_[revision]
    file_name = f"{station}_{dataset}_{revision}"

    out_filepath = Path(export_path, file_name).with_suffix(".csv")
    df_out.to_csv(
        out_filepath,
        sep=";",
        index=False,
        encoding="utf-8",
    )
    logger.info(f"Exported to `{out_filepath}`")

    return out_filepath
avoca/testing/df.py CHANGED
@@ -23,6 +23,16 @@ simple_df = pd.DataFrame(
23
23
  ),
24
24
  )
25
25
 
26
# Fixture with non-finite / negative values: compA holds only valid
# concentrations, compB mixes a valid value with inf, NaN and a negative one.
invalids_df = pd.DataFrame(
    {
        ("compA", "C"): [1.0, 1.1, 0.8, 0.9],
        ("compB", "C"): [1.0, np.inf, np.nan, -0.3],
    }
)
35
+
26
36
  compab_multiindex = pd.MultiIndex.from_tuples(
27
37
  [
28
38
  ("compA", "test_var"),
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: avoca
3
- Version: 0.10.4
3
+ Version: 0.11.0
4
4
  Summary: @voc@: Quality assessement of measurement data
5
5
  Project-URL: Homepage, https://gitlab.com/empa503/atmospheric-measurements/avoca
6
6
  Project-URL: Bug Tracker, https://gitlab.com/empa503/atmospheric-measurements/avoca/-/issues
@@ -8,11 +8,13 @@ avoca/manager.py,sha256=ET-ATrSLi2rSV7PjBzwpjj0V_60MFxSIZqQ03aEIbdA,5284
8
8
  avoca/requirements.py,sha256=q4z6bJ6iW5jSy10Y0elfE9BoEcAZC2-kUqYi4zA6TGE,563
9
9
  avoca/settings.py,sha256=Px-sCGIlRyWI2RBJaGUY0K1V60kOZY9n41eft92bjN4,2112
10
10
  avoca/bindings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- avoca/bindings/ebas.py,sha256=U5RhjQuEvYcEQQ17XDj2HOQ_Wr1WX03aR_kL3BVmsO8,18975
11
+ avoca/bindings/ebas.py,sha256=Xe0TkV4fAm0KJGsEUAlkRyHsohL_2DSZFc5pPE41OS0,17217
12
+ avoca/bindings/ebas_flags.py,sha256=ls8cEKPC2QjlkWyqiytyjFfemqM8ot8suys_Qi5Xx1o,2352
12
13
  avoca/bindings/gcwerks-report.conf,sha256=jO0I62DfgzrXXS1FuiW8ds-oc1_j8kpFCO61Fk-erBw,230
13
14
  avoca/bindings/gcwerks.py,sha256=pTwZhSuoD4usER1-JhQJtgj1KUcZR1ZN1loZMCSd3TQ,14651
14
15
  avoca/bindings/gcwerks_gui.py,sha256=Fj3p8obFq3lWrWW0LlA8WBALP8-U70hvps5vZEt4NaM,9458
15
16
  avoca/bindings/nabel.py,sha256=VbC_ARvtso8onILAD8gROt5Y2URdx6NfAqMn4p1mUWU,3020
17
+ avoca/bindings/qa_tool.py,sha256=ZPtQo8dHo6wooIlc9Vzk8y91Qgso-RBtGR_h2TAZQ24,7583
16
18
  avoca/bindings/synspec.py,sha256=W5RnBu-6eetmwjM8iMBe4wNwVNIaVpNW3bwa2ykGM2U,1733
17
19
  avoca/qa_class/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
20
  avoca/qa_class/abstract.py,sha256=4s8GgkeC3WbMnNxygajhawh7TU9v13i-SP0j5b5YOMc,5432
@@ -23,11 +25,11 @@ avoca/qa_class/rt.py,sha256=t927H_o0Kn-VwEkG9TW33MSDW_2of2-pBX_gTMuaXA0,3817
23
25
  avoca/qa_class/test.py,sha256=Xc88_Vwf3hvPiKKl4ILxZ2N985SY8eujUdnAoQu4mbo,591
24
26
  avoca/qa_class/zscore.py,sha256=HqOxV45smhXqcv2XrB7W7plE9RoHzBGVEAbmuwsiv7w,16696
25
27
  avoca/testing/__init__.py,sha256=CzkugadVit48-eMoMVtojZLHeSKgnmMMen6sGu6Q42Y,108
26
- avoca/testing/df.py,sha256=D1ONXe2b6vuM68sTIh318dcRRqodQ5KQioiVMl6OJYo,1592
28
+ avoca/testing/df.py,sha256=Nc0GUYTApZgYyUTMnHMTbSKLiA5ty9Bg7gUGtnoFYMI,1826
27
29
  avoca/utils/__init__.py,sha256=LEA2jJsqwSK2DBzXg00DbPhM1fXXREJ0XxLeuJtKapY,1398
28
30
  avoca/utils/flags_doc.py,sha256=cS7yKpxVh_SA6EdH3lSy9UpcIvhGwzAELRbkXN3CxO8,4168
29
31
  avoca/utils/torch_models.py,sha256=53TgOgSPMOOSGYy2cm1EGSK7qQkYMGEOq319KKM_Ir0,1015
30
- avoca-0.10.4.dist-info/METADATA,sha256=usLBL-Kyms4faFat9-TYcYtw-U-jSqFDgMCQeZCFdjc,1570
31
- avoca-0.10.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
32
- avoca-0.10.4.dist-info/licenses/LICENCE.txt,sha256=4MY53j3v7tEKwjyuriVz9YjB4Dscm2nDMB2CcG9lOmk,1059
33
- avoca-0.10.4.dist-info/RECORD,,
32
+ avoca-0.11.0.dist-info/METADATA,sha256=sYa3FgsjU0xh8NOCmHVr78aVcMCu-hOvGi-b6-H4c00,1570
33
+ avoca-0.11.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
34
+ avoca-0.11.0.dist-info/licenses/LICENCE.txt,sha256=4MY53j3v7tEKwjyuriVz9YjB4Dscm2nDMB2CcG9lOmk,1059
35
+ avoca-0.11.0.dist-info/RECORD,,
File without changes