PyPI - avoca - Versions diffs - 0.11.4__tar.gz → 0.12.0__tar.gz - Mend

avoca 0.11.4 (tar.gz) → 0.12.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68) hide show

{avoca-0.11.4 → avoca-0.12.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: avoca
-Version: 0.11.4
+Version: 0.12.0
 Summary: @voc@: Quality assessement of measurement data
 Project-URL: Homepage, https://gitlab.com/empa503/atmospheric-measurements/avoca
 Project-URL: Bug Tracker, https://gitlab.com/empa503/atmospheric-measurements/avoca/-/issues

{avoca-0.11.4 → avoca-0.12.0}/avoca/bindings/ebas_flags.py RENAMED Viewed

@@ -1,5 +1,5 @@
 # https://projects.nilu.no/ccc/flags/flags.html for more info on what ebas uses
-from avoca.flags import QA_Flag
+from avoca.flags import QA_Flag, nan_flags
 flags_to_ebas: dict[QA_Flag, int] = {
     QA_Flag.MISSING: 999,  # 	M 	Missing measurement, unspecified reason
@@ -40,13 +40,7 @@ if missing_flags:
         f"Not all QA flags are mapped to Ebas flags. Missing: {missing_flags}"
     )
-# Flags that are considered to have missing values
-nan_flags = [
-    QA_Flag.MISSING,
-    QA_Flag.ZERO_NEG_CONC_EXT,
-    QA_Flag.INVALIDATED_EXT,
-    QA_Flag.INVALID_VALUES,
-]
+nan_flags = nan_flags
 # priority of the flag to appear in the output
 # Useful when you can select only one flag value

{avoca-0.11.4 → avoca-0.12.0}/avoca/bindings/qa_tool.py RENAMED Viewed

@@ -12,7 +12,7 @@ import numpy as np
 import pandas as pd
 import pandas.errors
-from avoca.bindings.ebas_flags import flag_order, flags_to_ebas
+from avoca.bindings.ebas_flags import flag_order, flags_to_ebas, ebas_flag_to_avoca
 from avoca.flags import QA_Flag
 from avoca.utils import compounds_from_df
@@ -207,3 +207,67 @@ def export_EmpaQATool(
     logger.info(f"Exported to `{out_filepath}`")
     return out_filepath
+def read_empaqatool(file_path: Path, shift: timedelta | None = None) -> pd.DataFrame:
+    """Read an EmpaQATool export file.
+
+    Data is exported through : https://voc-qc.nilu.no/ExportData
+
+    :arg file_path: Path to the EmpaQATool export file.
+    :arg shift: Optional offset added to the start time to build the
+        ``("-", "datetime")`` column. With ``None`` the start time is used as is.
+    :returns: DataFrame with ``(group, variable)`` tuple columns. The ``"-"``
+        group holds ``datetime_start``, ``datetime_end`` and ``datetime``;
+        each compound holds ``conc``, ``u_expanded``, ``u_precision`` and ``flag``.
+    """
+    # Pandas skips the 2 empty rows
+    df = pd.read_csv(file_path, sep=";", header=2)
+    # Convert the datetime columns
+    datetime_format = "%Y-%m-%d %H:%M:%S"
+    columns = {}
+    columns[("-", "datetime_start")] = pd.to_datetime(df["Start"], format=datetime_format)
+    columns[("-", "datetime_end")] = pd.to_datetime(df["End"], format=datetime_format)
+    # Get the datetime column as the start time
+    dt = columns[("-", "datetime_start")].copy()
+    if shift is not None:
+        dt += shift
+    columns[("-", "datetime")] = dt
+    # Compound columns are named "<compound>-<variable>"; columns without a
+    # "-" (Start, End, the empty last column) are ignored.
+    # Each compound appears once per variable column, so de-duplicate while
+    # keeping the order of first appearance.
+    compounds = list(
+        dict.fromkeys(
+            "-".join(parts[:-1])
+            for col in df.columns
+            if len(parts := col.split("-")) >= 2
+        )
+    )
+    for compound in compounds:
+        mapping = {
+            "conc": f"{compound}-value",
+            "u_expanded": f"{compound}-accuracy",
+            "u_precision": f"{compound}-precision",
+        }
+        # Flags are adding 1000 for specifying when set by qa tool or not
+        # NOTE(review): astype(int) truncates, so a product such as
+        # 1000.9999999999999 becomes 1000 - confirm against the export format.
+        flag_values = (
+            (pd.to_numeric(df[f"{compound}-flag"]) * 1e3).astype(int).mod(1000)
+        )
+        flags = flag_values.apply(
+            lambda x: ebas_flag_to_avoca[x].value if x else int(0)
+        )
+        # Since the nan flags values are set to 9999, we need to set them to nan
+        mask_missing = flags == QA_Flag.MISSING.value
+        for key, source_col in mapping.items():
+            # Series.mask returns a new series, leaving `df` untouched
+            columns[(compound, key)] = pd.to_numeric(df[source_col]).mask(mask_missing)
+        # Any remaining nan concentration is flagged as missing as well
+        mask_nan = columns[(compound, "conc")].isna()
+        flags[mask_nan] |= QA_Flag.MISSING.value
+        columns[(compound, "flag")] = flags
+    return pd.DataFrame(columns)

{avoca-0.11.4 → avoca-0.12.0}/avoca/flags.py RENAMED Viewed

@@ -46,6 +46,14 @@ class QA_Flag(Flag):
     # Invalid Values
     INVALID_VALUES = auto()
+# Flags that are considered to have missing values
+nan_flags = [
+    QA_Flag.MISSING,
+    QA_Flag.ZERO_NEG_CONC_EXT,
+    QA_Flag.INVALIDATED_EXT,
+    QA_Flag.INVALID_VALUES,
+]
 if __name__ == "__main__":
     # Print the flags and their values

avoca-0.12.0/avoca/plots.py ADDED Viewed

@@ -0,0 +1,122 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+def plot_historical_comparison(
+    df_new: pd.DataFrame, df_hist: pd.DataFrame, compound: str, ax=None
+) -> tuple[plt.Figure, plt.Axes]:
+    """Scatter the concentrations of a compound for historical and new data.
+
+    Both dataframes are read through the ``(compound, "conc")`` and
+    ``("-", "datetime")`` columns. When a ``("-", "type")`` column exists,
+    only the rows whose type is ``"air"`` are plotted.
+
+    :arg df_new: New data, drawn in red.
+    :arg df_hist: Historical data, drawn in blue.
+    :arg compound: Name of the compound to plot.
+    :arg ax: Axes to draw on. A new 10x6 figure is created when ``None``.
+    :returns: The figure and the axes that were drawn on.
+    """
+    if ax is None:
+        fig, ax = plt.subplots(figsize=(10, 6))
+    else:
+        # Return the figure owning the given axes; `fig` would otherwise be
+        # unbound at the return statement.
+        fig = ax.figure
+    dt_column = ("-", "datetime")
+    datasets = (
+        ("Historical", df_hist, "blue"),
+        ("New", df_new, "red"),
+    )
+    for data_type, df, color in datasets:
+        serie = df[(compound, "conc")]
+        dt = df[dt_column]
+        if ("-", "type") in df.columns:
+            mask_air = df[("-", "type")] == "air"
+            serie = serie[mask_air]
+            dt = dt[mask_air]
+        ax.scatter(dt, serie, label=data_type, color=color, alpha=0.5, s=4)
+    ax.set_title(compound)
+    ax.set_xlabel("Date")
+    ax.set_ylabel("Concentration (ppt)")
+    ax.legend()
+    return fig, ax
+def plot_yearly_data(
+    df: pd.DataFrame, compound: str, ax=None
+) -> tuple[plt.Figure, plt.Axes]:
+    """Scatter the concentrations of a compound against the time of year.
+
+    Every year found in the ``("-", "datetime")`` column becomes one scatter
+    series, positioned by day of year plus the fraction of the day given by
+    the hour. When a ``("-", "type")`` column exists, only the rows whose
+    type is ``"air"`` are plotted.
+
+    :arg df: Data to plot.
+    :arg compound: Name of the compound to plot.
+    :arg ax: Axes to draw on. A new 10x6 figure is created when ``None``.
+    :returns: The figure and the axes that were drawn on.
+    """
+    if ax is None:
+        fig, ax = plt.subplots(figsize=(10, 6))
+    else:
+        # Return the figure owning the given axes; `fig` would otherwise be
+        # unbound at the return statement.
+        fig = ax.figure
+    dt_column = ("-", "datetime")
+    serie = df[(compound, "conc")]
+    dt = df[dt_column]
+    if ("-", "type") in df.columns:
+        mask_air = df[("-", "type")] == "air"
+        serie = serie[mask_air]
+        dt = dt[mask_air]
+    years = dt.dt.year
+    x = dt.dt.day_of_year + dt.dt.hour / 24.0
+    for year in years.unique():
+        mask_year = years == year
+        ax.scatter(x[mask_year], serie[mask_year], label=str(year), alpha=0.5, s=4)
+    ax.set_title(compound)
+    ax.set_xlabel("Time of Year")
+    ax.set_ylabel("Concentration (ppt)")
+    # Add ticks with the months, positioned at the month starts of 2024.
+    # The range stops in December so that no second "Jan" tick lands on day 1.
+    month_starts = pd.date_range(start="2024-01-01", end="2024-12-31", freq="MS")
+    ax.set_xticks(month_starts.dayofyear)
+    ax.set_xticklabels(month_starts.strftime("%b"))
+    ax.legend()
+    return fig, ax
+def plot_yearly_plotly(
+    df: pd.DataFrame,
+    compound: str,
+    df_new: pd.DataFrame | None = None,
+) -> "plotly.graph_objs._figure.Figure":
+    """Plot yearly data using plotly.
+
+    The concentrations of ``df`` are pivoted so that every year becomes one
+    scatter series over the time of year (day of year plus the fraction of
+    the day given by the hour). When a ``("-", "type")`` column exists in a
+    dataframe, only its rows whose type is ``"air"`` are plotted.
+
+    :arg df: Data to plot, one series per year.
+    :arg compound: Name of the compound to plot.
+    :arg df_new: Optional new data, added as a single "New Data" series.
+        Nothing is added when ``None``.
+    :returns: The plotly figure.
+    """
+    import plotly.express as px
+    import plotly.graph_objects as go
+    dt_column = ("-", "datetime")
+    type_column = ("-", "type")
+    serie = df[(compound, "conc")]
+    dt = df[dt_column]
+    if type_column in df.columns:
+        mask_air = df[type_column] == "air"
+        serie = serie[mask_air]
+        dt = dt[mask_air]
+    x = dt.dt.day_of_year + dt.dt.hour / 24.0
+    df_to_plot = pd.DataFrame(
+        {
+            "conc": serie.values,
+            "year": dt.dt.year.values,
+        },
+        index=x.values,
+    )
+    # Break down by year, to have year as columns and conc as values
+    df_to_plot = df_to_plot.pivot_table(
+        index=df_to_plot.index, columns="year", values="conc"
+    )
+    fig = px.scatter(df_to_plot)
+    # `df_new` is optional: only touch it when it was actually given
+    if df_new is not None:
+        if type_column in df_new.columns:
+            df_new = df_new[df_new[type_column] == "air"]
+        dt_new = df_new[dt_column]
+        fig.add_trace(
+            go.Scatter(
+                x=dt_new.dt.dayofyear + dt_new.dt.hour / 24.0,
+                y=df_new[(compound, "conc")],
+                mode="markers",
+                name="New Data",
+            )
+        )
+    # Month ticks, positioned at the month starts of 2024
+    x_values = pd.date_range(start="2024-01-01", end="2024-12-31", freq="MS")
+    fig.update_layout(
+        xaxis_title="Time of Year",
+        yaxis_title=f"{compound} (ppt)",
+        xaxis=dict(
+            tickmode="array",
+            tickvals=x_values.dayofyear,
+            ticktext=x_values.strftime("%b"),
+        ),
+    )
+    return fig

{avoca-0.11.4 → avoca-0.12.0}/pyproject.toml RENAMED Viewed

@@ -5,7 +5,7 @@ build-backend = "hatchling.build"
 [project]
 name = "avoca"
-version = "0.11.4"
+version = "0.12.0"
 authors = [
   { name="Lionel Constantin", email="lionel.constantin@empa.ch" },
 ]