disdrodb 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +1 -1
- disdrodb/_version.py +2 -2
- disdrodb/api/io.py +12 -2
- disdrodb/data_transfer/download_data.py +145 -14
- disdrodb/l0/check_standards.py +15 -10
- disdrodb/l0/configs/LPM/bins_diameter.yml +3 -3
- disdrodb/l0/configs/LPM/l0a_encodings.yml +4 -4
- disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +22 -6
- disdrodb/l0/configs/LPM/l0b_encodings.yml +41 -0
- disdrodb/l0/configs/LPM/raw_data_format.yml +40 -0
- disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
- disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
- disdrodb/l0/configs/PARSIVEL2/l0a_encodings.yml +4 -0
- disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +20 -4
- disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +41 -0
- disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +50 -10
- disdrodb/l0/configs/PWS100/bins_diameter.yml +173 -0
- disdrodb/l0/configs/PWS100/bins_velocity.yml +173 -0
- disdrodb/l0/configs/PWS100/l0a_encodings.yml +19 -0
- disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +76 -0
- disdrodb/l0/configs/PWS100/l0b_encodings.yml +176 -0
- disdrodb/l0/configs/PWS100/raw_data_format.yml +182 -0
- disdrodb/l0/configs/RD80/raw_data_format.yml +2 -6
- disdrodb/l0/l0b_nc_processing.py +1 -1
- disdrodb/l0/l0b_processing.py +12 -10
- disdrodb/l0/manuals/SWS250.pdf +0 -0
- disdrodb/l0/manuals/VPF730.pdf +0 -0
- disdrodb/l0/manuals/VPF750.pdf +0 -0
- disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +23 -13
- disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +3 -3
- disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +5 -3
- disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +36 -20
- disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +210 -0
- disdrodb/l0/readers/LPM/KIT/CHWALA.py +225 -0
- disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +197 -0
- disdrodb/l0/readers/LPM/SLOVENIA/CRNI_VRH.py +197 -0
- disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +107 -0
- disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +125 -0
- disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
- disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL_FGG.py +121 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +189 -0
- disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +133 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +138 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +9 -0
- disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +67 -0
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +150 -0
- disdrodb/l0/readers/RD80/NOAA/PSL_RD80.py +291 -0
- disdrodb/l0/readers/template_reader_raw_netcdf_data.py +1 -1
- disdrodb/l0/standards.py +7 -4
- disdrodb/l0/template_tools.py +2 -2
- disdrodb/l1/encoding_attrs.py +30 -8
- disdrodb/l1/processing.py +6 -4
- disdrodb/l1/resampling.py +1 -1
- disdrodb/l1/routines.py +9 -7
- disdrodb/l2/empirical_dsd.py +100 -2
- disdrodb/l2/event.py +3 -3
- disdrodb/l2/processing.py +21 -12
- disdrodb/l2/processing_options.py +7 -7
- disdrodb/l2/routines.py +3 -3
- disdrodb/metadata/checks.py +15 -6
- disdrodb/metadata/manipulation.py +2 -2
- disdrodb/metadata/standards.py +83 -79
- disdrodb/metadata/writer.py +2 -2
- disdrodb/routines.py +246 -10
- disdrodb/scattering/routines.py +1 -1
- disdrodb/utils/dataframe.py +342 -0
- disdrodb/utils/directories.py +14 -2
- disdrodb/utils/xarray.py +83 -0
- {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/METADATA +34 -61
- {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/RECORD +77 -54
- {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/WHEEL +1 -1
- {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/entry_points.txt +3 -3
- {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.1.0.dist-info → disdrodb-0.1.2.dist-info}/top_level.txt +0 -0
disdrodb/utils/dataframe.py
ADDED

@@ -0,0 +1,342 @@
+#!/usr/bin/env python3
+
+# -----------------------------------------------------------------------------.
+# Copyright (c) 2021-2023 DISDRODB developers
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------.
+"""Dataframe utilities."""
+import numpy as np
+import pandas as pd
+
+
+def log_arange(start, stop, log_step=0.1, base=10):
+    """
+    Return numbers spaced evenly on a log scale (similar to np.arange but in log space).
+
+    Parameters
+    ----------
+    start : float
+        The starting value of the sequence (must be > 0).
+    stop : float
+        The end value of the sequence (must be > 0).
+    log_step : float
+        The step size in log-space (default is 0.1).
+    base : float
+        The logarithmic base (default is 10).
+
+    Returns
+    -------
+    np.ndarray
+        Array of values spaced in log scale.
+    """
+    if start <= 0 or stop <= 0:
+        raise ValueError("Both start and stop must be > 0 for log spacing.")
+
+    log_start = np.log(start) / np.log(base)
+    log_stop = np.log(stop) / np.log(base)
+
+    log_values = np.arange(log_start, log_stop, log_step)
+    return base**log_values
+
+
+def compute_1d_histogram(df, column, variables=None, bins=10, labels=None, prefix_name=True, include_quantiles=False):
+    """Compute conditional univariate statistics.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Input dataframe
+    column : str
+        Column name to be binned.
+    variables : str or list, optional
+        Column names for which conditional statistics will be computed.
+        If None, only counts are computed.
+    bins : int or array-like
+        Number of bins or bin edges.
+    labels : array-like, optional
+        Labels for the column bins. If None, uses bin centers.
+
+    Returns
+    -------
+    pandas.DataFrame
+    """
+    # Copy data
+    df = df.copy()
+
+    # Ensure `variables` is a list of variables
+    # - If no variable specified, create dummy variable
+    if variables is None:
+        variables = ["dummy"]
+        df["dummy"] = np.ones(df[column].shape)
+        variables_specified = False
+    elif isinstance(variables, str):
+        variables = [variables]
+        variables_specified = True
+    elif isinstance(variables, list):
+        variables_specified = True
+    else:
+        raise TypeError("`variables` must be a string, list of strings, or None.")
+    variables = np.unique(variables)
+
+    # Handle column binning
+    if isinstance(bins, int):
+        bins = np.linspace(df[column].min(), df[column].max(), bins + 1)
+
+    # Drop rows where any of the key columns have NaN
+    df = df.dropna(subset=[column, *variables])
+
+    if len(df) == 0:
+        raise ValueError("No valid data points after removing NaN values")
+
+    # Create binned columns with explicit handling of out-of-bounds values
+    df[f"{column}_binned"] = pd.cut(df[column], bins=bins, include_lowest=True)
+
+    # Create complete IntervalIndex for both dimensions
+    intervals = df[f"{column}_binned"].cat.categories
+
+    # Create IntervalIndex with all possible combinations
+    full_index = pd.Index(intervals, name=f"{column}_binned")
+
+    # Define grouping object
+    df_grouped = df.groupby([f"{column}_binned"], observed=False)
+
+    # Compute statistics for specified variables
+    variables_stats = []
+    for i, var in enumerate(variables):
+        # Prepare prefix
+        prefix = f"{var}_" if prefix_name and variables_specified else ""
+
+        # Define statistics to compute
+        if variables_specified:
+            # Compute quantiles
+            quantiles = [0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99]
+            df_stats_quantiles = df_grouped[var].quantile(quantiles).unstack(level=-1)  # noqa: PD010
+            df_stats_quantiles.columns = [f"{prefix}Q{int(q*100)}" for q in df_stats_quantiles.columns]
+            df_stats_quantiles = df_stats_quantiles.rename(
+                columns={
+                    f"{prefix}Q50": f"{prefix}median",
+                },
+            )
+            # Define other stats to compute
+            list_stats = [
+                (f"{prefix}std", "std"),
+                (f"{prefix}min", "min"),
+                (f"{prefix}max", "max"),
+                (f"{prefix}mad", lambda s: np.median(np.abs(s - np.median(s)))),
+            ]
+            if i == 0:
+                list_stats.append(("count", "count"))
+        else:
+            list_stats = [("count", "count")]
+
+        # Compute statistics
+        df_stats = df_grouped[var].agg(list_stats)
+
+        # Compute other variable statistics
+        if variables_specified:
+            df_stats[f"{prefix}range"] = df_stats[f"{prefix}max"] - df_stats[f"{prefix}min"]
+            df_stats[f"{prefix}iqr"] = df_stats_quantiles[f"{prefix}Q75"] - df_stats_quantiles[f"{prefix}Q25"]
+            df_stats[f"{prefix}ipr80"] = df_stats_quantiles[f"{prefix}Q90"] - df_stats_quantiles[f"{prefix}Q10"]
+            df_stats[f"{prefix}ipr90"] = df_stats_quantiles[f"{prefix}Q95"] - df_stats_quantiles[f"{prefix}Q5"]
+            df_stats[f"{prefix}ipr98"] = df_stats_quantiles[f"{prefix}Q99"] - df_stats_quantiles[f"{prefix}Q1"]
+            if include_quantiles:
+                df_stats = pd.concat((df_stats, df_stats_quantiles), axis=1)
+            else:
+                df_stats[f"{prefix}median"] = df_stats_quantiles[f"{prefix}median"]
+        variables_stats.append(df_stats)
+
+    # Combine all statistics into a single DataFrame
+    df_stats = pd.concat(variables_stats, axis=1)
+
+    # Reindex to include all interval combinations
+    df_stats = df_stats.reindex(full_index)
+
+    # Determine bin centers
+    centers = intervals.mid
+
+    # Use provided labels if available
+    coords = labels if labels is not None else centers
+
+    # Reset index and add coordinates/labels
+    df_stats = df_stats.reset_index()
+    df_stats[f"{column}"] = pd.Categorical(df_stats[f"{column}_binned"].map(dict(zip(intervals, coords, strict=False))))
+    df_stats = df_stats.drop(columns=f"{column}_binned")
+
+    return df_stats
+
+
+def compute_2d_histogram(
+    df,
+    x,
+    y,
+    variables=None,
+    x_bins=10,
+    y_bins=10,
+    x_labels=None,
+    y_labels=None,
+    prefix_name=True,
+    include_quantiles=False,
+):
+    """Compute conditional bivariate statistics.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Input dataframe
+    x : str
+        Column name for x-axis binning (will be rounded to integers)
+    y : str
+        Column name for y-axis binning
+    variables : str or list, optional
+        Column names for which statistics will be computed.
+        If None, only counts are computed.
+    x_bins : int or array-like
+        Number of bins or bin edges for x
+    y_bins : int or array-like
+        Number of bins or bin edges for y
+    x_labels : array-like, optional
+        Labels for x bins. If None, uses bin centers
+    y_labels : array-like, optional
+        Labels for y bins. If None, uses bin centers
+
+    Returns
+    -------
+    xarray.Dataset
+        Dataset with dimensions corresponding to binned variables and
+        data variables for each statistic
+    """
+    # # If polars, cast to pandas
+    # if isinstance(df, pl.DataFrame):
+    #     df = df.to_pandas()
+
+    # Copy data
+    df = df.copy()
+
+    # Ensure `variables` is a list of variables
+    # - If no variable specified, create dummy variable
+    if variables is None:
+        variables = ["dummy"]
+        df["dummy"] = np.ones(df[x].shape)
+        variables_specified = False
+    elif isinstance(variables, str):
+        variables = [variables]
+        variables_specified = True
+    elif isinstance(variables, list):
+        variables_specified = True
+    else:
+        raise TypeError("`variables` must be a string, list of strings, or None.")
+    variables = np.unique(variables)
+
+    # Handle x-axis binning
+    if isinstance(x_bins, int):
+        x_bins = np.linspace(df[x].min(), df[x].max(), x_bins + 1)
+    # Handle y-axis binning
+    if isinstance(y_bins, int):
+        y_bins = np.linspace(df[y].min(), df[y].max(), y_bins + 1)
+
+    # Drop rows where any of the key columns have NaN
+    df = df.dropna(subset=[x, y, *variables])
+
+    if len(df) == 0:
+        raise ValueError("No valid data points after removing NaN values")
+
+    # Create binned columns with explicit handling of out-of-bounds values
+    df[f"{x}_binned"] = pd.cut(df[x], bins=x_bins, include_lowest=True)
+    df[f"{y}_binned"] = pd.cut(df[y], bins=y_bins, include_lowest=True)
+
+    # Create complete IntervalIndex for both dimensions
+    x_intervals = df[f"{x}_binned"].cat.categories
+    y_intervals = df[f"{y}_binned"].cat.categories
+
+    # Create MultiIndex with all possible combinations
+    full_index = pd.MultiIndex.from_product([x_intervals, y_intervals], names=[f"{x}_binned", f"{y}_binned"])
+
+    # Define grouping object
+    df_grouped = df.groupby([f"{x}_binned", f"{y}_binned"], observed=False)
+
+    # Compute statistics for specified variables
+    variables_stats = []
+    for i, var in enumerate(variables):
+        # Prepare prefix
+        prefix = f"{var}_" if prefix_name and variables_specified else ""
+
+        # Define statistics to compute
+        if variables_specified:
+            # Compute quantiles
+            quantiles = [0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99]
+            df_stats_quantiles = df_grouped[var].quantile(quantiles).unstack(level=-1)  # noqa: PD010
+            df_stats_quantiles.columns = [f"{prefix}Q{int(q*100)}" for q in df_stats_quantiles.columns]
+            df_stats_quantiles = df_stats_quantiles.rename(
+                columns={
+                    f"{prefix}Q50": f"{prefix}median",
+                },
+            )
+            # Define other stats to compute
+            list_stats = [
+                (f"{prefix}std", "std"),
+                (f"{prefix}min", "min"),
+                (f"{prefix}max", "max"),
+                (f"{prefix}mad", lambda s: np.median(np.abs(s - np.median(s)))),
+            ]
+            if i == 0:
+                list_stats.append(("count", "count"))
+        else:
+            list_stats = [("count", "count")]
+
+        # Compute statistics
+        df_stats = df_grouped[var].agg(list_stats)
+
+        # Compute other variable statistics
+        if variables_specified:
+            df_stats[f"{prefix}range"] = df_stats[f"{prefix}max"] - df_stats[f"{prefix}min"]
+            df_stats[f"{prefix}iqr"] = df_stats_quantiles[f"{prefix}Q75"] - df_stats_quantiles[f"{prefix}Q25"]
+            df_stats[f"{prefix}ipr80"] = df_stats_quantiles[f"{prefix}Q90"] - df_stats_quantiles[f"{prefix}Q10"]
+            df_stats[f"{prefix}ipr90"] = df_stats_quantiles[f"{prefix}Q95"] - df_stats_quantiles[f"{prefix}Q5"]
+            df_stats[f"{prefix}ipr98"] = df_stats_quantiles[f"{prefix}Q99"] - df_stats_quantiles[f"{prefix}Q1"]
+            if include_quantiles:
+                df_stats = pd.concat((df_stats, df_stats_quantiles), axis=1)
+            else:
+                df_stats[f"{prefix}median"] = df_stats_quantiles[f"{prefix}median"]
+        variables_stats.append(df_stats)
+
+    # Combine all statistics into a single DataFrame
+    df_stats = pd.concat(variables_stats, axis=1)
+
+    # Reindex to include all interval combinations
+    df_stats = df_stats.reindex(full_index)
+
+    # Determine coordinates
+    x_centers = x_intervals.mid
+    y_centers = y_intervals.mid
+
+    # Use provided labels if available
+    x_coords = x_labels if x_labels is not None else x_centers
+    y_coords = y_labels if y_labels is not None else y_centers
+
+    # Reset index and set new coordinates
+    df_stats = df_stats.reset_index()
+    df_stats[f"{x}"] = pd.Categorical(df_stats[f"{x}_binned"].map(dict(zip(x_intervals, x_coords, strict=False))))
+    df_stats[f"{y}"] = pd.Categorical(df_stats[f"{y}_binned"].map(dict(zip(y_intervals, y_coords, strict=False))))
+
+    # Set new MultiIndex with coordinates
+    df_stats = df_stats.set_index([f"{x}", f"{y}"])
+    df_stats = df_stats.drop(columns=[f"{x}_binned", f"{y}_binned"])
+
+    # Convert to dataset
+    ds = df_stats.to_xarray()
+
+    # Transpose arrays
+    ds = ds.transpose(y, x)
+    return ds
disdrodb/utils/directories.py
CHANGED

@@ -17,12 +17,12 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
 """Define utilities for Directory/File Checks/Creation/Deletion."""
-
 import glob
 import logging
 import os
 import pathlib
 import shutil
+import subprocess
 from typing import Union

 from disdrodb.utils.list import flatten_list

@@ -207,10 +207,22 @@ def _remove_file_or_directories(path, logger=None):
         log_info(logger, msg=f"Deleted the empty directory {path}")
     # If not empty directory
     else:
-        shutil.rmtree
+        # If not window use shutil.rmtree
+        if os.name != "nt":  # Check if not Windows
+            shutil.rmtree(path)
+        else:
+            rmtree_windows(path)
     log_info(logger, msg=f"Deleted directories within {path}")


+def rmtree_windows(path):
+    """Remove a directory tree on Windows."""
+    if not os.path.isdir(path):
+        raise FileNotFoundError(f"{path!r} is not a valid directory")
+    # Use rd (alias rmdir) with /S (remove all subdirectories/files) and /Q (quiet)
+    subprocess.check_call(["cmd", "/c", "rd", "/S", "/Q", path])
+
+
 def remove_if_exists(path: str, force: bool = False, logger=None) -> None:
     """Remove file or directory if exists and ``force=True``.
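Worth noting: the previous code referenced shutil.rmtree without calling it, so non-empty directories were never actually removed; the patch fixes that and routes Windows deletions through cmd.exe. Below is a standalone sketch of the same dispatch pattern, using a hypothetical remove_tree helper rather than the package's own function.

```python
# Standalone sketch (not the package's API): shutil.rmtree on POSIX systems,
# "rd /S /Q" via cmd.exe on Windows, mirroring the dispatch introduced above.
import os
import shutil
import subprocess


def remove_tree(path):
    """Recursively delete a directory, choosing the strategy per platform."""
    if not os.path.isdir(path):
        raise FileNotFoundError(f"{path!r} is not a valid directory")
    if os.name != "nt":
        shutil.rmtree(path)
    else:
        subprocess.check_call(["cmd", "/c", "rd", "/S", "/Q", path])
```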
disdrodb/utils/xarray.py
CHANGED

@@ -97,6 +97,89 @@ def xr_get_last_valid_idx(da_condition, dim, fill_value=None):
     return last_idx


+####-------------------------------------------------------------------
+#### Unstacking dimension
+
+
+def _check_coord_handling(coord_handling):
+    if coord_handling not in {"keep", "drop", "unstack"}:
+        raise ValueError("coord_handling must be one of 'keep', 'drop', or 'unstack'.")
+
+
+def _unstack_coordinates(xr_obj, dim, prefix, suffix):
+    # Identify coordinates that share the target dimension
+    coords_with_dim = _get_non_dimensional_coordinates(xr_obj, dim=dim)
+    ds = xr.Dataset()
+    for coord_name in coords_with_dim:
+        coord_da = xr_obj[coord_name]
+        # Split the coordinate DataArray along the target dimension, drop coordinate and merge
+        split_ds = unstack_datarray_dimension(coord_da, coord_handling="drop", dim=dim, prefix=prefix, suffix=suffix)
+        ds.update(split_ds)
+    return ds
+
+
+def _handle_unstack_non_dim_coords(ds, source_xr_obj, coord_handling, dim, prefix, suffix):
+    # Deal with coordinates sharing the target dimension
+    if coord_handling == "keep":
+        return ds
+    if coord_handling == "unstack":
+        ds_coords = _unstack_coordinates(source_xr_obj, dim=dim, prefix=prefix, suffix=suffix)
+        ds.update(ds_coords)
+    # Remove non dimensional coordinates (unstack and drop coord_handling)
+    ds = ds.drop_vars(_get_non_dimensional_coordinates(ds, dim=dim))
+    return ds
+
+
+def _get_non_dimensional_coordinates(xr_obj, dim):
+    return [coord_name for coord_name, coord_da in xr_obj.coords.items() if dim in coord_da.dims and coord_name != dim]
+
+
+def unstack_datarray_dimension(da, dim, coord_handling="keep", prefix="", suffix=""):
+    """
+    Split a DataArray along a specified dimension into a Dataset with separate prefixed and suffixed variables.
+
+    Parameters
+    ----------
+    da : xarray.DataArray
+        The DataArray to split.
+    dim : str
+        The dimension along which to split the DataArray.
+    coord_handling : str, optional
+        Option to handle coordinates sharing the target dimension.
+        Choices are 'keep', 'drop', or 'unstack'. Defaults to 'keep'.
+    prefix : str, optional
+        String to prepend to each new variable name.
+    suffix : str, optional
+        String to append to each new variable name.
+
+    Returns
+    -------
+    xarray.Dataset
+        A Dataset with each variable split along the specified dimension.
+        The Dataset variables are named "{prefix}{name}{suffix}{dim_value}".
+        Coordinates sharing the target dimension are handled based on `coord_handling`.
+    """
+    # Retrieve DataArray name
+    name = da.name
+    # Unstack variables
+    ds = da.to_dataset(dim=dim)
+    rename_dict = {dim_value: f"{prefix}{name}{suffix}{dim_value}" for dim_value in list(ds.data_vars)}
+    ds = ds.rename_vars(rename_dict)
+    # Deal with coordinates sharing the target dimension
+    return _handle_unstack_non_dim_coords(
+        ds=ds,
+        source_xr_obj=da,
+        coord_handling=coord_handling,
+        dim=dim,
+        prefix=prefix,
+        suffix=suffix,
+    )
+
+
+####--------------------------------------------------------------------------
+#### Fill Values Utilities
+
+
 def define_dataarray_fill_value(da):
     """Define the fill value for a numerical xarray.DataArray."""
     if np.issubdtype(da.dtype, np.floating):
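To illustrate the new unstacking helper, here is a small usage sketch (not part of the diff): the DataArray name "moment" and the "order" dimension are invented for the example, while the function and its keyword arguments come from the hunk above.

```python
# Illustrative only: invented DataArray name and dimension.
import numpy as np
import xarray as xr

from disdrodb.utils.xarray import unstack_datarray_dimension

da = xr.DataArray(
    np.arange(6).reshape(2, 3),
    dims=("time", "order"),
    coords={"time": [0, 1], "order": [1, 2, 3]},
    name="moment",
)

# Split the "order" dimension into one variable per value: the result is a
# Dataset with data variables "moment_1", "moment_2", "moment_3", each with dims ("time",).
ds = unstack_datarray_dimension(da, dim="order", prefix="", suffix="_")
```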
|