disdrodb 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +68 -34
- disdrodb/_config.py +5 -4
- disdrodb/_version.py +16 -3
- disdrodb/accessor/__init__.py +20 -0
- disdrodb/accessor/methods.py +125 -0
- disdrodb/api/checks.py +177 -24
- disdrodb/api/configs.py +3 -3
- disdrodb/api/info.py +13 -13
- disdrodb/api/io.py +281 -22
- disdrodb/api/path.py +184 -195
- disdrodb/api/search.py +18 -9
- disdrodb/cli/disdrodb_create_summary.py +103 -0
- disdrodb/cli/disdrodb_create_summary_station.py +91 -0
- disdrodb/cli/disdrodb_run_l0.py +1 -1
- disdrodb/cli/disdrodb_run_l0_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0b.py +1 -1
- disdrodb/cli/disdrodb_run_l0b_station.py +3 -3
- disdrodb/cli/disdrodb_run_l0c.py +1 -1
- disdrodb/cli/disdrodb_run_l0c_station.py +3 -3
- disdrodb/cli/disdrodb_run_l1_station.py +2 -2
- disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
- disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
- disdrodb/configs.py +149 -4
- disdrodb/constants.py +61 -0
- disdrodb/data_transfer/download_data.py +127 -11
- disdrodb/etc/configs/attributes.yaml +339 -0
- disdrodb/etc/configs/encodings.yaml +473 -0
- disdrodb/etc/products/L1/global.yaml +13 -0
- disdrodb/etc/products/L2E/10MIN.yaml +12 -0
- disdrodb/etc/products/L2E/1MIN.yaml +1 -0
- disdrodb/etc/products/L2E/global.yaml +22 -0
- disdrodb/etc/products/L2M/10MIN.yaml +12 -0
- disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/global.yaml +26 -0
- disdrodb/issue/writer.py +2 -0
- disdrodb/l0/__init__.py +13 -0
- disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
- disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
- disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
- disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
- disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +5 -5
- disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +3 -3
- disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +1 -1
- disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
- disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
- disdrodb/l0/l0a_processing.py +37 -32
- disdrodb/l0/l0b_nc_processing.py +118 -8
- disdrodb/l0/l0b_processing.py +30 -65
- disdrodb/l0/l0c_processing.py +369 -259
- disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +7 -0
- disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
- disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
- disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
- disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +0 -2
- disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +4 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
- disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
- disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
- disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
- disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
- disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
- disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
- disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → MPI/BCO_PARSIVEL2.py} +41 -71
- disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
- disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
- disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +20 -12
- disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +5 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
- disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +146 -0
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
- disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
- disdrodb/l1/__init__.py +5 -0
- disdrodb/l1/fall_velocity.py +46 -0
- disdrodb/l1/filters.py +34 -20
- disdrodb/l1/processing.py +46 -45
- disdrodb/l1/resampling.py +77 -66
- disdrodb/l1_env/routines.py +18 -3
- disdrodb/l2/__init__.py +7 -0
- disdrodb/l2/empirical_dsd.py +58 -10
- disdrodb/l2/processing.py +268 -117
- disdrodb/metadata/checks.py +132 -125
- disdrodb/metadata/standards.py +3 -1
- disdrodb/psd/fitting.py +631 -345
- disdrodb/psd/models.py +9 -6
- disdrodb/routines/__init__.py +54 -0
- disdrodb/{l0/routines.py → routines/l0.py} +316 -355
- disdrodb/{l1/routines.py → routines/l1.py} +76 -116
- disdrodb/routines/l2.py +1019 -0
- disdrodb/{routines.py → routines/wrappers.py} +98 -10
- disdrodb/scattering/__init__.py +16 -4
- disdrodb/scattering/axis_ratio.py +61 -37
- disdrodb/scattering/permittivity.py +504 -0
- disdrodb/scattering/routines.py +746 -184
- disdrodb/summary/__init__.py +17 -0
- disdrodb/summary/routines.py +4196 -0
- disdrodb/utils/archiving.py +434 -0
- disdrodb/utils/attrs.py +68 -125
- disdrodb/utils/cli.py +5 -5
- disdrodb/utils/compression.py +30 -1
- disdrodb/utils/dask.py +121 -9
- disdrodb/utils/dataframe.py +61 -7
- disdrodb/utils/decorators.py +31 -0
- disdrodb/utils/directories.py +35 -15
- disdrodb/utils/encoding.py +37 -19
- disdrodb/{l2 → utils}/event.py +15 -173
- disdrodb/utils/logger.py +14 -7
- disdrodb/utils/manipulations.py +81 -0
- disdrodb/utils/routines.py +166 -0
- disdrodb/utils/subsetting.py +214 -0
- disdrodb/utils/time.py +35 -177
- disdrodb/utils/writer.py +20 -7
- disdrodb/utils/xarray.py +5 -4
- disdrodb/viz/__init__.py +13 -0
- disdrodb/viz/plots.py +398 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/METADATA +4 -3
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/RECORD +139 -98
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/entry_points.txt +2 -0
- disdrodb/l1/encoding_attrs.py +0 -642
- disdrodb/l2/processing_options.py +0 -213
- disdrodb/l2/routines.py +0 -868
- /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/WHEEL +0 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.1.2.dist-info → disdrodb-0.1.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
# -----------------------------------------------------------------------------.
|
|
2
|
+
# Copyright (c) 2021-2023 DISDRODB developers
|
|
3
|
+
#
|
|
4
|
+
# This program is free software: you can redistribute it and/or modify
|
|
5
|
+
# it under the terms of the GNU General Public License as published by
|
|
6
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
7
|
+
# (at your option) any later version.
|
|
8
|
+
#
|
|
9
|
+
# This program is distributed in the hope that it will be useful,
|
|
10
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12
|
+
# GNU General Public License for more details.
|
|
13
|
+
#
|
|
14
|
+
# You should have received a copy of the GNU General Public License
|
|
15
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
16
|
+
|
|
17
|
+
# -----------------------------------------------------------------------------.
|
|
18
|
+
"""This module contains functions for subsetting and aligning DISDRODB products."""
|
|
19
|
+
|
|
20
|
+
import numpy as np
|
|
21
|
+
from xarray.core.utils import either_dict_or_kwargs
|
|
22
|
+
|
|
23
|
+
from disdrodb.constants import DIAMETER_DIMENSION, VELOCITY_DIMENSION
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def is_1d_non_dimensional_coord(xr_obj, coord):
    """Checks if a coordinate is a 1d, non-dimensional coordinate."""
    # Not a coordinate at all
    if coord not in xr_obj.coords:
        return False
    coord_array = xr_obj[coord]
    # Only 1D coordinates qualify
    if coord_array.ndim != 1:
        return False
    # A dimension coordinate is named after its own dimension
    return coord_array.dims[0] != coord
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _get_dim_of_1d_non_dimensional_coord(xr_obj, coord):
    """Get the dimension of a 1D non-dimension coordinate."""
    # Only valid for 1D coordinates that are not themselves a dimension
    if is_1d_non_dimensional_coord(xr_obj, coord):
        return xr_obj[coord].dims[0]
    raise ValueError(f"'{coord}' is not a dimension or a 1D non-dimensional coordinate.")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _get_dim_isel_on_non_dim_coord_from_isel(xr_obj, coord, isel_indices):
    """Get dimension and isel_indices related to a 1D non-dimension coordinate.

    Parameters
    ----------
    xr_obj : (xr.Dataset, xr.DataArray)
        A xarray object.
    coord : str
        Name of the coordinate wishing to subset with .sel
    isel_indices : (str, int, float, list, np.array)
        Coordinate indices wishing to be selected.

    Returns
    -------
    dim : str
        Dimension related to the 1D non-dimension coordinate.
    isel_indices : (int, list, slice)
        Indices for index-based selection.
    """
    # Positional indices already refer to the underlying dimension:
    # only the dimension name must be resolved.
    return _get_dim_of_1d_non_dimensional_coord(xr_obj, coord), isel_indices
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _get_dim_isel_indices_from_isel_indices(xr_obj, key, indices, method="dummy"):  # noqa
    """Return the dimension and isel_indices related to the dimension position indices of a coordinate."""
    # Dimension case: indices already map directly onto the dimension
    if key in xr_obj.dims:
        return key, indices
    # Non-dimensional coordinate case: resolve the underlying dimension
    return _get_dim_isel_on_non_dim_coord_from_isel(xr_obj, coord=key, isel_indices=indices)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _get_isel_indices_from_sel_indices(xr_obj, coord, sel_indices, method):
    """Get isel_indices corresponding to sel_indices."""
    coord_array = xr_obj[coord]
    dim = coord_array.dims[0]
    # Attach the positional index of each coordinate value
    positions = np.arange(coord_array.size)
    coord_array = coord_array.assign_coords({"isel_indices": (dim, positions)})
    # Select by value, then read back the surviving positional indices
    subset = coord_array.swap_dims({dim: coord}).sel({coord: sel_indices}, method=method)
    return subset["isel_indices"].data
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _get_dim_isel_on_non_dim_coord_from_sel(xr_obj, coord, sel_indices, method):
    """
    Return the dimension and isel_indices related to a 1D non-dimension coordinate.

    Parameters
    ----------
    xr_obj : (xr.Dataset, xr.DataArray)
        A xarray object.
    coord : str
        Name of the coordinate wishing to subset with .sel
    sel_indices : (str, int, float, list, np.array)
        Coordinate values wishing to be selected.

    Returns
    -------
    dim : str
        Dimension related to the 1D non-dimension coordinate.
    isel_indices : np.ndarray
        Indices for index-based selection.
    """
    # Resolve the dimension first (raises if coord is not a valid 1D non-dim coordinate)
    dim = _get_dim_of_1d_non_dimensional_coord(xr_obj, coord)
    # Translate the requested values into positional indices
    indices = _get_isel_indices_from_sel_indices(
        xr_obj,
        coord=coord,
        sel_indices=sel_indices,
        method=method,
    )
    return dim, indices
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _get_dim_isel_indices_from_sel_indices(xr_obj, key, indices, method):
    """Return the dimension and isel_indices related to values of a coordinate."""
    # Non-dimensional coordinate case
    if key not in xr_obj.dims:
        return _get_dim_isel_on_non_dim_coord_from_sel(
            xr_obj,
            coord=key,
            sel_indices=indices,
            method=method,
        )
    # Dimension case: value-based selection requires a matching coordinate
    if key not in xr_obj.coords:
        raise ValueError(f"Can not subset with disdrodb.sel the dimension '{key}' if it is not also a coordinate.")
    isel_indices = _get_isel_indices_from_sel_indices(xr_obj, coord=key, sel_indices=indices, method=method)
    return key, isel_indices
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _get_dim_isel_indices_function(func):
    """Return the indexer-resolution helper for 'sel' or 'isel'."""
    dispatch = {
        "isel": _get_dim_isel_indices_from_isel_indices,
        "sel": _get_dim_isel_indices_from_sel_indices,
    }
    return dispatch[func]
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _subset(xr_obj, indexers=None, func="isel", drop=False, method=None, **indexers_kwargs):
    """Perform selection with ``sel`` or ``isel``.

    Parameters
    ----------
    xr_obj : (xr.Dataset, xr.DataArray)
        A xarray object.
    indexers : dict, optional
        Mapping from coordinate/dimension names to indices (``isel``) or values (``sel``).
    func : str
        Either ``"sel"`` (value-based) or ``"isel"`` (index-based) selection.
    drop : bool
        Whether to drop scalar coordinates resulting from the selection.
    method : str, optional
        Method passed to ``sel`` for inexact matches (ignored by ``isel``).

    Returns
    -------
    (xr.Dataset, xr.DataArray)
        The subsetted xarray object.
    """
    # Retrieve indexers
    indexers = either_dict_or_kwargs(indexers, indexers_kwargs, func)
    # Get function returning (dim, isel_indices) for each indexer
    get_dim_isel_indices = _get_dim_isel_indices_function(func)
    # Build the dimension-based isel dictionary
    isel_dict = {}
    for key, indices in indexers.items():
        # - 'dim' may differ from 'key' when key is a non-dimensional coordinate
        dim, isel_indices = get_dim_isel_indices(xr_obj, key=key, indices=indices, method=method)
        if dim in isel_dict:
            raise ValueError(f"Multiple indexers point to the '{dim}' dimension.")
        isel_dict[dim] = isel_indices

    # Subset the xarray object by position
    xr_obj = xr_obj.isel(isel_dict, drop=drop)
    return xr_obj
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def isel(xr_obj, indexers=None, drop=False, **indexers_kwargs):
    """Perform index-based dimension selection."""
    subset_kwargs = {"indexers": indexers, "func": "isel", "drop": drop}
    return _subset(xr_obj, **subset_kwargs, **indexers_kwargs)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def sel(xr_obj, indexers=None, drop=False, method=None, **indexers_kwargs):
    """Perform value-based coordinate selection.

    Slices are treated as inclusive of both the start and stop values, unlike normal Python indexing.
    The disdrodb `sel` method is empowered to:

    - slice by disdrodb-id strings !
    - slice by any xarray coordinate value !

    You can use string shortcuts for datetime coordinates (e.g., '2000-01' to select all values in January 2000).
    """
    subset_kwargs = {"indexers": indexers, "func": "sel", "drop": drop, "method": method}
    return _subset(xr_obj, **subset_kwargs, **indexers_kwargs)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def align(*args):
    """Align DISDRODB products over time, velocity and diameter dimensions."""
    # Check input
    if len(args) < 2:
        raise ValueError("At least two xarray object are required for alignment.")

    # Candidate coordinates used for alignment
    candidate_coords = ["time", DIAMETER_DIMENSION, VELOCITY_DIMENSION]

    # Keep only the coordinates available in every input object
    shared_coords = [c for c in candidate_coords if all(c in xr_obj.coords for xr_obj in args)]
    if not shared_coords:
        raise ValueError("No common coordinates found among the input datasets for alignment.")

    # Start with the input datasets
    aligned = list(args)

    # Iteratively subset over each shared coordinate
    for coord in shared_coords:
        # Intersect the coordinate values across all objects
        # - np.intersect1d returns the sorted array of common unique elements
        common_values = aligned[0][coord].data
        for xr_obj in aligned[1:]:
            common_values = np.intersect1d(common_values, xr_obj[coord].data)
        # - np.atleast_1d ensures the dimension is not dropped if only 1 value
        common_values = np.atleast_1d(common_values)

        # Check there are common coordinate values
        if len(common_values) == 0:
            raise ValueError(f"No common {coord} values across input objects.")

        # Subset every object to the common values
        aligned = [sel(xr_obj, {coord: common_values}) for xr_obj in aligned]

    return aligned
|
disdrodb/utils/time.py
CHANGED
|
@@ -29,11 +29,12 @@ from disdrodb.utils.xarray import define_fill_value_dictionary
|
|
|
29
29
|
|
|
30
30
|
logger = logging.getLogger(__name__)
|
|
31
31
|
|
|
32
|
+
|
|
32
33
|
####------------------------------------------------------------------------------------.
|
|
33
34
|
#### Sampling Interval Acronyms
|
|
34
35
|
|
|
35
36
|
|
|
36
|
-
def
|
|
37
|
+
def seconds_to_temporal_resolution(seconds):
|
|
37
38
|
"""
|
|
38
39
|
Convert a duration in seconds to a readable string format (e.g., "1H30", "1D2H").
|
|
39
40
|
|
|
@@ -57,27 +58,27 @@ def seconds_to_acronym(seconds):
|
|
|
57
58
|
parts.append(f"{components.minutes}MIN")
|
|
58
59
|
if components.seconds > 0:
|
|
59
60
|
parts.append(f"{components.seconds}S")
|
|
60
|
-
|
|
61
|
-
return
|
|
61
|
+
temporal_resolution = "".join(parts)
|
|
62
|
+
return temporal_resolution
|
|
62
63
|
|
|
63
64
|
|
|
64
|
-
def get_resampling_information(
|
|
65
|
+
def get_resampling_information(temporal_resolution):
|
|
65
66
|
"""
|
|
66
|
-
Extract resampling information from the
|
|
67
|
+
Extract resampling information from the temporal_resolution string.
|
|
67
68
|
|
|
68
69
|
Parameters
|
|
69
70
|
----------
|
|
70
|
-
|
|
71
|
-
A string representing the
|
|
71
|
+
temporal_resolution: str
|
|
72
|
+
A string representing the product temporal resolution: e.g., "1H30MIN", "ROLL1H30MIN".
|
|
72
73
|
|
|
73
74
|
Returns
|
|
74
75
|
-------
|
|
75
76
|
sample_interval_seconds, rolling: tuple
|
|
76
77
|
Sample_interval in seconds and whether rolling is enabled.
|
|
77
78
|
"""
|
|
78
|
-
rolling =
|
|
79
|
+
rolling = temporal_resolution.startswith("ROLL")
|
|
79
80
|
if rolling:
|
|
80
|
-
|
|
81
|
+
temporal_resolution = temporal_resolution[4:] # Remove "ROLL"
|
|
81
82
|
|
|
82
83
|
# Allowed pattern: one or more occurrences of "<number><unit>"
|
|
83
84
|
# where unit is exactly one of D, H, MIN, or S.
|
|
@@ -85,15 +86,15 @@ def get_resampling_information(sample_interval_acronym):
|
|
|
85
86
|
pattern = r"^(\d+(?:D|H|MIN|S))+$"
|
|
86
87
|
|
|
87
88
|
# Check if the entire string matches the pattern
|
|
88
|
-
if not re.match(pattern,
|
|
89
|
+
if not re.match(pattern, temporal_resolution):
|
|
89
90
|
raise ValueError(
|
|
90
|
-
f"Invalid
|
|
91
|
+
f"Invalid temporal resolution '{temporal_resolution}'. "
|
|
91
92
|
"Must be composed of one or more <number><unit> groups, where unit is D, H, MIN, or S.",
|
|
92
93
|
)
|
|
93
94
|
|
|
94
95
|
# Regular expression to match duration components and extract all (value, unit) pairs
|
|
95
96
|
pattern = r"(\d+)(D|H|MIN|S)"
|
|
96
|
-
matches = re.findall(pattern,
|
|
97
|
+
matches = re.findall(pattern, temporal_resolution)
|
|
97
98
|
|
|
98
99
|
# Conversion factors for each unit
|
|
99
100
|
unit_to_seconds = {
|
|
@@ -112,21 +113,21 @@ def get_resampling_information(sample_interval_acronym):
|
|
|
112
113
|
return sample_interval, rolling
|
|
113
114
|
|
|
114
115
|
|
|
115
|
-
def
|
|
116
|
+
def temporal_resolution_to_seconds(temporal_resolution):
|
|
116
117
|
"""
|
|
117
|
-
Extract the interval in seconds from the
|
|
118
|
+
Extract the measurement interval in seconds from the temporal resolution string.
|
|
118
119
|
|
|
119
120
|
Parameters
|
|
120
121
|
----------
|
|
121
|
-
|
|
122
|
-
A string representing
|
|
122
|
+
temporal_resolution: str
|
|
123
|
+
A string representing the product measurement interval: e.g., "1H30MIN", "ROLL1H30MIN".
|
|
123
124
|
|
|
124
125
|
Returns
|
|
125
126
|
-------
|
|
126
127
|
seconds
|
|
127
128
|
Duration in seconds.
|
|
128
129
|
"""
|
|
129
|
-
seconds, _ = get_resampling_information(
|
|
130
|
+
seconds, _ = get_resampling_information(temporal_resolution)
|
|
130
131
|
return seconds
|
|
131
132
|
|
|
132
133
|
|
|
@@ -262,6 +263,7 @@ def regularize_dataset(
|
|
|
262
263
|
Regularized dataset.
|
|
263
264
|
|
|
264
265
|
"""
|
|
266
|
+
attrs = xr_obj.attrs.copy()
|
|
265
267
|
xr_obj = _check_time_sorted(xr_obj, time_dim=time_dim)
|
|
266
268
|
start_time, end_time = get_dataset_start_end_time(xr_obj, time_dim=time_dim)
|
|
267
269
|
|
|
@@ -289,11 +291,14 @@ def regularize_dataset(
|
|
|
289
291
|
# tolerance=tolerance, # mismatch in seconds
|
|
290
292
|
fill_value=fill_value,
|
|
291
293
|
)
|
|
294
|
+
|
|
295
|
+
# Ensure attributes are preserved
|
|
296
|
+
xr_obj.attrs = attrs
|
|
292
297
|
return xr_obj
|
|
293
298
|
|
|
294
299
|
|
|
295
300
|
####------------------------------------------
|
|
296
|
-
####
|
|
301
|
+
#### Interval utilities
|
|
297
302
|
|
|
298
303
|
|
|
299
304
|
def ensure_sample_interval_in_seconds(sample_interval): # noqa: PLR0911
|
|
@@ -376,7 +381,7 @@ def ensure_sample_interval_in_seconds(sample_interval): # noqa: PLR0911
|
|
|
376
381
|
raise TypeError("Float array sample_interval must contain only whole numbers.")
|
|
377
382
|
return sample_interval.astype(int)
|
|
378
383
|
|
|
379
|
-
# Deal with xarray.
|
|
384
|
+
# Deal with xarray.DataArray of floats that are all integer-valued (with optionally some NaN)
|
|
380
385
|
if isinstance(sample_interval, xr.DataArray) and np.issubdtype(sample_interval.dtype, np.floating):
|
|
381
386
|
arr = sample_interval.copy()
|
|
382
387
|
data = arr.data
|
|
@@ -397,6 +402,17 @@ def ensure_sample_interval_in_seconds(sample_interval): # noqa: PLR0911
|
|
|
397
402
|
)
|
|
398
403
|
|
|
399
404
|
|
|
405
|
+
def ensure_timedelta_seconds(interval):
    """Return a scalar value/array in seconds or timedelta object as numpy.timedelta64 in seconds.

    Parameters
    ----------
    interval : int, float, numpy.ndarray, xarray.DataArray or timedelta-like
        Interval(s) accepted by ``ensure_sample_interval_in_seconds``.

    Returns
    -------
    numpy.timedelta64 or array of numpy.timedelta64
        Interval(s) with second ('m8[s]') resolution.
    """
    # Arrays: convert element-wise and cast the resulting integer array
    if isinstance(interval, (xr.DataArray, np.ndarray)):
        return ensure_sample_interval_in_seconds(interval).astype("m8[s]")
    # Scalars: wrap into a numpy timedelta64 scalar/array
    return np.array(ensure_sample_interval_in_seconds(interval), dtype="m8[s]")
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
####------------------------------------------
|
|
413
|
+
#### Sample Interval Utilities
|
|
414
|
+
|
|
415
|
+
|
|
400
416
|
def infer_sample_interval(ds, robust=False, verbose=False, logger=None):
|
|
401
417
|
"""Infer the sample interval of a dataset.
|
|
402
418
|
|
|
@@ -497,161 +513,3 @@ def infer_sample_interval(ds, robust=False, verbose=False, logger=None):
|
|
|
497
513
|
)
|
|
498
514
|
log_warning(logger=logger, msg=msg, verbose=verbose)
|
|
499
515
|
return int(sample_interval)
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
####---------------------------------------------------------------------------------
|
|
503
|
-
#### Timesteps regularization
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
def get_problematic_timestep_indices(timesteps, sample_interval):
|
|
507
|
-
"""Identify timesteps with missing previous or following timesteps."""
|
|
508
|
-
previous_time = timesteps - pd.Timedelta(seconds=sample_interval)
|
|
509
|
-
next_time = timesteps + pd.Timedelta(seconds=sample_interval)
|
|
510
|
-
idx_previous_missing = np.where(~np.isin(previous_time, timesteps))[0][1:]
|
|
511
|
-
idx_next_missing = np.where(~np.isin(next_time, timesteps))[0][:-1]
|
|
512
|
-
idx_isolated_missing = np.intersect1d(idx_previous_missing, idx_next_missing)
|
|
513
|
-
idx_previous_missing = idx_previous_missing[np.isin(idx_previous_missing, idx_isolated_missing, invert=True)]
|
|
514
|
-
idx_next_missing = idx_next_missing[np.isin(idx_next_missing, idx_isolated_missing, invert=True)]
|
|
515
|
-
return idx_previous_missing, idx_next_missing, idx_isolated_missing
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
def regularize_timesteps(ds, sample_interval, robust=False, add_quality_flag=True, logger=None, verbose=True):
|
|
519
|
-
"""Ensure timesteps match with the sample_interval.
|
|
520
|
-
|
|
521
|
-
This function:
|
|
522
|
-
- drop dataset indices with duplicated timesteps,
|
|
523
|
-
- but does not add missing timesteps to the dataset.
|
|
524
|
-
"""
|
|
525
|
-
# Check sorted by time and sort if necessary
|
|
526
|
-
ds = ensure_sorted_by_time(ds)
|
|
527
|
-
|
|
528
|
-
# Convert time to pandas.DatetimeIndex for easier manipulation
|
|
529
|
-
times = pd.to_datetime(ds["time"].to_numpy())
|
|
530
|
-
|
|
531
|
-
# Determine the start and end times
|
|
532
|
-
start_time = times[0].floor(f"{sample_interval}s")
|
|
533
|
-
end_time = times[-1].ceil(f"{sample_interval}s")
|
|
534
|
-
|
|
535
|
-
# Create the expected time grid
|
|
536
|
-
expected_times = pd.date_range(start=start_time, end=end_time, freq=f"{sample_interval}s")
|
|
537
|
-
|
|
538
|
-
# Convert to numpy arrays
|
|
539
|
-
times = times.to_numpy(dtype="M8[s]")
|
|
540
|
-
expected_times = expected_times.to_numpy(dtype="M8[s]")
|
|
541
|
-
|
|
542
|
-
# Map original times to the nearest expected times
|
|
543
|
-
# Calculate the difference between original times and expected times
|
|
544
|
-
time_deltas = np.abs(times - expected_times[:, None]).astype(int)
|
|
545
|
-
|
|
546
|
-
# Find the index of the closest expected time for each original time
|
|
547
|
-
nearest_indices = np.argmin(time_deltas, axis=0)
|
|
548
|
-
adjusted_times = expected_times[nearest_indices]
|
|
549
|
-
|
|
550
|
-
# Check for duplicates in adjusted times
|
|
551
|
-
unique_times, counts = np.unique(adjusted_times, return_counts=True)
|
|
552
|
-
duplicates = unique_times[counts > 1]
|
|
553
|
-
|
|
554
|
-
# Initialize time quality flag
|
|
555
|
-
# - 0 when ok or just rounded to closest 00
|
|
556
|
-
# - 1 if previous timestep is missing
|
|
557
|
-
# - 2 if next timestep is missing
|
|
558
|
-
# - 3 if previous and next timestep is missing
|
|
559
|
-
# - 4 if solved duplicated timesteps
|
|
560
|
-
# - 5 if needed to drop duplicated timesteps and select the last
|
|
561
|
-
flag_previous_missing = 1
|
|
562
|
-
flag_next_missing = 2
|
|
563
|
-
flag_isolated_timestep = 3
|
|
564
|
-
flag_solved_duplicated_timestep = 4
|
|
565
|
-
flag_dropped_duplicated_timestep = 5
|
|
566
|
-
qc_flag = np.zeros(adjusted_times.shape)
|
|
567
|
-
|
|
568
|
-
# Initialize list with the duplicated timesteps index to drop
|
|
569
|
-
# - We drop the first occurrence because is likely the shortest interval
|
|
570
|
-
idx_to_drop = []
|
|
571
|
-
|
|
572
|
-
# Attempt to resolve for duplicates
|
|
573
|
-
if duplicates.size > 0:
|
|
574
|
-
# Handle duplicates
|
|
575
|
-
for dup_time in duplicates:
|
|
576
|
-
# Indices of duplicates
|
|
577
|
-
dup_indices = np.where(adjusted_times == dup_time)[0]
|
|
578
|
-
n_duplicates = len(dup_indices)
|
|
579
|
-
# Define previous and following timestep
|
|
580
|
-
prev_time = dup_time - pd.Timedelta(seconds=sample_interval)
|
|
581
|
-
next_time = dup_time + pd.Timedelta(seconds=sample_interval)
|
|
582
|
-
# Try to find missing slots before and after
|
|
583
|
-
# - If more than 3 duplicates, impossible to solve !
|
|
584
|
-
count_solved = 0
|
|
585
|
-
# If the previous timestep is available, set that one
|
|
586
|
-
if n_duplicates == 2:
|
|
587
|
-
if prev_time not in adjusted_times:
|
|
588
|
-
adjusted_times[dup_indices[0]] = prev_time
|
|
589
|
-
qc_flag[dup_indices[0]] = flag_solved_duplicated_timestep
|
|
590
|
-
count_solved += 1
|
|
591
|
-
elif next_time not in adjusted_times:
|
|
592
|
-
adjusted_times[dup_indices[-1]] = next_time
|
|
593
|
-
qc_flag[dup_indices[-1]] = flag_solved_duplicated_timestep
|
|
594
|
-
count_solved += 1
|
|
595
|
-
else:
|
|
596
|
-
pass
|
|
597
|
-
elif n_duplicates == 3:
|
|
598
|
-
if prev_time not in adjusted_times:
|
|
599
|
-
adjusted_times[dup_indices[0]] = prev_time
|
|
600
|
-
qc_flag[dup_indices[0]] = flag_solved_duplicated_timestep
|
|
601
|
-
count_solved += 1
|
|
602
|
-
if next_time not in adjusted_times:
|
|
603
|
-
adjusted_times[dup_indices[-1]] = next_time
|
|
604
|
-
qc_flag[dup_indices[-1]] = flag_solved_duplicated_timestep
|
|
605
|
-
count_solved += 1
|
|
606
|
-
if count_solved != n_duplicates - 1:
|
|
607
|
-
idx_to_drop = np.append(idx_to_drop, dup_indices[0:-1])
|
|
608
|
-
qc_flag[dup_indices[-1]] = flag_dropped_duplicated_timestep
|
|
609
|
-
msg = (
|
|
610
|
-
f"Cannot resolve {n_duplicates} duplicated timesteps "
|
|
611
|
-
f"(after trailing seconds correction) around {dup_time}."
|
|
612
|
-
)
|
|
613
|
-
log_warning(logger=logger, msg=msg, verbose=verbose)
|
|
614
|
-
if robust:
|
|
615
|
-
raise ValueError(msg)
|
|
616
|
-
|
|
617
|
-
# Update the time coordinate (Convert to ns for xarray compatibility)
|
|
618
|
-
ds = ds.assign_coords({"time": adjusted_times.astype("datetime64[ns]")})
|
|
619
|
-
|
|
620
|
-
# Update quality flag values for next and previous timestep is missing
|
|
621
|
-
if add_quality_flag:
|
|
622
|
-
idx_previous_missing, idx_next_missing, idx_isolated_missing = get_problematic_timestep_indices(
|
|
623
|
-
adjusted_times,
|
|
624
|
-
sample_interval,
|
|
625
|
-
)
|
|
626
|
-
qc_flag[idx_previous_missing] = np.maximum(qc_flag[idx_previous_missing], flag_previous_missing)
|
|
627
|
-
qc_flag[idx_next_missing] = np.maximum(qc_flag[idx_next_missing], flag_next_missing)
|
|
628
|
-
qc_flag[idx_isolated_missing] = np.maximum(qc_flag[idx_isolated_missing], flag_isolated_timestep)
|
|
629
|
-
|
|
630
|
-
# If the first timestep is at 00:00 and currently flagged as previous missing (1), reset to 0
|
|
631
|
-
# first_time = pd.to_datetime(adjusted_times[0]).time()
|
|
632
|
-
# first_expected_time = pd.Timestamp("00:00:00").time()
|
|
633
|
-
# if first_time == first_expected_time and qc_flag[0] == flag_previous_missing:
|
|
634
|
-
# qc_flag[0] = 0
|
|
635
|
-
|
|
636
|
-
# # If the last timestep is flagged and currently flagged as next missing (2), reset it to 0
|
|
637
|
-
# last_time = pd.to_datetime(adjusted_times[-1]).time()
|
|
638
|
-
# last_time_expected = (pd.Timestamp("00:00:00") - pd.Timedelta(30, unit="seconds")).time()
|
|
639
|
-
# # Check if adding one interval would go beyond the end_time
|
|
640
|
-
# if last_time == last_time_expected and qc_flag[-1] == flag_next_missing:
|
|
641
|
-
# qc_flag[-1] = 0
|
|
642
|
-
|
|
643
|
-
# Assign time quality flag coordinate
|
|
644
|
-
ds["time_qc"] = xr.DataArray(qc_flag, dims="time")
|
|
645
|
-
ds = ds.set_coords("time_qc")
|
|
646
|
-
|
|
647
|
-
# Drop duplicated timesteps
|
|
648
|
-
# - Using ds = ds.drop_isel({"time": idx_to_drop.astype(int)}) raise:
|
|
649
|
-
# --> pandas.errors.InvalidIndexError: Reindexing only valid with uniquely valued Index objects
|
|
650
|
-
# --> https://github.com/pydata/xarray/issues/6605
|
|
651
|
-
if len(idx_to_drop) > 0:
|
|
652
|
-
idx_to_drop = idx_to_drop.astype(int)
|
|
653
|
-
idx_valid_timesteps = np.arange(0, ds["time"].size)
|
|
654
|
-
idx_valid_timesteps = np.delete(idx_valid_timesteps, idx_to_drop)
|
|
655
|
-
ds = ds.isel(time=idx_valid_timesteps)
|
|
656
|
-
# Return dataset
|
|
657
|
-
return ds
|
disdrodb/utils/writer.py
CHANGED
|
@@ -22,11 +22,29 @@ import os
|
|
|
22
22
|
|
|
23
23
|
import xarray as xr
|
|
24
24
|
|
|
25
|
-
from disdrodb.utils.attrs import set_disdrodb_attrs
|
|
25
|
+
from disdrodb.utils.attrs import get_attrs_dict, set_attrs, set_disdrodb_attrs
|
|
26
26
|
from disdrodb.utils.directories import create_directory, remove_if_exists
|
|
27
|
+
from disdrodb.utils.encoding import get_encodings_dict, set_encodings
|
|
27
28
|
|
|
28
29
|
|
|
29
|
-
def
|
|
30
|
+
def finalize_product(ds, product=None) -> xr.Dataset:
|
|
31
|
+
"""Finalize DISDRODB product."""
|
|
32
|
+
# Add variables attributes
|
|
33
|
+
attrs_dict = get_attrs_dict()
|
|
34
|
+
ds = set_attrs(ds, attrs_dict=attrs_dict)
|
|
35
|
+
|
|
36
|
+
# Add variables encoding
|
|
37
|
+
encodings_dict = get_encodings_dict()
|
|
38
|
+
ds = set_encodings(ds, encodings_dict=encodings_dict)
|
|
39
|
+
|
|
40
|
+
# Add DISDRODB global attributes
|
|
41
|
+
# - e.g. in generate_l2_radar it inherit from input dataset !
|
|
42
|
+
if product is not None:
|
|
43
|
+
ds = set_disdrodb_attrs(ds, product=product)
|
|
44
|
+
return ds
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def write_product(ds: xr.Dataset, filepath: str, force: bool = False) -> None:
|
|
30
48
|
"""Save the xarray dataset into a NetCDF file.
|
|
31
49
|
|
|
32
50
|
Parameters
|
|
@@ -35,8 +53,6 @@ def write_product(ds: xr.Dataset, filepath: str, product: str, force: bool = Fal
|
|
|
35
53
|
Input xarray dataset.
|
|
36
54
|
filepath : str
|
|
37
55
|
Output file path.
|
|
38
|
-
product: str
|
|
39
|
-
DISDRODB product name.
|
|
40
56
|
force : bool, optional
|
|
41
57
|
Whether to overwrite existing data.
|
|
42
58
|
If ``True``, overwrite existing data into destination directories.
|
|
@@ -50,8 +66,5 @@ def write_product(ds: xr.Dataset, filepath: str, product: str, force: bool = Fal
|
|
|
50
66
|
# - If force=False --> Raise error
|
|
51
67
|
remove_if_exists(filepath, force=force)
|
|
52
68
|
|
|
53
|
-
# Update attributes
|
|
54
|
-
ds = set_disdrodb_attrs(ds, product=product)
|
|
55
|
-
|
|
56
69
|
# Write netcdf
|
|
57
70
|
ds.to_netcdf(filepath, engine="netcdf4")
|
disdrodb/utils/xarray.py
CHANGED
|
@@ -21,6 +21,8 @@ import numpy as np
|
|
|
21
21
|
import xarray as xr
|
|
22
22
|
from xarray.core import dtypes
|
|
23
23
|
|
|
24
|
+
from disdrodb.constants import DIAMETER_COORDS, VELOCITY_COORDS
|
|
25
|
+
|
|
24
26
|
|
|
25
27
|
def xr_get_last_valid_idx(da_condition, dim, fill_value=None):
|
|
26
28
|
"""
|
|
@@ -104,6 +106,7 @@ def xr_get_last_valid_idx(da_condition, dim, fill_value=None):
|
|
|
104
106
|
def _check_coord_handling(coord_handling):
|
|
105
107
|
if coord_handling not in {"keep", "drop", "unstack"}:
|
|
106
108
|
raise ValueError("coord_handling must be one of 'keep', 'drop', or 'unstack'.")
|
|
109
|
+
return coord_handling
|
|
107
110
|
|
|
108
111
|
|
|
109
112
|
def _unstack_coordinates(xr_obj, dim, prefix, suffix):
|
|
@@ -161,6 +164,8 @@ def unstack_datarray_dimension(da, dim, coord_handling="keep", prefix="", suffix
|
|
|
161
164
|
"""
|
|
162
165
|
# Retrieve DataArray name
|
|
163
166
|
name = da.name
|
|
167
|
+
coord_handling = _check_coord_handling(coord_handling)
|
|
168
|
+
|
|
164
169
|
# Unstack variables
|
|
165
170
|
ds = da.to_dataset(dim=dim)
|
|
166
171
|
rename_dict = {dim_value: f"{prefix}{name}{suffix}{dim_value}" for dim_value in list(ds.data_vars)}
|
|
@@ -246,13 +251,9 @@ def define_fill_value_dictionary(xr_obj):
|
|
|
246
251
|
|
|
247
252
|
def remove_diameter_coordinates(xr_obj):
|
|
248
253
|
"""Drop diameter coordinates from xarray object."""
|
|
249
|
-
from disdrodb import DIAMETER_COORDS
|
|
250
|
-
|
|
251
254
|
return xr_obj.drop_vars(DIAMETER_COORDS, errors="ignore")
|
|
252
255
|
|
|
253
256
|
|
|
254
257
|
def remove_velocity_coordinates(xr_obj):
|
|
255
258
|
"""Drop velocity coordinates from xarray object."""
|
|
256
|
-
from disdrodb import VELOCITY_COORDS
|
|
257
|
-
|
|
258
259
|
return xr_obj.drop_vars(VELOCITY_COORDS, errors="ignore")
|
disdrodb/viz/__init__.py
CHANGED
|
@@ -15,3 +15,16 @@
|
|
|
15
15
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
16
16
|
# -----------------------------------------------------------------------------.
|
|
17
17
|
"""DISDRODB Visualization Module."""
|
|
18
|
+
from disdrodb.viz.plots import (
|
|
19
|
+
compute_dense_lines,
|
|
20
|
+
max_blend_images,
|
|
21
|
+
plot_nd,
|
|
22
|
+
to_rgba,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
__all__ = [
|
|
26
|
+
"compute_dense_lines",
|
|
27
|
+
"max_blend_images",
|
|
28
|
+
"plot_nd",
|
|
29
|
+
"to_rgba",
|
|
30
|
+
]
|