disdrodb-0.1.3-py3-none-any.whl → disdrodb-0.1.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. disdrodb/__init__.py +4 -0
  2. disdrodb/_version.py +2 -2
  3. disdrodb/api/checks.py +70 -47
  4. disdrodb/api/configs.py +0 -2
  5. disdrodb/api/create_directories.py +0 -2
  6. disdrodb/api/info.py +3 -3
  7. disdrodb/api/io.py +48 -8
  8. disdrodb/api/path.py +116 -133
  9. disdrodb/api/search.py +12 -3
  10. disdrodb/cli/disdrodb_create_summary.py +113 -0
  11. disdrodb/cli/disdrodb_create_summary_station.py +11 -1
  12. disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
  13. disdrodb/cli/disdrodb_run_l0b_station.py +2 -2
  14. disdrodb/cli/disdrodb_run_l0c_station.py +2 -2
  15. disdrodb/cli/disdrodb_run_l1_station.py +2 -2
  16. disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
  17. disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
  18. disdrodb/constants.py +1 -1
  19. disdrodb/data_transfer/download_data.py +123 -7
  20. disdrodb/etc/products/L1/global.yaml +1 -1
  21. disdrodb/etc/products/L2E/5MIN.yaml +1 -0
  22. disdrodb/etc/products/L2E/global.yaml +1 -1
  23. disdrodb/etc/products/L2M/GAMMA_GS_ND_MAE.yaml +6 -0
  24. disdrodb/etc/products/L2M/GAMMA_ML.yaml +1 -1
  25. disdrodb/etc/products/L2M/LOGNORMAL_GS_LOG_ND_MAE.yaml +6 -0
  26. disdrodb/etc/products/L2M/LOGNORMAL_GS_ND_MAE.yaml +6 -0
  27. disdrodb/etc/products/L2M/LOGNORMAL_ML.yaml +8 -0
  28. disdrodb/etc/products/L2M/global.yaml +11 -3
  29. disdrodb/issue/writer.py +2 -0
  30. disdrodb/l0/check_configs.py +49 -16
  31. disdrodb/l0/configs/LPM/l0a_encodings.yml +2 -2
  32. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +2 -2
  33. disdrodb/l0/configs/LPM/l0b_encodings.yml +2 -2
  34. disdrodb/l0/configs/LPM/raw_data_format.yml +2 -2
  35. disdrodb/l0/configs/PWS100/l0b_encodings.yml +1 -0
  36. disdrodb/l0/configs/SWS250/bins_diameter.yml +108 -0
  37. disdrodb/l0/configs/SWS250/bins_velocity.yml +83 -0
  38. disdrodb/l0/configs/SWS250/l0a_encodings.yml +18 -0
  39. disdrodb/l0/configs/SWS250/l0b_cf_attrs.yml +72 -0
  40. disdrodb/l0/configs/SWS250/l0b_encodings.yml +155 -0
  41. disdrodb/l0/configs/SWS250/raw_data_format.yml +148 -0
  42. disdrodb/l0/l0a_processing.py +10 -5
  43. disdrodb/l0/l0b_nc_processing.py +10 -6
  44. disdrodb/l0/l0b_processing.py +92 -72
  45. disdrodb/l0/l0c_processing.py +369 -251
  46. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +8 -1
  47. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +2 -2
  48. disdrodb/l0/readers/LPM/BELGIUM/ULIEGE.py +256 -0
  49. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +2 -2
  50. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +2 -2
  51. disdrodb/l0/readers/LPM/GERMANY/DWD.py +491 -0
  52. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +2 -2
  53. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  54. disdrodb/l0/readers/LPM/KIT/CHWALA.py +2 -2
  55. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +107 -12
  56. disdrodb/l0/readers/LPM/SLOVENIA/UL.py +3 -3
  57. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +2 -2
  58. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +5 -14
  59. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +5 -14
  60. disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL.py +117 -8
  61. disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
  62. disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +10 -14
  63. disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +10 -14
  64. disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
  65. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +8 -14
  66. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +382 -0
  67. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +4 -0
  68. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +1 -1
  69. disdrodb/l0/readers/PARSIVEL2/GREECE/NOA.py +127 -0
  70. disdrodb/l0/readers/PARSIVEL2/ITALY/HYDROX.py +239 -0
  71. disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +136 -0
  72. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
  73. disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
  74. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +5 -11
  75. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +4 -17
  76. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +5 -14
  77. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +10 -13
  78. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +10 -13
  79. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +3 -0
  80. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PANGASA.py +232 -0
  81. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +6 -18
  82. disdrodb/l0/readers/PARSIVEL2/SPAIN/GRANADA.py +120 -0
  83. disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +7 -25
  84. disdrodb/l0/readers/PWS100/AUSTRIA/HOAL.py +321 -0
  85. disdrodb/l0/readers/SW250/BELGIUM/KMI.py +239 -0
  86. disdrodb/l1/beard_model.py +31 -129
  87. disdrodb/l1/fall_velocity.py +156 -57
  88. disdrodb/l1/filters.py +25 -28
  89. disdrodb/l1/processing.py +12 -14
  90. disdrodb/l1_env/routines.py +46 -17
  91. disdrodb/l2/empirical_dsd.py +6 -0
  92. disdrodb/l2/processing.py +3 -3
  93. disdrodb/metadata/checks.py +132 -125
  94. disdrodb/metadata/geolocation.py +0 -2
  95. disdrodb/psd/fitting.py +180 -210
  96. disdrodb/psd/models.py +1 -1
  97. disdrodb/routines/__init__.py +54 -0
  98. disdrodb/{l0/routines.py → routines/l0.py} +288 -418
  99. disdrodb/{l1/routines.py → routines/l1.py} +60 -92
  100. disdrodb/{l2/routines.py → routines/l2.py} +284 -485
  101. disdrodb/{routines.py → routines/wrappers.py} +100 -7
  102. disdrodb/scattering/axis_ratio.py +95 -85
  103. disdrodb/scattering/permittivity.py +24 -0
  104. disdrodb/scattering/routines.py +56 -36
  105. disdrodb/summary/routines.py +147 -45
  106. disdrodb/utils/archiving.py +434 -0
  107. disdrodb/utils/attrs.py +2 -0
  108. disdrodb/utils/cli.py +5 -5
  109. disdrodb/utils/dask.py +62 -1
  110. disdrodb/utils/decorators.py +31 -0
  111. disdrodb/utils/encoding.py +10 -1
  112. disdrodb/{l2 → utils}/event.py +1 -66
  113. disdrodb/utils/logger.py +1 -1
  114. disdrodb/utils/manipulations.py +22 -12
  115. disdrodb/utils/routines.py +166 -0
  116. disdrodb/utils/time.py +5 -293
  117. disdrodb/utils/xarray.py +3 -0
  118. disdrodb/viz/plots.py +109 -15
  119. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/METADATA +3 -2
  120. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/RECORD +124 -96
  121. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/entry_points.txt +1 -0
  122. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/WHEEL +0 -0
  123. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/licenses/LICENSE +0 -0
  124. {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/top_level.txt +0 -0
disdrodb/{l2 → utils}/event.py RENAMED
@@ -19,8 +19,7 @@
 import numpy as np
 import pandas as pd
 
-from disdrodb.api.info import get_start_end_time_from_filepaths
-from disdrodb.utils.time import ensure_timedelta_seconds_interval, temporal_resolution_to_seconds
+from disdrodb.utils.time import temporal_resolution_to_seconds
 
 
 def group_timesteps_into_event(
@@ -229,67 +228,3 @@ def group_timesteps_into_events(timesteps, event_max_time_gap):
 
 
 ####-----------------------------------------------------------------------------------.
-
-
-def get_files_partitions(list_partitions, filepaths, sample_interval, accumulation_interval, rolling):  # noqa: ARG001
-    """
-    Provide information about the required files for each event.
-
-    For each event in `list_partitions`, this function identifies the file paths from `filepaths` that
-    overlap with the event period, adjusted by the `accumulation_interval`. The event period is
-    extended backward or forward based on the `rolling` parameter.
-
-    Parameters
-    ----------
-    list_partitions : list of dict
-        List of events, where each event is a dictionary containing at least 'start_time' and 'end_time'
-        keys with `numpy.datetime64` values.
-    filepaths : list of str
-        List of file paths corresponding to data files.
-    sample_interval : numpy.timedelta64 or int
-        The sample interval of the input dataset.
-    accumulation_interval : numpy.timedelta64 or int
-        Time interval to adjust the event period for accumulation. If an integer is provided, it is
-        assumed to be in seconds.
-    rolling : bool
-        If True, adjust the event period backward by `accumulation_interval` (rolling backward).
-        If False, adjust forward (aggregate forward).
-
-    Returns
-    -------
-    list of dict
-        A list where each element is a dictionary containing:
-        - 'start_time': Adjusted start time of the event (`numpy.datetime64`).
-        - 'end_time': Adjusted end time of the event (`numpy.datetime64`).
-        - 'filepaths': List of file paths overlapping with the adjusted event period.
-
-    """
-    # Ensure sample_interval and accumulation_interval is numpy.timedelta64
-    accumulation_interval = ensure_timedelta_seconds_interval(accumulation_interval)
-    sample_interval = ensure_timedelta_seconds_interval(sample_interval)
-
-    # Retrieve file start_time and end_time
-    files_start_time, files_end_time = get_start_end_time_from_filepaths(filepaths)
-
-    # Retrieve information for each event
-    event_info = []
-    for event_dict in list_partitions:
-        # Retrieve event time period
-        event_start_time = event_dict["start_time"]
-        event_end_time = event_dict["end_time"]
-
-        # Adapt event_end_time if accumulation interval different from sample interval
-        if sample_interval != accumulation_interval:
-            event_end_time = event_end_time + accumulation_interval
-
-        # Derive event filepaths
-        overlaps = (files_start_time <= event_end_time) & (files_end_time >= event_start_time)
-        event_filepaths = np.array(filepaths)[overlaps].tolist()
-
-        # Create dictionary
-        if len(event_filepaths) > 0:
-            event_info.append(
-                {"start_time": event_start_time, "end_time": event_end_time, "filepaths": event_filepaths},
-            )
-
-    return event_info
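
A quick, self-contained illustration of the interval-overlap test documented in the removed get_files_partitions above (toy timestamps, not package code):

import numpy as np

# A file is selected when its [start, end] period intersects the event period,
# here extended by a 5-minute accumulation interval as described in the docstring.
files_start_time = np.array(["2024-01-01T00:00", "2024-01-01T06:00", "2024-01-02T00:00"], dtype="datetime64[s]")
files_end_time = np.array(["2024-01-01T05:59", "2024-01-01T11:59", "2024-01-02T05:59"], dtype="datetime64[s]")
event_start_time = np.datetime64("2024-01-01T04:00")
event_end_time = np.datetime64("2024-01-01T07:00") + np.timedelta64(300, "s")  # extended by accumulation_interval

overlaps = (files_start_time <= event_end_time) & (files_end_time >= event_start_time)
print(overlaps)  # -> [ True  True False]
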
disdrodb/utils/logger.py CHANGED
@@ -42,7 +42,7 @@ def create_logger_file(logs_dir, filename, parallel):
     format_type = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
     handler.setFormatter(logging.Formatter(format_type))
     logger.addHandler(handler)
-    logger.setLevel(logging.DEBUG)
+    logger.setLevel(logging.INFO)
 
     # Define logger filepath
     # - LogCaptureHandler of pytest does not have baseFilename attribute --> So set None
disdrodb/utils/manipulations.py CHANGED
@@ -20,6 +20,7 @@
 
 import numpy as np
 
+from disdrodb.constants import DIAMETER_DIMENSION
 from disdrodb.utils.xarray import unstack_datarray_dimension
 
 
@@ -53,19 +54,28 @@ def unstack_radar_variables(ds):
     return ds
 
 
-def resample_drop_number_concentration(da, diameter_bin_edges, method="linear"):
-    """Resample drop number concentration N(D) DataArray to high resolution diameter bins."""
-    diameters_bin_center = diameter_bin_edges[:-1] + np.diff(diameter_bin_edges) / 2
-
-    da = da.interp(coords={"diameter_bin_center": diameters_bin_center}, method=method)
+def get_diameter_coords_dict_from_bin_edges(diameter_bin_edges):
+    """Get dictionary with all relevant diameter coordinates."""
+    if np.size(diameter_bin_edges) < 2:
+        raise ValueError("Expecting at least 2 values defining bin edges.")
+    diameter_bin_center = diameter_bin_edges[:-1] + np.diff(diameter_bin_edges) / 2
     diameter_bin_width = np.diff(diameter_bin_edges)
     diameter_bin_lower = diameter_bin_edges[:-1]
     diameter_bin_upper = diameter_bin_edges[1:]
-    da = da.assign_coords(
-        {
-            "diameter_bin_width": ("diameter_bin_center", diameter_bin_width),
-            "diameter_bin_lower": ("diameter_bin_center", diameter_bin_lower),
-            "diameter_bin_upper": ("diameter_bin_center", diameter_bin_upper),
-        },
-    )
+    coords_dict = {
+        "diameter_bin_center": (DIAMETER_DIMENSION, diameter_bin_center),
+        "diameter_bin_width": (DIAMETER_DIMENSION, diameter_bin_width),
+        "diameter_bin_lower": (DIAMETER_DIMENSION, diameter_bin_lower),
+        "diameter_bin_upper": (DIAMETER_DIMENSION, diameter_bin_upper),
+    }
+    return coords_dict
+
+
+def resample_drop_number_concentration(drop_number_concentration, diameter_bin_edges, method="linear"):
+    """Resample drop number concentration N(D) DataArray to high resolution diameter bins."""
+    diameters_bin_center = diameter_bin_edges[:-1] + np.diff(diameter_bin_edges) / 2
+
+    da = drop_number_concentration.interp(coords={"diameter_bin_center": diameters_bin_center}, method=method)
+    coords_dict = get_diameter_coords_dict_from_bin_edges(diameter_bin_edges)
+    da = da.assign_coords(coords_dict)
     return da
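
A minimal usage sketch of the reworked helpers (illustrative values only; the module path disdrodb.utils.manipulations is inferred from the file list, and DIAMETER_DIMENSION is assumed to name the "diameter_bin_center" dimension):

import numpy as np
import xarray as xr

from disdrodb.utils.manipulations import resample_drop_number_concentration  # assumed module path

# Hypothetical N(D) on a coarse diameter grid [mm] (values are made up)
coarse_edges = np.array([0.0, 0.5, 1.0, 1.5, 2.0])
coarse_centers = coarse_edges[:-1] + np.diff(coarse_edges) / 2
nd = xr.DataArray(
    [120.0, 80.0, 30.0, 5.0],
    dims="diameter_bin_center",
    coords={"diameter_bin_center": coarse_centers},
    name="drop_number_concentration",
)

# Interpolate onto finer bins; the width/lower/upper coordinates are rebuilt
# from the new edges via get_diameter_coords_dict_from_bin_edges.
fine_edges = np.arange(0.0, 2.01, 0.1)
nd_fine = resample_drop_number_concentration(nd, diameter_bin_edges=fine_edges, method="linear")
print(nd_fine["diameter_bin_width"].to_numpy()[:3])  # ~0.1 mm wide bins
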
disdrodb/utils/routines.py ADDED
@@ -0,0 +1,166 @@
+#!/usr/bin/env python3
+
+# -----------------------------------------------------------------------------.
+# Copyright (c) 2021-2023 DISDRODB developers
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# -----------------------------------------------------------------------------.
+"""Utilities for DISDRODB processing routines."""
+import os
+import shutil
+import tempfile
+
+from disdrodb.api.io import find_files
+from disdrodb.api.path import define_file_folder_path, define_temporal_resolution
+from disdrodb.utils.logger import (
+    close_logger,
+    create_logger_file,
+    log_error,
+    log_info,
+)
+
+
+def is_possible_product(accumulation_interval, sample_interval, rolling):
+    """Assess if production is possible given the requested accumulation interval and source sample_interval."""
+    # Avoid rolling product generation at source sample interval
+    if rolling and accumulation_interval == sample_interval:
+        return False
+    # Avoid product generation if the accumulation_interval is less than the sample interval
+    if accumulation_interval < sample_interval:
+        return False
+    # Avoid producti generation if accumulation_interval is not multiple of sample_interval
+    return accumulation_interval % sample_interval == 0
+
+
+def try_get_required_filepaths(
+    product,
+    data_archive_dir,
+    data_source,
+    campaign_name,
+    station_name,
+    debugging_mode,
+    **product_kwargs,
+):
+    """Try to retrieve required filepaths for a product, or return None if unavailable."""
+    try:
+        filepaths = find_files(
+            data_archive_dir=data_archive_dir,
+            data_source=data_source,
+            campaign_name=campaign_name,
+            station_name=station_name,
+            product=product,
+            debugging_mode=debugging_mode,
+            **product_kwargs,
+        )
+        return filepaths
+    # If no files available, print informative message
+    except Exception as e:
+        temporal_resolution = ""
+        if "sample_interval" in product_kwargs and "rolling" in product_kwargs:
+            temporal_resolution = define_temporal_resolution(
+                seconds=product_kwargs["sample_interval"],
+                rolling=product_kwargs["rolling"],
+            )
+        print(str(e))
+        msg = (
+            f"{product} processing of {data_source} {campaign_name} {station_name} "
+            f"has not been launched because of missing {product} {temporal_resolution} data."
+        )
+        print(msg)
+        return None
+
+
+def run_product_generation(
+    product: str,
+    logs_dir: str,
+    logs_filename: str,
+    parallel: bool,
+    verbose: bool,
+    folder_partitioning: str,
+    core_func: callable,
+    core_func_kwargs: dict,
+    pass_logger=False,
+):
+    """
+    Generic wrapper for DISDRODB product generation.
+
+    Parameters
+    ----------
+    product : str
+        Product name (e.g., "L0A", "L0B", ...).
+
+    logs_dir : str
+        Logs directory.
+    logs_filename : str
+        Logs filename.
+    parallel : bool
+        Parallel flag (for logger).
+    verbose : bool
+        Verbose logging flag.
+    folder_partitioning : str
+        Partitioning scheme.
+    core_func : callable
+        Function with signature `core_func(logger)` that does the product-specific work.
+        Must return an xarray.Dataset or pandas.DataFrame (used to determine log subdir).
+    """
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # Initialize log file
+        logger, tmp_logger_filepath = create_logger_file(
+            logs_dir=tmpdir,
+            filename=logs_filename,
+            parallel=parallel,
+        )
+
+        # Inform that product creation has started
+        log_info(logger, f"{product} processing of {logs_filename} has started.", verbose=verbose)
+
+        # Initialize object
+        obj = None  # if None, means the product creation failed
+
+        # Add logger to core_func_kwargs if specified
+        if pass_logger:
+            core_func_kwargs["logger"] = logger
+
+        # Try product creation
+        try:
+            # Run product creation
+            obj = core_func(**core_func_kwargs)
+
+            # Inform that product creation has ended
+            log_info(logger, f"{product} processing of {logs_filename} has ended.", verbose=verbose)
+
+        # Report error if the case
+        except Exception as e:
+            log_error(logger, f"{type(e).__name__}: {e}", verbose=verbose)
+
+        finally:
+            # Close logger
+            close_logger(logger)
+
+        # Move log file to final logs directory
+        success_flag = obj is not None
+        if success_flag:  # and "time" in obj and len(obj["time"]) > 0:
+            logs_dir = define_file_folder_path(obj, dir_path=logs_dir, folder_partitioning=folder_partitioning)
+            os.makedirs(logs_dir, exist_ok=True)
+            if tmp_logger_filepath is not None:  # (when running pytest, tmp_logger_filepath is None)
+                logger_filepath = os.path.join(logs_dir, os.path.basename(tmp_logger_filepath))
+                shutil.move(tmp_logger_filepath, logger_filepath)
+        else:
+            logger_filepath = None
+
+        # Free memory
+        del obj
+
+        # Return logger filepath
+        return logger_filepath
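
A hypothetical invocation sketch of the new wrapper (the paths, names and toy core function below are invented for illustration; only run_product_generation's signature is taken from the diff above):

import xarray as xr

from disdrodb.utils.routines import run_product_generation  # module path implied by the file list


def core_func(filepath, logger=None):
    """Toy product-specific step: open the input file and return the dataset."""
    ds = xr.open_dataset(filepath)
    # ... product-specific processing and writing would happen here ...
    return ds


logger_filepath = run_product_generation(
    product="L1",
    logs_dir="/tmp/DISDRODB/logs/L1",          # hypothetical logs directory
    logs_filename="L1.EXAMPLE_STATION",        # hypothetical log file name
    parallel=False,
    verbose=True,
    folder_partitioning="year/month",          # assumed partitioning string
    core_func=core_func,
    core_func_kwargs={"filepath": "/tmp/DISDRODB/L0C/example.nc"},  # hypothetical input
    pass_logger=True,                          # injects `logger` into core_func_kwargs
)

On success the wrapper moves the temporary log file under a partitioned subfolder derived from the returned object (presumably its time coverage) and returns that path; on failure it logs the exception and returns None.
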
disdrodb/utils/time.py CHANGED
@@ -29,6 +29,7 @@ from disdrodb.utils.xarray import define_fill_value_dictionary
 
 logger = logging.getLogger(__name__)
 
+
 ####------------------------------------------------------------------------------------.
 #### Sampling Interval Acronyms
 
@@ -61,7 +62,7 @@ def seconds_to_temporal_resolution(seconds):
     return temporal_resolution
 
 
-def get_resampling_information(temporal_resolution):
+def get_sampling_information(temporal_resolution):
     """
     Extract resampling information from the temporal_resolution string.
 
@@ -126,7 +127,7 @@ def temporal_resolution_to_seconds(temporal_resolution):
     seconds
         Duration in seconds.
     """
-    seconds, _ = get_resampling_information(temporal_resolution)
+    seconds, _ = get_sampling_information(temporal_resolution)
    return seconds
 
 
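
The rename only touches the helper's name; as a rough round-trip check (the exact acronym grammar lives in get_sampling_information and is not reproduced here; the "5MIN" style is inferred from the L2E product configs):

from disdrodb.utils.time import seconds_to_temporal_resolution, temporal_resolution_to_seconds

acronym = seconds_to_temporal_resolution(300)            # expected to yield a "5MIN"-style acronym
assert temporal_resolution_to_seconds(acronym) == 300    # inverse mapping, now via get_sampling_information
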
@@ -401,8 +402,8 @@ def ensure_sample_interval_in_seconds(sample_interval): # noqa: PLR0911
     )
 
 
-def ensure_timedelta_seconds_interval(interval):
-    """Return interval as numpy.timedelta64 in seconds."""
+def ensure_timedelta_seconds(interval):
+    """Return an a scalar value/array in seconds or timedelta object as numpy.timedelta64 in seconds."""
     if isinstance(interval, (xr.DataArray, np.ndarray)):
         return ensure_sample_interval_in_seconds(interval).astype("m8[s]")
     return np.array(ensure_sample_interval_in_seconds(interval), dtype="m8[s]")
@@ -512,292 +513,3 @@ def infer_sample_interval(ds, robust=False, verbose=False, logger=None):
         )
         log_warning(logger=logger, msg=msg, verbose=verbose)
     return int(sample_interval)
-
-
-####---------------------------------------------------------------------------------
-#### Timesteps regularization
-
-
-def get_problematic_timestep_indices(timesteps, sample_interval):
-    """Identify timesteps with missing previous or following timesteps."""
-    previous_time = timesteps - pd.Timedelta(seconds=sample_interval)
-    next_time = timesteps + pd.Timedelta(seconds=sample_interval)
-    idx_previous_missing = np.where(~np.isin(previous_time, timesteps))[0][1:]
-    idx_next_missing = np.where(~np.isin(next_time, timesteps))[0][:-1]
-    idx_isolated_missing = np.intersect1d(idx_previous_missing, idx_next_missing)
-    idx_previous_missing = idx_previous_missing[np.isin(idx_previous_missing, idx_isolated_missing, invert=True)]
-    idx_next_missing = idx_next_missing[np.isin(idx_next_missing, idx_isolated_missing, invert=True)]
-    return idx_previous_missing, idx_next_missing, idx_isolated_missing
-
-
-def regularize_timesteps(ds, sample_interval, robust=False, add_quality_flag=True, logger=None, verbose=True):
-    """Ensure timesteps match with the sample_interval.
-
-    This function:
-    - drop dataset indices with duplicated timesteps,
-    - but does not add missing timesteps to the dataset.
-    """
-    # Check sorted by time and sort if necessary
-    ds = ensure_sorted_by_time(ds)
-
-    # Convert time to pandas.DatetimeIndex for easier manipulation
-    times = pd.to_datetime(ds["time"].to_numpy())
-
-    # Determine the start and end times
-    start_time = times[0].floor(f"{sample_interval}s")
-    end_time = times[-1].ceil(f"{sample_interval}s")
-
-    # Create the expected time grid
-    expected_times = pd.date_range(start=start_time, end=end_time, freq=f"{sample_interval}s")
-
-    # Convert to numpy arrays
-    times = times.to_numpy(dtype="M8[s]")
-    expected_times = expected_times.to_numpy(dtype="M8[s]")
-
-    # Map original times to the nearest expected times
-    # Calculate the difference between original times and expected times
-    time_deltas = np.abs(times - expected_times[:, None]).astype(int)
-
-    # Find the index of the closest expected time for each original time
-    nearest_indices = np.argmin(time_deltas, axis=0)
-    adjusted_times = expected_times[nearest_indices]
-
-    # Check for duplicates in adjusted times
-    unique_times, counts = np.unique(adjusted_times, return_counts=True)
-    duplicates = unique_times[counts > 1]
-
-    # Initialize time quality flag
-    # - 0 when ok or just rounded to closest 00
-    # - 1 if previous timestep is missing
-    # - 2 if next timestep is missing
-    # - 3 if previous and next timestep is missing
-    # - 4 if solved duplicated timesteps
-    # - 5 if needed to drop duplicated timesteps and select the last
-    flag_previous_missing = 1
-    flag_next_missing = 2
-    flag_isolated_timestep = 3
-    flag_solved_duplicated_timestep = 4
-    flag_dropped_duplicated_timestep = 5
-    qc_flag = np.zeros(adjusted_times.shape)
-
-    # Initialize list with the duplicated timesteps index to drop
-    # - We drop the first occurrence because is likely the shortest interval
-    idx_to_drop = []
-
-    # Attempt to resolve for duplicates
-    if duplicates.size > 0:
-        # Handle duplicates
-        for dup_time in duplicates:
-            # Indices of duplicates
-            dup_indices = np.where(adjusted_times == dup_time)[0]
-            n_duplicates = len(dup_indices)
-            # Define previous and following timestep
-            prev_time = dup_time - pd.Timedelta(seconds=sample_interval)
-            next_time = dup_time + pd.Timedelta(seconds=sample_interval)
-            # Try to find missing slots before and after
-            # - If more than 3 duplicates, impossible to solve !
-            count_solved = 0
-            # If the previous timestep is available, set that one
-            if n_duplicates == 2:
-                if prev_time not in adjusted_times:
-                    adjusted_times[dup_indices[0]] = prev_time
-                    qc_flag[dup_indices[0]] = flag_solved_duplicated_timestep
-                    count_solved += 1
-                elif next_time not in adjusted_times:
-                    adjusted_times[dup_indices[-1]] = next_time
-                    qc_flag[dup_indices[-1]] = flag_solved_duplicated_timestep
-                    count_solved += 1
-                else:
-                    pass
-            elif n_duplicates == 3:
-                if prev_time not in adjusted_times:
-                    adjusted_times[dup_indices[0]] = prev_time
-                    qc_flag[dup_indices[0]] = flag_solved_duplicated_timestep
-                    count_solved += 1
-                if next_time not in adjusted_times:
-                    adjusted_times[dup_indices[-1]] = next_time
-                    qc_flag[dup_indices[-1]] = flag_solved_duplicated_timestep
-                    count_solved += 1
-            if count_solved != n_duplicates - 1:
-                idx_to_drop = np.append(idx_to_drop, dup_indices[0:-1])
-                qc_flag[dup_indices[-1]] = flag_dropped_duplicated_timestep
-                msg = (
-                    f"Cannot resolve {n_duplicates} duplicated timesteps "
-                    f"(after trailing seconds correction) around {dup_time}."
-                )
-                log_warning(logger=logger, msg=msg, verbose=verbose)
-                if robust:
-                    raise ValueError(msg)
-
-    # Update the time coordinate (Convert to ns for xarray compatibility)
-    ds = ds.assign_coords({"time": adjusted_times.astype("datetime64[ns]")})
-
-    # Update quality flag values for next and previous timestep is missing
-    if add_quality_flag:
-        idx_previous_missing, idx_next_missing, idx_isolated_missing = get_problematic_timestep_indices(
-            adjusted_times,
-            sample_interval,
-        )
-        qc_flag[idx_previous_missing] = np.maximum(qc_flag[idx_previous_missing], flag_previous_missing)
-        qc_flag[idx_next_missing] = np.maximum(qc_flag[idx_next_missing], flag_next_missing)
-        qc_flag[idx_isolated_missing] = np.maximum(qc_flag[idx_isolated_missing], flag_isolated_timestep)
-
-        # If the first timestep is at 00:00 and currently flagged as previous missing (1), reset to 0
-        # first_time = pd.to_datetime(adjusted_times[0]).time()
-        # first_expected_time = pd.Timestamp("00:00:00").time()
-        # if first_time == first_expected_time and qc_flag[0] == flag_previous_missing:
-        #     qc_flag[0] = 0
-
-        # # If the last timestep is flagged and currently flagged as next missing (2), reset it to 0
-        # last_time = pd.to_datetime(adjusted_times[-1]).time()
-        # last_time_expected = (pd.Timestamp("00:00:00") - pd.Timedelta(30, unit="seconds")).time()
-        # # Check if adding one interval would go beyond the end_time
-        # if last_time == last_time_expected and qc_flag[-1] == flag_next_missing:
-        #     qc_flag[-1] = 0
-
-        # Assign time quality flag coordinate
-        ds["time_qc"] = xr.DataArray(qc_flag, dims="time")
-        ds = ds.set_coords("time_qc")
-
-    # Drop duplicated timesteps
-    # - Using ds = ds.drop_isel({"time": idx_to_drop.astype(int)}) raise:
-    #   --> pandas.errors.InvalidIndexError: Reindexing only valid with uniquely valued Index objects
-    #   --> https://github.com/pydata/xarray/issues/6605
-    if len(idx_to_drop) > 0:
-        idx_to_drop = idx_to_drop.astype(int)
-        idx_valid_timesteps = np.arange(0, ds["time"].size)
-        idx_valid_timesteps = np.delete(idx_valid_timesteps, idx_to_drop)
-        ds = ds.isel(time=idx_valid_timesteps)
-    # Return dataset
-    return ds
-
-
-####---------------------------------------------------------------------------------
-#### Time blocks
-
-
-def check_freq(freq: str) -> None:
-    """Check validity of freq argument."""
-    valid_freq = ["none", "year", "season", "quarter", "month", "day", "hour"]
-    if not isinstance(freq, str):
-        raise TypeError("'freq' must be a string.")
-    if freq not in valid_freq:
-        raise ValueError(
-            f"'freq' '{freq}' is not possible. Must be one of: {valid_freq}.",
-        )
-    return freq
-
-
-def generate_time_blocks(start_time: np.datetime64, end_time: np.datetime64, freq: str) -> np.ndarray:  # noqa: PLR0911
-    """Generate time blocks between `start_time` and `end_time` for a given frequency.
-
-    Parameters
-    ----------
-    start_time : numpy.datetime64
-        Inclusive start of the overall time range.
-    end_time : numpy.datetime64
-        Inclusive end of the overall time range.
-    freq : str
-        Frequency specifier. Accepted values are:
-        - 'none' : return a single block [start_time, end_time]
-        - 'day' : split into daily blocks
-        - 'month' : split into calendar months
-        - 'quarter' : split into calendar quarters
-        - 'year' : split into calendar years
-        - 'season' : split into meteorological seasons (MAM, JJA, SON, DJF)
-
-    Returns
-    -------
-    numpy.ndarray
-        Array of shape (n, 2) with dtype datetime64[s], where each row is [block_start, block_end].
-
-    """
-    freq = check_freq(freq)
-    if freq == "none":
-        return np.array([[start_time, end_time]], dtype="datetime64[s]")
-
-    if freq == "hour":
-        periods = pd.period_range(start=start_time, end=end_time, freq="h")
-        blocks = np.array(
-            [
-                [
-                    period.start_time.to_datetime64().astype("datetime64[s]"),
-                    period.end_time.to_datetime64().astype("datetime64[s]"),
-                ]
-                for period in periods
-            ],
-            dtype="datetime64[s]",
-        )
-        return blocks
-
-    if freq == "day":
-        periods = pd.period_range(start=start_time, end=end_time, freq="d")
-        blocks = np.array(
-            [
-                [
-                    period.start_time.to_datetime64().astype("datetime64[s]"),
-                    period.end_time.to_datetime64().astype("datetime64[s]"),
-                ]
-                for period in periods
-            ],
-            dtype="datetime64[s]",
-        )
-        return blocks
-
-    if freq == "month":
-        periods = pd.period_range(start=start_time, end=end_time, freq="M")
-        blocks = np.array(
-            [
-                [
-                    period.start_time.to_datetime64().astype("datetime64[s]"),
-                    period.end_time.to_datetime64().astype("datetime64[s]"),
-                ]
-                for period in periods
-            ],
-            dtype="datetime64[s]",
-        )
-        return blocks
-
-    if freq == "year":
-        periods = pd.period_range(start=start_time, end=end_time, freq="Y")
-        blocks = np.array(
-            [
-                [
-                    period.start_time.to_datetime64().astype("datetime64[s]"),
-                    period.end_time.to_datetime64().astype("datetime64[s]"),
-                ]
-                for period in periods
-            ],
-            dtype="datetime64[s]",
-        )
-        return blocks
-
-    if freq == "quarter":
-        periods = pd.period_range(start=start_time, end=end_time, freq="Q")
-        blocks = np.array(
-            [
-                [
-                    period.start_time.to_datetime64().astype("datetime64[s]"),
-                    period.end_time.floor("s").to_datetime64().astype("datetime64[s]"),
-                ]
-                for period in periods
-            ],
-            dtype="datetime64[s]",
-        )
-        return blocks
-
-    if freq == "season":
-        # Fiscal quarter frequency ending in Feb → seasons DJF, MAM, JJA, SON
-        periods = pd.period_range(start=start_time, end=end_time, freq="Q-FEB")
-        blocks = np.array(
-            [
-                [
-                    period.start_time.to_datetime64().astype("datetime64[s]"),
-                    period.end_time.to_datetime64().astype("datetime64[s]"),
-                ]
-                for period in periods
-            ],
-            dtype="datetime64[s]",
-        )
-        return blocks
-    raise NotImplementedError(f"Frequency '{freq}' is not implemented.")
disdrodb/utils/xarray.py CHANGED
@@ -106,6 +106,7 @@ def xr_get_last_valid_idx(da_condition, dim, fill_value=None):
 def _check_coord_handling(coord_handling):
     if coord_handling not in {"keep", "drop", "unstack"}:
         raise ValueError("coord_handling must be one of 'keep', 'drop', or 'unstack'.")
+    return coord_handling
 
 
 def _unstack_coordinates(xr_obj, dim, prefix, suffix):
@@ -163,6 +164,8 @@ def unstack_datarray_dimension(da, dim, coord_handling="keep", prefix="", suffix
     """
     # Retrieve DataArray name
     name = da.name
+    coord_handling = _check_coord_handling(coord_handling)
+
     # Unstack variables
     ds = da.to_dataset(dim=dim)
     rename_dict = {dim_value: f"{prefix}{name}{suffix}{dim_value}" for dim_value in list(ds.data_vars)}