disdrodb 0.1.5__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Files changed (125)
  1. disdrodb/__init__.py +1 -5
  2. disdrodb/_version.py +2 -2
  3. disdrodb/accessor/methods.py +22 -4
  4. disdrodb/api/checks.py +10 -0
  5. disdrodb/api/io.py +20 -18
  6. disdrodb/api/path.py +42 -77
  7. disdrodb/api/search.py +89 -23
  8. disdrodb/cli/disdrodb_create_summary.py +1 -1
  9. disdrodb/cli/disdrodb_run_l0.py +1 -1
  10. disdrodb/cli/disdrodb_run_l0a.py +1 -1
  11. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  12. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  13. disdrodb/cli/disdrodb_run_l1.py +1 -1
  14. disdrodb/cli/disdrodb_run_l2e.py +1 -1
  15. disdrodb/cli/disdrodb_run_l2m.py +1 -1
  16. disdrodb/configs.py +30 -83
  17. disdrodb/constants.py +4 -3
  18. disdrodb/data_transfer/download_data.py +4 -2
  19. disdrodb/docs.py +2 -2
  20. disdrodb/etc/products/L1/1MIN.yaml +13 -0
  21. disdrodb/etc/products/L1/LPM/1MIN.yaml +13 -0
  22. disdrodb/etc/products/L1/LPM_V0/1MIN.yaml +13 -0
  23. disdrodb/etc/products/L1/PARSIVEL/1MIN.yaml +13 -0
  24. disdrodb/etc/products/L1/PARSIVEL2/1MIN.yaml +13 -0
  25. disdrodb/etc/products/L1/PWS100/1MIN.yaml +13 -0
  26. disdrodb/etc/products/L1/RD80/1MIN.yaml +13 -0
  27. disdrodb/etc/products/L1/SWS250/1MIN.yaml +13 -0
  28. disdrodb/etc/products/L1/global.yaml +6 -0
  29. disdrodb/etc/products/L2E/10MIN.yaml +1 -12
  30. disdrodb/etc/products/L2E/global.yaml +1 -1
  31. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +6 -0
  32. disdrodb/etc/products/L2M/global.yaml +1 -1
  33. disdrodb/issue/checks.py +2 -2
  34. disdrodb/l0/check_configs.py +1 -1
  35. disdrodb/l0/configs/LPM/l0a_encodings.yml +0 -1
  36. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +0 -4
  37. disdrodb/l0/configs/LPM/l0b_encodings.yml +9 -9
  38. disdrodb/l0/configs/LPM/raw_data_format.yml +11 -11
  39. disdrodb/l0/configs/LPM_V0/bins_diameter.yml +103 -0
  40. disdrodb/l0/configs/LPM_V0/bins_velocity.yml +103 -0
  41. disdrodb/l0/configs/LPM_V0/l0a_encodings.yml +45 -0
  42. disdrodb/l0/configs/LPM_V0/l0b_cf_attrs.yml +180 -0
  43. disdrodb/l0/configs/LPM_V0/l0b_encodings.yml +410 -0
  44. disdrodb/l0/configs/LPM_V0/raw_data_format.yml +474 -0
  45. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +1 -1
  46. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +8 -8
  47. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +9 -9
  48. disdrodb/l0/l0_reader.py +2 -2
  49. disdrodb/l0/l0a_processing.py +6 -2
  50. disdrodb/l0/l0b_processing.py +26 -19
  51. disdrodb/l0/l0c_processing.py +17 -3
  52. disdrodb/l0/manuals/LPM_V0.pdf +0 -0
  53. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +15 -7
  54. disdrodb/l0/readers/LPM/ITALY/GID_LPM_PI.py +279 -0
  55. disdrodb/l0/readers/LPM/ITALY/GID_LPM_T.py +276 -0
  56. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  57. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_RWANDA_LPM_NC.py +103 -0
  58. disdrodb/l0/readers/LPM/NORWAY/HAUKELISETER_LPM.py +216 -0
  59. disdrodb/l0/readers/LPM/NORWAY/NMBU_LPM.py +208 -0
  60. disdrodb/l0/readers/LPM/UK/WITHWORTH_LPM.py +219 -0
  61. disdrodb/l0/readers/LPM/USA/CHARLESTON.py +229 -0
  62. disdrodb/l0/readers/{LPM → LPM_V0}/BELGIUM/ULIEGE.py +33 -49
  63. disdrodb/l0/readers/LPM_V0/ITALY/GID_LPM_V0.py +240 -0
  64. disdrodb/l0/readers/PARSIVEL/BASQUECOUNTRY/EUSKALMET_OTT.py +227 -0
  65. disdrodb/l0/readers/{PARSIVEL2 → PARSIVEL}/NASA/LPVEX.py +16 -28
  66. disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/MC3E.py +1 -1
  67. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +3 -3
  68. disdrodb/l0/readers/PARSIVEL2/BASQUECOUNTRY/EUSKALMET_OTT2.py +232 -0
  69. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +1 -1
  70. disdrodb/l0/readers/PARSIVEL2/JAPAN/PRECIP.py +155 -0
  71. disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +14 -7
  72. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +8 -3
  73. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +28 -5
  74. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +1 -1
  75. disdrodb/l0/readers/PARSIVEL2/{GPM/GCPEX.py → NORWAY/UIB.py} +54 -29
  76. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/{PANGASA.py → PAGASA.py} +6 -3
  77. disdrodb/l0/readers/PARSIVEL2/SPAIN/GRANADA.py +1 -1
  78. disdrodb/l0/readers/PARSIVEL2/SWEDEN/SMHI.py +189 -0
  79. disdrodb/l0/readers/{PARSIVEL/GPM/PIERS.py → PARSIVEL2/USA/CSU.py} +62 -29
  80. disdrodb/l0/readers/PARSIVEL2/USA/{C3WE.py → CW3E.py} +51 -24
  81. disdrodb/l0/readers/{PARSIVEL/GPM/IFLOODS.py → RD80/BRAZIL/ATTO_RD80.py} +50 -34
  82. disdrodb/l0/readers/{SW250 → SWS250}/BELGIUM/KMI.py +1 -1
  83. disdrodb/l1/beard_model.py +45 -1
  84. disdrodb/l1/fall_velocity.py +1 -6
  85. disdrodb/l1/filters.py +2 -0
  86. disdrodb/l1/processing.py +6 -5
  87. disdrodb/l1/resampling.py +101 -38
  88. disdrodb/l2/empirical_dsd.py +12 -8
  89. disdrodb/l2/processing.py +4 -3
  90. disdrodb/metadata/search.py +3 -4
  91. disdrodb/routines/l0.py +4 -4
  92. disdrodb/routines/l1.py +173 -60
  93. disdrodb/routines/l2.py +121 -269
  94. disdrodb/routines/options.py +347 -0
  95. disdrodb/routines/wrappers.py +9 -1
  96. disdrodb/scattering/axis_ratio.py +3 -0
  97. disdrodb/scattering/routines.py +1 -1
  98. disdrodb/summary/routines.py +765 -724
  99. disdrodb/utils/archiving.py +51 -44
  100. disdrodb/utils/attrs.py +1 -1
  101. disdrodb/utils/compression.py +4 -2
  102. disdrodb/utils/dask.py +35 -15
  103. disdrodb/utils/dict.py +33 -0
  104. disdrodb/utils/encoding.py +1 -1
  105. disdrodb/utils/manipulations.py +7 -1
  106. disdrodb/utils/routines.py +9 -8
  107. disdrodb/utils/time.py +9 -1
  108. disdrodb/viz/__init__.py +0 -13
  109. disdrodb/viz/plots.py +209 -0
  110. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/METADATA +1 -1
  111. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/RECORD +124 -95
  112. disdrodb/l0/readers/PARSIVEL/GPM/LPVEX.py +0 -85
  113. /disdrodb/etc/products/L2M/{GAMMA_GS_ND_MAE.yaml → MODELS/GAMMA_GS_ND_MAE.yaml} +0 -0
  114. /disdrodb/etc/products/L2M/{GAMMA_ML.yaml → MODELS/GAMMA_ML.yaml} +0 -0
  115. /disdrodb/etc/products/L2M/{LOGNORMAL_GS_LOG_ND_MAE.yaml → MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml} +0 -0
  116. /disdrodb/etc/products/L2M/{LOGNORMAL_GS_ND_MAE.yaml → MODELS/LOGNORMAL_GS_ND_MAE.yaml} +0 -0
  117. /disdrodb/etc/products/L2M/{LOGNORMAL_ML.yaml → MODELS/LOGNORMAL_ML.yaml} +0 -0
  118. /disdrodb/etc/products/L2M/{NGAMMA_GS_LOG_ND_MAE.yaml → MODELS/NGAMMA_GS_LOG_ND_MAE.yaml} +0 -0
  119. /disdrodb/etc/products/L2M/{NGAMMA_GS_ND_MAE.yaml → MODELS/NGAMMA_GS_ND_MAE.yaml} +0 -0
  120. /disdrodb/etc/products/L2M/{NGAMMA_GS_Z_MAE.yaml → MODELS/NGAMMA_GS_Z_MAE.yaml} +0 -0
  121. /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/NSSTC.py +0 -0
  122. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/WHEEL +0 -0
  123. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/entry_points.txt +0 -0
  124. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/licenses/LICENSE +0 -0
  125. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/top_level.txt +0 -0
disdrodb/utils/archiving.py CHANGED
@@ -23,10 +23,7 @@ import pandas as pd
  from disdrodb.api.info import get_start_end_time_from_filepaths
  from disdrodb.api.io import open_netcdf_files
  from disdrodb.utils.event import group_timesteps_into_event
- from disdrodb.utils.time import (
- ensure_sorted_by_time,
- ensure_timedelta_seconds,
- )
+ from disdrodb.utils.time import ensure_sorted_by_time, temporal_resolution_to_seconds

  ####---------------------------------------------------------------------------------
  #### Time blocks
@@ -140,6 +137,7 @@ def identify_events(
  neighbor_time_interval : str
  The time interval around a given a timestep defining the neighborhood.
  Only timesteps that fall within this time interval before or after a timestep are considered neighbors.
+ The neighbor_time_interval must be at least equal to the dataset sampling interval!
  neighbor_min_size : int, optional
  The minimum number of neighboring timesteps required within `neighbor_time_interval` for a
  timestep to be considered non-isolated. Isolated timesteps are removed !
@@ -171,6 +169,12 @@ def identify_events(
  # Define candidate timesteps to group into events
  idx_valid = ds["N"].to_numpy() > min_drops
  timesteps = ds["time"].to_numpy()[idx_valid]
+ if "sample_interval" in ds:
+ sample_interval = ds["sample_interval"].compute().item()
+ if temporal_resolution_to_seconds(neighbor_time_interval) < sample_interval:
+ msg = "'neighbor_time_interval' must be at least equal to the dataset sample interval ({sample_interval} s)"
+ raise ValueError(msg)
+
  # Define event list
  event_list = group_timesteps_into_event(
  timesteps=timesteps,
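For reference, the check added above rejects neighborhood windows shorter than the dataset sampling interval. A minimal standalone sketch of that validation, using pandas to parse the interval string (a hypothetical stand-in; the actual temporal_resolution_to_seconds helper in disdrodb.utils.time may expect a different string format):

    import pandas as pd

    def interval_to_seconds(interval):
        # Hypothetical stand-in for temporal_resolution_to_seconds()
        return pd.Timedelta(interval).total_seconds()

    sample_interval = 60  # dataset sampling interval (seconds)
    neighbor_time_interval = "5min"
    assert interval_to_seconds(neighbor_time_interval) >= sample_interval  # 300 s >= 60 s, OK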
@@ -326,29 +330,32 @@ def _map_files_to_blocks(files_start_time, files_end_time, filepaths, block_star
  return results


- def get_files_partitions(list_partitions, filepaths, sample_interval, accumulation_interval, rolling): # noqa: ARG001
+ def group_files_by_temporal_partitions(
+ temporal_partitions,
+ filepaths,
+ block_starts_offset=0,
+ block_ends_offset=0,
+ ):
  """
  Provide information about the required files for each event.

- For each event in `list_partitions`, this function identifies the file paths from `filepaths` that
- overlap with the event period, adjusted by the `accumulation_interval`. The event period is
- extended backward or forward based on the `rolling` parameter.
+ For each time block in `temporal_partitions`, the function identifies the `filepaths` that
+ overlap such time period. The time blocks of `temporal_partitions` can be adjusted using
+ block_starts_offset and block_ends_offset e.g. for resampling applications.

  Parameters
  ----------
- list_partitions : list of dict
- List of events, where each event is a dictionary containing at least 'start_time' and 'end_time'
+ temporal_partitions : list of dict
+ List of time blocks, where each time blocks is a dictionary containing at least 'start_time' and 'end_time'
  keys with `numpy.datetime64` values.
  filepaths : list of str
  List of file paths corresponding to data files.
- sample_interval : numpy.timedelta64 or int
- The sample interval of the input dataset.
- accumulation_interval : numpy.timedelta64 or int
- Time interval to adjust the event period for accumulation. If an integer is provided, it is
- assumed to be in seconds.
- rolling : bool
- If True, adjust the event period backward by `accumulation_interval` (rolling backward).
- If False, adjust forward (aggregate forward).
+ block_starts_offset: int
+ Optional offset (in seconds) to add to time blocks starts.
+ Provide negative offset to go back in time.
+ block_ends_offset: int
+ Optional offset (in seconds) to add to time blocks ends.
+ Provide negative offset to go back in time.

  Returns
  -------
@@ -359,54 +366,54 @@ def get_files_partitions(list_partitions, filepaths, sample_interval, accumulati
  - 'filepaths': List of file paths overlapping with the adjusted event period.

  """
- if len(filepaths) == 0 or len(list_partitions) == 0:
+ if len(filepaths) == 0 or len(temporal_partitions) == 0:
  return []

- # Ensure sample_interval and accumulation_interval is numpy.timedelta64
- accumulation_interval = ensure_timedelta_seconds(accumulation_interval)
- sample_interval = ensure_timedelta_seconds(sample_interval)
-
- # Define offset on event_end_time
- offset = accumulation_interval if sample_interval != accumulation_interval else ensure_timedelta_seconds(0)
-
  # Retrieve file start_time and end_time
  files_start_time, files_end_time = get_start_end_time_from_filepaths(filepaths)

  # Retrieve partitions blocks start and end time arrays
- block_starts = np.array([p["start_time"] for p in list_partitions]).astype("M8[s]")
- block_ends = np.array([p["end_time"] for p in list_partitions]).astype("M8[s]")
+ block_starts = np.array([p["start_time"] for p in temporal_partitions]).astype("M8[s]")
+ block_ends = np.array([p["end_time"] for p in temporal_partitions]).astype("M8[s]")

- # Add optional offset for resampling
- # TODO: expanding partition time should be done only at L1 stage when resampling
- # In disdrodb, the time reported is time at the start of the accumulation period !
- # If sensors report time at the end of measurement interval, we might being reporting time
- # with an inaccuracy equals to the sensor measurement interval.
- # We could correct for that at L0C stage already !
- block_ends = block_ends + offset
+ # Add optional offset to blocks' starts/ends (e.g. for resampling)
+ block_starts = block_starts + block_starts_offset
+ block_ends = block_ends + block_ends_offset

  # Map filepaths to corresponding time blocks
  list_event_info = _map_files_to_blocks(files_start_time, files_end_time, filepaths, block_starts, block_ends)
  return list_event_info


- def get_files_per_time_block(filepaths, freq="day", tolerance_seconds=120):
+ def group_files_by_time_block(filepaths, freq="day", tolerance_seconds=120):
  """
- Organize files by the days they cover based on their start and end times.
+ Organize files by time blocks based on their start and end times.
+
+ If tolerance_seconds is specified, it adds some tolerance to files start and end_time.
+ This means that files starting/ending next to the time blocks boundaries will be included in both
+ time blocks. This can be useful to deal with imprecise time within files.

  Parameters
  ----------
  filepaths : list of str
  List of file paths to be processed.
+ freq: str
+ Frequency of the time block. The default frequency is 'day'.
+ tolerance_seconds: int
+ Tolerance in seconds to subtract/add to files start time and end time.

  Returns
  -------
- dict
- Dictionary where keys are days (as strings) and values are lists of file paths
- that cover those days.
+ list of dict
+ A list where each element is a dictionary containing:
+ - 'start_time': Adjusted start time of the event (`datetime.datetime64`).
+ - 'end_time': Adjusted end time of the event (`datetime.datetime64`).
+ - 'filepaths': List of file paths overlapping with the adjusted event period.

  Notes
  -----
- This function adds a tolerance of 60 seconds to account for imprecise time logging by the sensors.
+ In the DISDRODB L0C processing chain, a tolerance of 120 seconds is used to account
+ for the possible imprecise/drifting time logged by the sensors before it is corrected.
  """
  # Empty filepaths list return a dictionary
  if len(filepaths) == 0:
@@ -421,13 +428,13 @@ def get_files_per_time_block(filepaths, freq="day", tolerance_seconds=120):
  files_end_time = files_end_time + np.array(tolerance_seconds, dtype="m8[s]")

  # Identify candidate blocks
- list_partitions = identify_time_partitions(
+ temporal_partitions = identify_time_partitions(
  start_times=files_start_time,
  end_times=files_end_time,
  freq=freq,
  )
- block_starts = np.array([b["start_time"] for b in list_partitions]).astype("M8[s]")
- block_ends = np.array([b["end_time"] for b in list_partitions]).astype("M8[s]")
+ block_starts = np.array([b["start_time"] for b in temporal_partitions]).astype("M8[s]")
+ block_ends = np.array([b["end_time"] for b in temporal_partitions]).astype("M8[s]")

  # Map filepaths to corresponding time blocks
  list_event_info = _map_files_to_blocks(files_start_time, files_end_time, filepaths, block_starts, block_ends)
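The offset and tolerance arithmetic used by these functions relies on NumPy datetime64/timedelta64 semantics: integer offsets are interpreted in the unit of the "M8[s]" arrays. A small self-contained illustration (timestamps are invented):

    import numpy as np

    block_starts = np.array(["2024-06-01T00:00:00"], dtype="M8[s]")
    block_ends = np.array(["2024-06-02T00:00:00"], dtype="M8[s]")

    # block_ends_offset=600 extends each block end by 10 minutes
    block_ends = block_ends + 600

    # tolerance_seconds is applied to file times via an explicit timedelta64
    files_end_time = np.array(["2024-06-01T23:59:10"], dtype="M8[s]")
    files_end_time = files_end_time + np.array(120, dtype="m8[s]")

    print(block_ends[0], files_end_time[0])  # 2024-06-02T00:10:00 2024-06-02T00:01:10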
disdrodb/utils/attrs.py CHANGED
@@ -31,7 +31,7 @@ def get_attrs_dict():
  """Get attributes dictionary for DISDRODB product variables and coordinates."""
  import disdrodb

- configs_path = os.path.join(disdrodb.__root_path__, "disdrodb", "etc", "configs")
+ configs_path = os.path.join(disdrodb.package_dir, "etc", "configs")
  attrs_dict = read_yaml(os.path.join(configs_path, "attributes.yaml"))
  return attrs_dict

disdrodb/utils/compression.py CHANGED
@@ -82,7 +82,7 @@ def unzip_file_on_terminal(filepath: str, dest_path: str) -> str:
  subprocess.run(cmd, check=True)


- def _zip_dir(dir_path: str) -> str:
+ def _zip_dir(dir_path: str, dst_dir=None) -> str:
  """Zip a directory into a file located in the same directory.

  Parameters
@@ -95,7 +95,9 @@ def _zip_dir(dir_path: str) -> str:
  str
  Path of the zip archive.
  """
- output_path_without_extension = os.path.join(tempfile.gettempdir(), os.path.basename(dir_path))
+ if dst_dir is None:
+ dst_dir = tempfile.gettempdir()
+ output_path_without_extension = os.path.join(dst_dir, os.path.basename(dir_path))
  output_path = output_path_without_extension + ".zip"
  shutil.make_archive(output_path_without_extension, "zip", dir_path)
  return output_path
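The new dst_dir parameter only redirects where the archive is written; the zipping itself is plain shutil.make_archive. A self-contained sketch of the same pattern (standard library only):

    import os
    import shutil
    import tempfile

    def zip_dir(dir_path, dst_dir=None):
        # Mirrors the patched _zip_dir logic: default destination is the temp directory
        if dst_dir is None:
            dst_dir = tempfile.gettempdir()
        base = os.path.join(dst_dir, os.path.basename(dir_path))
        shutil.make_archive(base, "zip", dir_path)
        return base + ".zip"

    src_dir = tempfile.mkdtemp()
    open(os.path.join(src_dir, "example.txt"), "w").close()
    print(zip_dir(src_dir))  # e.g. /tmp/tmpXXXXXXXX.zip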
disdrodb/utils/dask.py CHANGED
@@ -113,7 +113,13 @@ def close_dask_cluster(cluster, client):
  logger.setLevel(original_level)


- def execute_tasks_safely(list_tasks, parallel: bool, logs_dir: str):
+ def _batch_iterable(iterable, n):
+ """Yield successive n-sized chunks from iterable."""
+ for i in range(0, len(iterable), n):
+ yield iterable[i : i + n]
+
+
+ def execute_tasks_safely(list_tasks, parallel: bool, logs_dir: str, max_tasks_per_batch=5_000):
  """
  Execute Dask tasks and skip failed ones.

@@ -125,6 +131,9 @@ def execute_tasks_safely(list_tasks, parallel: bool, logs_dir: str):
  Whether to execute in parallel with Dask or not.
  logs_dir : str
  Directory to store FAILED_TASKS.log.
+ max_tasks_per_batch : int or None, optional
+ Maximum number of tasks to submit to `client.compute()` at once.
+ The default is 5000. Dask struggle if more than 10_000 tasks are submitted.

  Returns
  -------
@@ -134,34 +143,45 @@ def execute_tasks_safely(list_tasks, parallel: bool, logs_dir: str):
  """
  from dask.distributed import get_client

+ if not parallel:
+ # Non-parallel mode: just return results directly
+ return list_tasks
+
  # Ensure logs_dir exists
  os.makedirs(logs_dir, exist_ok=True)

  # Define file name where to log failed dask tasks
  failed_log_path = os.path.join(logs_dir, "FAILED_DASK_TASKS.log")

- if not parallel:
- # Non-parallel mode: just return results directly
- return list_tasks
-
  # Ensure we have a Dask client
  try:
  client = get_client()
  except ValueError:
  raise ValueError("No Dask Distributed Client found.")

- # Compute tasks (all concurrently)
- # - Runs tasks == num_workers * threads_per_worker (which is 1 for DISDRODB)
- # - If errors occurs in some, skip it
- futures = client.compute(list_tasks)
- results = client.gather(futures, errors="skip")
+ all_results = []
+ failed_futures = []
+
+ # Batch execution
+ task_batches = list(_batch_iterable(list_tasks, max_tasks_per_batch)) if max_tasks_per_batch else [list_tasks]
+
+ for batch in task_batches:
+ # Compute tasks (all concurrently)
+ # - Runs tasks == num_workers * threads_per_worker (which is 1 for DISDRODB)
+ # - If errors occurs in some, skip it
+ futures = client.compute(batch)
+ results = client.gather(futures, errors="skip")
+
+ # Identify and collect failed futures
+ batch_failed = [f for f in futures if f.status != "finished"]
+ failed_futures.extend(batch_failed)

- # Collect failed futures
- failed_futures = [f for f in futures if f.status != "finished"] # "error"
+ # Collect results from successful tasks
+ all_results.extend(results)

  # If no tasks failed, return results
  if not failed_futures:
- return results
+ return all_results

  # Otherwise define log file listing failed tasks
  with open(failed_log_path, "w") as f:
@@ -170,5 +190,5 @@ def execute_tasks_safely(list_tasks, parallel: bool, logs_dir: str):
  f.write(f"ERROR - DASK TASK FAILURE - Task {fut.key} failed: {err}\n")

  # Append to list of log filepaths (results) the dask failing log
- results.append(failed_log_path)
- return results
+ all_results.append(failed_log_path)
+ return all_results
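The batching helper is plain Python and easy to sanity-check in isolation (batch size chosen for the example):

    def batch_iterable(iterable, n):
        # Same logic as the new _batch_iterable() helper
        for i in range(0, len(iterable), n):
            yield iterable[i : i + n]

    tasks = list(range(12))
    print(list(batch_iterable(tasks, 5)))
    # [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9], [10, 11]]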
disdrodb/utils/dict.py ADDED
@@ -0,0 +1,33 @@
+ # -----------------------------------------------------------------------------.
+ # Copyright (c) 2021-2023 DISDRODB developers
+ #
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ # -----------------------------------------------------------------------------.
+ """This module contains functions for manipulating dictionaries."""
+
+
+ def extract_product_kwargs(kwargs, product):
+ """Infer product kwargs dictionary."""
+ from disdrodb.api.checks import check_product
+ from disdrodb.constants import PRODUCTS_ARGUMENTS
+
+ check_product(product)
+ product_kwargs_keys = set(PRODUCTS_ARGUMENTS.get(product, []))
+ return extract_dictionary(kwargs, keys=product_kwargs_keys)
+
+
+ def extract_dictionary(dictionary, keys):
+ """Extract a subset of keys from the dictionary, removing them from the input dictionary."""
+ return {k: dictionary.pop(k) for k in keys if k in dictionary}
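Note that extract_dictionary mutates its input: the extracted keys are popped from the caller's kwargs. A small runnable illustration (the key names are arbitrary):

    def extract_dictionary(dictionary, keys):
        # Same logic as the new helper
        return {k: dictionary.pop(k) for k in keys if k in dictionary}

    kwargs = {"temporal_resolution": "10MIN", "model_name": "GAMMA_ML", "parallel": True}
    product_kwargs = extract_dictionary(kwargs, keys=["temporal_resolution", "model_name"])
    print(product_kwargs)  # {'temporal_resolution': '10MIN', 'model_name': 'GAMMA_ML'}
    print(kwargs)          # {'parallel': True}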
disdrodb/utils/encoding.py CHANGED
@@ -31,7 +31,7 @@ def get_encodings_dict():
  """Get encoding dictionary for DISDRODB product variables and coordinates."""
  import disdrodb

- configs_path = os.path.join(disdrodb.__root_path__, "disdrodb", "etc", "configs")
+ configs_path = os.path.join(disdrodb.package_dir, "etc", "configs")
  encodings_dict = read_yaml(os.path.join(configs_path, "encodings.yaml"))
  return encodings_dict

disdrodb/utils/manipulations.py CHANGED
@@ -26,7 +26,13 @@ from disdrodb.utils.xarray import unstack_datarray_dimension

  def get_diameter_bin_edges(ds):
  """Retrieve diameter bin edges."""
- bin_edges = np.append(ds["diameter_bin_lower"].compute().data, ds["diameter_bin_upper"].compute().data[-1])
+ bin_edges = np.append(ds["diameter_bin_lower"].to_numpy(), ds["diameter_bin_upper"].to_numpy()[-1])
+ return bin_edges
+
+
+ def get_velocity_bin_edges(ds):
+ """Retrieve velocity bin edges."""
+ bin_edges = np.append(ds["velocity_bin_lower"].to_numpy(), ds["velocity_bin_upper"].to_numpy()[-1])
  return bin_edges
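A minimal sketch of the bin-edge construction on a synthetic dataset (bin values and the dimension name are invented, not an actual sensor configuration):

    import numpy as np
    import xarray as xr

    ds = xr.Dataset(
        coords={
            "diameter_bin_lower": ("diameter_bin_center", [0.0, 0.2, 0.4]),
            "diameter_bin_upper": ("diameter_bin_center", [0.2, 0.4, 0.6]),
        },
    )
    # Edges = all lower bounds plus the last upper bound
    bin_edges = np.append(ds["diameter_bin_lower"].to_numpy(), ds["diameter_bin_upper"].to_numpy()[-1])
    print(bin_edges)  # [0.  0.2 0.4 0.6]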
disdrodb/utils/routines.py CHANGED
@@ -22,24 +22,28 @@ import shutil
  import tempfile

  from disdrodb.api.io import find_files
- from disdrodb.api.path import define_file_folder_path, define_temporal_resolution
+ from disdrodb.api.path import define_file_folder_path
  from disdrodb.utils.logger import (
  close_logger,
  create_logger_file,
  log_error,
  log_info,
  )
+ from disdrodb.utils.time import get_sampling_information


- def is_possible_product(accumulation_interval, sample_interval, rolling):
+ def is_possible_product(temporal_resolution, sample_interval):
  """Assess if production is possible given the requested accumulation interval and source sample_interval."""
+ # Retrieve accumulation_interval and rolling option
+ accumulation_interval, rolling = get_sampling_information(temporal_resolution)
+
  # Avoid rolling product generation at source sample interval
  if rolling and accumulation_interval == sample_interval:
  return False
  # Avoid product generation if the accumulation_interval is less than the sample interval
  if accumulation_interval < sample_interval:
  return False
- # Avoid producti generation if accumulation_interval is not multiple of sample_interval
+ # Avoid product generation if accumulation_interval is not multiple of sample_interval
  return accumulation_interval % sample_interval == 0
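The feasibility test boils down to integer arithmetic on the two intervals, e.g. a 10-minute accumulation from a 30-second native sampling is allowed because 600 % 30 == 0 (values below are illustrative):

    sample_interval = 30          # native sensor sampling (seconds)
    accumulation_interval = 600   # requested accumulation (seconds), e.g. 10MIN
    rolling = False

    possible = (
        not (rolling and accumulation_interval == sample_interval)
        and accumulation_interval >= sample_interval
        and accumulation_interval % sample_interval == 0
    )
    print(possible)  # True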
@@ -67,11 +71,8 @@ def try_get_required_filepaths(
  # If no files available, print informative message
  except Exception as e:
  temporal_resolution = ""
- if "sample_interval" in product_kwargs and "rolling" in product_kwargs:
- temporal_resolution = define_temporal_resolution(
- seconds=product_kwargs["sample_interval"],
- rolling=product_kwargs["rolling"],
- )
+ if "temporal_resolution" in product_kwargs:
+ temporal_resolution = product_kwargs["temporal_resolution"]
  print(str(e))
  msg = (
  f"{product} processing of {data_source} {campaign_name} {station_name} "
disdrodb/utils/time.py CHANGED
@@ -235,6 +235,8 @@ def regularize_dataset(
  time_dim: str = "time",
  method: Optional[str] = None,
  fill_value=None,
+ start_time=None,
+ end_time=None,
  ):
  """Regularize a dataset across time dimension with uniform resolution.

@@ -265,7 +267,13 @@ def regularize_dataset(
  """
  attrs = xr_obj.attrs.copy()
  xr_obj = _check_time_sorted(xr_obj, time_dim=time_dim)
- start_time, end_time = get_dataset_start_end_time(xr_obj, time_dim=time_dim)
+
+ # Define start time and end_time
+ start, end = get_dataset_start_end_time(xr_obj, time_dim=time_dim)
+ if start_time is None:
+ start_time = start
+ if end_time is None:
+ end_time = end

  # Define new time index
  new_time_index = pd.date_range(
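The new start_time/end_time arguments let callers pin the regularized time index to an externally chosen window instead of the dataset's own extent. A minimal standalone sketch of that reindexing pattern (the frequency and fill value are illustrative, not the function's defaults):

    import numpy as np
    import pandas as pd
    import xarray as xr

    # Irregular input series
    times = pd.to_datetime(["2024-06-01 00:00", "2024-06-01 00:02", "2024-06-01 00:05"])
    ds = xr.Dataset({"n": ("time", [1, 2, 3])}, coords={"time": times})

    # Regularize onto a 1-minute index spanning an explicit window
    new_time_index = pd.date_range(start="2024-06-01 00:00", end="2024-06-01 00:06", freq="1min")
    ds_regular = ds.reindex(time=new_time_index, fill_value=np.nan)
    print(ds_regular["n"].to_numpy())  # [ 1. nan  2. nan nan  3. nan]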
disdrodb/viz/__init__.py CHANGED
@@ -15,16 +15,3 @@
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
  # -----------------------------------------------------------------------------.
  """DISDRODB Visualization Module."""
- from disdrodb.viz.plots import (
- compute_dense_lines,
- max_blend_images,
- plot_nd,
- to_rgba,
- )
-
- __all__ = [
- "compute_dense_lines",
- "max_blend_images",
- "plot_nd",
- "to_rgba",
- ]
disdrodb/viz/plots.py CHANGED
@@ -20,6 +20,13 @@ import numpy as np
  import psutil
  import xarray as xr
  from matplotlib.colors import LogNorm, Normalize
+ from matplotlib.gridspec import GridSpec
+
+ from disdrodb.constants import DIAMETER_DIMENSION, VELOCITY_DIMENSION
+ from disdrodb.l2.empirical_dsd import get_drop_average_velocity
+
+ ####-------------------------------------------------------------------------------------------------------
+ #### N(D) visualizations


  def _single_plot_nd_distribution(drop_number_concentration, diameter, diameter_bin_width):
@@ -76,6 +83,208 @@ def plot_nd(ds, var="drop_number_concentration", cmap=None, norm=None):
  return p


+ ####-------------------------------------------------------------------------------------------------------
+ #### Spectra visualizations
+
+
+ def _check_has_diameter_and_velocity_dims(da):
+ if DIAMETER_DIMENSION not in da.dims or VELOCITY_DIMENSION not in da.dims:
+ raise ValueError(f"The DataArray must have both '{DIAMETER_DIMENSION}' and '{VELOCITY_DIMENSION}' dimensions.")
+ return da
+
+
+ def _get_spectrum_variable(xr_obj, variable):
+ if not isinstance(xr_obj, (xr.Dataset, xr.DataArray)):
+ raise TypeError("Expecting xarray object as input.")
+ if VELOCITY_DIMENSION not in xr_obj.dims:
+ raise ValueError("2D spectrum not available.")
+ if isinstance(xr_obj, xr.Dataset):
+ if variable not in xr_obj:
+ raise ValueError(f"The dataset do not include {variable=}.")
+ xr_obj = xr_obj[variable]
+ xr_obj = _check_has_diameter_and_velocity_dims(xr_obj)
+ return xr_obj
+
+
+ def plot_spectrum(
+ xr_obj,
+ variable="raw_drop_number",
+ ax=None,
+ cmap=None,
+ norm=None,
+ extend="max",
+ add_colorbar=True,
+ cbar_kwargs=None,
+ title="Drop Spectrum",
+ **plot_kwargs,
+ ):
+ """Plot the spectrum.
+
+ Parameters
+ ----------
+ xr_obj : xarray.Dataset or xarray.DataArray
+ Input xarray object. If Dataset, the variable to plot must be specified.
+ If DataArray, it must have both diameter and velocity dimensions.
+ variable : str
+ Name of the variable to plot if xr_obj is a Dataset.
+ ax : matplotlib.axes.Axes, optional
+ Axes to plot on. If None, uses current axes or creates a new one.
+ cmap : Colormap, optional
+ Colormap to use. If None, uses 'Spectral_r' with 'under' set to 'none'.
+ norm : matplotlib.colors.Normalize, optional
+ Normalization for colormap. If None, uses LogNorm with vmin=1.
+ extend : {'neither', 'both', 'min', 'max'}, optional
+ Whether to draw arrows on the colorbar to indicate out-of-range values.
+ Default is 'max'.
+ add_colorbar : bool, optional
+ Whether to add a colorbar. Default is True.
+ cbar_kwargs : dict, optional
+ Additional keyword arguments for colorbar. If None, uses {'label': 'Number of particles '}.
+ title : str, optional
+ Title of the plot. Default is 'Drop Spectrum'.
+ **plot_kwargs : dict
+ Additional keyword arguments passed to xarray's plot.pcolormesh method.
+
+ Notes
+ -----
+ - If the input DataArray has a time dimension, it is summed over time before plotting
+ unless FacetGrid options (e.g., col, row) are specified in plot_kwargs.
+ - If FacetGrid options are used, the plot will create a grid of subplots for each time slice.
+ To create a FacetGrid plot, use:
+
+ ds.isel(time=slice(0, 9)).disdrodb.plot_spectrum(col="time", col_wrap=3)
+
+ """
+ # Retrieve spectrum
+ drop_number = _get_spectrum_variable(xr_obj, variable)
+
+ # Check if FacetGrid
+ is_facetgrid = "col" in plot_kwargs or "row" in plot_kwargs
+
+ # Sum over time dimension if still present
+ # - Unless FacetGrid options in plot_kwargs
+ if "time" in drop_number.dims and not is_facetgrid:
+ drop_number = drop_number.sum(dim="time")
+
+ # Define default cbar_kwargs if not specified
+ if cbar_kwargs is None:
+ cbar_kwargs = {"label": "Number of particles"}
+
+ # Define cmap and norm
+ if cmap is None:
+ cmap = plt.get_cmap("Spectral_r").copy()
+ cmap.set_under("none")
+
+ if norm is None:
+ norm = LogNorm(vmin=1, vmax=None) if drop_number.sum() > 0 else None
+
+ # Remove cbar_kwargs if add_colorbar=False
+ if not add_colorbar:
+ cbar_kwargs = None
+
+ # Plot
+ p = drop_number.plot.pcolormesh(
+ ax=ax,
+ x=DIAMETER_DIMENSION,
+ y=VELOCITY_DIMENSION,
+ cmap=cmap,
+ extend=extend,
+ norm=norm,
+ add_colorbar=add_colorbar,
+ cbar_kwargs=cbar_kwargs,
+ **plot_kwargs,
+ )
+ if not is_facetgrid:
+ p.axes.set_xlabel("Diamenter [mm]")
+ p.axes.set_ylabel("Fall velocity [m/s]")
+ p.axes.set_title(title)
+ else:
+ p.set_axis_labels("Diameter [mm]", "Fall velocity [m/s]")
+
+ return p
+
+
+ def plot_raw_and_filtered_spectra(
+ ds,
+ cmap=None,
+ norm=None,
+ extend="max",
+ add_theoretical_average_velocity=True,
+ add_measured_average_velocity=True,
+ figsize=(8, 4),
+ dpi=300,
+ ):
+ """Plot raw and filtered drop spectrum."""
+ # Retrieve spectrum arrays
+ drop_number = _get_spectrum_variable(ds, variable="drop_number")
+ if "time" in drop_number.dims:
+ drop_number = drop_number.sum(dim="time")
+ drop_number = drop_number.compute()
+
+ raw_drop_number = _get_spectrum_variable(ds, variable="raw_drop_number")
+ if "time" in raw_drop_number.dims:
+ raw_drop_number = raw_drop_number.sum(dim="time")
+ raw_drop_number = raw_drop_number.compute()
+
+ # Compute theoretical and measured average velocity if asked
+ if add_theoretical_average_velocity:
+ theoretical_average_velocity = ds["fall_velocity"]
+ if "time" in theoretical_average_velocity.dims:
+ theoretical_average_velocity = theoretical_average_velocity.mean(dim="time")
+ if add_measured_average_velocity and VELOCITY_DIMENSION in drop_number.dims:
+ measured_average_velocity = get_drop_average_velocity(drop_number)
+
+ # Define norm if not specified
+ if norm is None:
+ norm = LogNorm(1, raw_drop_number.max())
+
+ # Initialize figure
+ fig = plt.figure(figsize=figsize, dpi=dpi)
+ gs = GridSpec(1, 2, width_ratios=[1, 1.15], wspace=0.05) # More space for ax2
+ ax1 = fig.add_subplot(gs[0])
+ ax2 = fig.add_subplot(gs[1])
+
+ # Plot raw_drop_number
+ plot_spectrum(raw_drop_number, ax=ax1, cmap=cmap, norm=norm, extend=extend, add_colorbar=False, title="")
+
+ # Add velocities if asked
+ if add_theoretical_average_velocity:
+ theoretical_average_velocity.plot(ax=ax1, c="k", linestyle="dashed")
+ if add_measured_average_velocity and VELOCITY_DIMENSION in drop_number.dims:
+ measured_average_velocity.plot(ax=ax1, c="k", linestyle="dotted")
+
+ # Improve plot appearance
+ ax1.set_xlabel("Diamenter [mm]")
+ ax1.set_ylabel("Fall velocity [m/s]")
+ ax1.set_title("Raw Spectrum")
+
+ # Plot drop_number
+ plot_spectrum(drop_number, ax=ax2, cmap=cmap, norm=norm, extend=extend, add_colorbar=True, title="")
+
+ # Add velocities if asked
+ if add_theoretical_average_velocity:
+ theoretical_average_velocity.plot(ax=ax2, c="k", linestyle="dashed", label="Theoretical velocity")
+ if add_measured_average_velocity and VELOCITY_DIMENSION in drop_number.dims:
+ measured_average_velocity.plot(ax=ax2, c="k", linestyle="dotted", label="Measured average velocity")
+
+ # Improve plot appearance
+ ax2.set_yticks([])
+ ax2.set_yticklabels([])
+ ax2.set_xlabel("Diamenter [mm]")
+ ax2.set_ylabel("")
+ ax2.set_title("Filtered Spectrum")
+
+ # Add legend
+ if add_theoretical_average_velocity or add_measured_average_velocity:
+ ax2.legend(loc="lower right", frameon=False)
+
+ return fig
+
+
+ ####-------------------------------------------------------------------------------------------------------
+ #### DenseLines
+
+
  def normalize_array(arr, method="max"):
  """Normalize a NumPy array according to the chosen method.
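As a usage sketch, assuming ds is an xarray.Dataset holding a DISDRODB product with a raw_drop_number spectrum (how the dataset is opened is omitted here):

    import matplotlib.pyplot as plt

    from disdrodb.viz.plots import plot_spectrum

    # Single aggregated spectrum (the time dimension is summed internally)
    plot_spectrum(ds, variable="raw_drop_number")
    plt.show()

    # FacetGrid over the first timesteps, as suggested in the docstring
    ds.isel(time=slice(0, 9)).disdrodb.plot_spectrum(col="time", col_wrap=3)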