disdrodb 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +4 -0
- disdrodb/_version.py +2 -2
- disdrodb/api/checks.py +70 -47
- disdrodb/api/configs.py +0 -2
- disdrodb/api/create_directories.py +0 -2
- disdrodb/api/info.py +3 -3
- disdrodb/api/io.py +48 -8
- disdrodb/api/path.py +116 -133
- disdrodb/api/search.py +12 -3
- disdrodb/cli/disdrodb_create_summary.py +113 -0
- disdrodb/cli/disdrodb_create_summary_station.py +11 -1
- disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0b_station.py +2 -2
- disdrodb/cli/disdrodb_run_l0c_station.py +2 -2
- disdrodb/cli/disdrodb_run_l1_station.py +2 -2
- disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
- disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
- disdrodb/constants.py +1 -1
- disdrodb/data_transfer/download_data.py +123 -7
- disdrodb/etc/products/L1/global.yaml +1 -1
- disdrodb/etc/products/L2E/5MIN.yaml +1 -0
- disdrodb/etc/products/L2E/global.yaml +1 -1
- disdrodb/etc/products/L2M/GAMMA_GS_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/GAMMA_ML.yaml +1 -1
- disdrodb/etc/products/L2M/LOGNORMAL_GS_LOG_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/LOGNORMAL_GS_ND_MAE.yaml +6 -0
- disdrodb/etc/products/L2M/LOGNORMAL_ML.yaml +8 -0
- disdrodb/etc/products/L2M/global.yaml +11 -3
- disdrodb/issue/writer.py +2 -0
- disdrodb/l0/check_configs.py +49 -16
- disdrodb/l0/configs/LPM/l0a_encodings.yml +2 -2
- disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +2 -2
- disdrodb/l0/configs/LPM/l0b_encodings.yml +2 -2
- disdrodb/l0/configs/LPM/raw_data_format.yml +2 -2
- disdrodb/l0/configs/PWS100/l0b_encodings.yml +1 -0
- disdrodb/l0/configs/SWS250/bins_diameter.yml +108 -0
- disdrodb/l0/configs/SWS250/bins_velocity.yml +83 -0
- disdrodb/l0/configs/SWS250/l0a_encodings.yml +18 -0
- disdrodb/l0/configs/SWS250/l0b_cf_attrs.yml +72 -0
- disdrodb/l0/configs/SWS250/l0b_encodings.yml +155 -0
- disdrodb/l0/configs/SWS250/raw_data_format.yml +148 -0
- disdrodb/l0/l0a_processing.py +10 -5
- disdrodb/l0/l0b_nc_processing.py +10 -6
- disdrodb/l0/l0b_processing.py +92 -72
- disdrodb/l0/l0c_processing.py +369 -251
- disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +8 -1
- disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +2 -2
- disdrodb/l0/readers/LPM/BELGIUM/ULIEGE.py +256 -0
- disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +2 -2
- disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +2 -2
- disdrodb/l0/readers/LPM/GERMANY/DWD.py +491 -0
- disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +2 -2
- disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
- disdrodb/l0/readers/LPM/KIT/CHWALA.py +2 -2
- disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +107 -12
- disdrodb/l0/readers/LPM/SLOVENIA/UL.py +3 -3
- disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +2 -2
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +5 -14
- disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +5 -14
- disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL.py +117 -8
- disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
- disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +10 -14
- disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +10 -14
- disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
- disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +8 -14
- disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +382 -0
- disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +4 -0
- disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +1 -1
- disdrodb/l0/readers/PARSIVEL2/GREECE/NOA.py +127 -0
- disdrodb/l0/readers/PARSIVEL2/ITALY/HYDROX.py +239 -0
- disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +136 -0
- disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
- disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
- disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +5 -11
- disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +4 -17
- disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +5 -14
- disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +10 -13
- disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +10 -13
- disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +3 -0
- disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PANGASA.py +232 -0
- disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +6 -18
- disdrodb/l0/readers/PARSIVEL2/SPAIN/GRANADA.py +120 -0
- disdrodb/l0/readers/PARSIVEL2/USA/C3WE.py +7 -25
- disdrodb/l0/readers/PWS100/AUSTRIA/HOAL.py +321 -0
- disdrodb/l0/readers/SW250/BELGIUM/KMI.py +239 -0
- disdrodb/l1/beard_model.py +31 -129
- disdrodb/l1/fall_velocity.py +156 -57
- disdrodb/l1/filters.py +25 -28
- disdrodb/l1/processing.py +12 -14
- disdrodb/l1_env/routines.py +46 -17
- disdrodb/l2/empirical_dsd.py +6 -0
- disdrodb/l2/processing.py +3 -3
- disdrodb/metadata/checks.py +132 -125
- disdrodb/metadata/geolocation.py +0 -2
- disdrodb/psd/fitting.py +180 -210
- disdrodb/psd/models.py +1 -1
- disdrodb/routines/__init__.py +54 -0
- disdrodb/{l0/routines.py → routines/l0.py} +288 -418
- disdrodb/{l1/routines.py → routines/l1.py} +60 -92
- disdrodb/{l2/routines.py → routines/l2.py} +284 -485
- disdrodb/{routines.py → routines/wrappers.py} +100 -7
- disdrodb/scattering/axis_ratio.py +95 -85
- disdrodb/scattering/permittivity.py +24 -0
- disdrodb/scattering/routines.py +56 -36
- disdrodb/summary/routines.py +147 -45
- disdrodb/utils/archiving.py +434 -0
- disdrodb/utils/attrs.py +2 -0
- disdrodb/utils/cli.py +5 -5
- disdrodb/utils/dask.py +62 -1
- disdrodb/utils/decorators.py +31 -0
- disdrodb/utils/encoding.py +10 -1
- disdrodb/{l2 → utils}/event.py +1 -66
- disdrodb/utils/logger.py +1 -1
- disdrodb/utils/manipulations.py +22 -12
- disdrodb/utils/routines.py +166 -0
- disdrodb/utils/time.py +5 -293
- disdrodb/utils/xarray.py +3 -0
- disdrodb/viz/plots.py +109 -15
- {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/METADATA +3 -2
- {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/RECORD +124 -96
- {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/entry_points.txt +1 -0
- {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/WHEEL +0 -0
- {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.1.3.dist-info → disdrodb-0.1.5.dist-info}/top_level.txt +0 -0
disdrodb/{l2/routines.py → routines/l2.py}
@@ -21,12 +21,9 @@ import datetime
 import json
 import logging
 import os
-import shutil
 import time
 from typing import Optional

-import dask
-import numpy as np
 import pandas as pd

 from disdrodb.api.checks import check_station_inputs
@@ -34,8 +31,8 @@ from disdrodb.api.create_directories import (
     create_logs_directory,
     create_product_directory,
 )
-from disdrodb.api.info import
-from disdrodb.api.io import
+from disdrodb.api.info import group_filepaths
+from disdrodb.api.io import open_netcdf_files
 from disdrodb.api.path import (
     define_file_folder_path,
     define_l2e_filename,
@@ -51,29 +48,31 @@ from disdrodb.configs import (
     get_product_temporal_resolutions,
 )
 from disdrodb.l1.resampling import resample_dataset
-from disdrodb.l2.event import get_files_partitions, group_timesteps_into_event
 from disdrodb.l2.processing import (
     generate_l2_radar,
     generate_l2e,
     generate_l2m,
 )
 from disdrodb.metadata import read_station_metadata
+from disdrodb.scattering.routines import precompute_scattering_tables
+from disdrodb.utils.archiving import define_temporal_partitions, get_files_partitions
+from disdrodb.utils.dask import execute_tasks_safely
 from disdrodb.utils.decorators import delayed_if_parallel, single_threaded_if_parallel
 from disdrodb.utils.list import flatten_list

 # Logger
 from disdrodb.utils.logger import (
-    close_logger,
-    create_logger_file,
     create_product_logs,
-    log_error,
     log_info,
 )
+from disdrodb.utils.routines import (
+    is_possible_product,
+    run_product_generation,
+    try_get_required_filepaths,
+)
 from disdrodb.utils.time import (
     ensure_sample_interval_in_seconds,
-
-    generate_time_blocks,
-    get_resampling_information,
+    get_sampling_information,
 )
 from disdrodb.utils.writer import write_product

@@ -81,210 +80,13 @@ logger = logging.getLogger(__name__)
 
 
 ####----------------------------------------------------------------------------.
-def identify_events(
-    filepaths,
-    parallel=False,
-    min_drops=5,
-    neighbor_min_size=2,
-    neighbor_time_interval="5MIN",
-    event_max_time_gap="6H",
-    event_min_duration="5MIN",
-    event_min_size=3,
-):
-    """Return a list of rainy events.
-
-    Rainy timesteps are defined when N > min_drops.
-    Any rainy isolated timesteps (based on neighborhood criteria) is removed.
-    Then, consecutive rainy timesteps are grouped into the same event if the time gap between them does not
-    exceed `event_max_time_gap`. Finally, events that do not meet minimum size or duration
-    requirements are filtered out.
-
-    Parameters
-    ----------
-    filepaths: list
-        List of L1C file paths.
-    parallel: bool
-        Whether to load the files in parallel.
-        Set parallel=True only in a multiprocessing environment.
-        The default is False.
-    neighbor_time_interval : str
-        The time interval around a given a timestep defining the neighborhood.
-        Only timesteps that fall within this time interval before or after a timestep are considered neighbors.
-    neighbor_min_size : int, optional
-        The minimum number of neighboring timesteps required within `neighbor_time_interval` for a
-        timestep to be considered non-isolated. Isolated timesteps are removed !
-        - If `neighbor_min_size=0, then no timestep is considered isolated and no filtering occurs.
-        - If `neighbor_min_size=1`, the timestep must have at least one neighbor within `neighbor_time_interval`.
-        - If `neighbor_min_size=2`, the timestep must have at least two timesteps within `neighbor_time_interval`.
-        Defaults to 1.
-    event_max_time_gap: str
-        The maximum time interval between two timesteps to be considered part of the same event.
-        This parameters is used to group timesteps into events !
-    event_min_duration : str
-        The minimum duration an event must span. Events shorter than this duration are discarded.
-    event_min_size : int, optional
-        The minimum number of valid timesteps required for an event. Defaults to 1.
-
-    Returns
-    -------
-    list of dict
-        A list of events, where each event is represented as a dictionary with keys:
-        - "start_time": np.datetime64, start time of the event
-        - "end_time": np.datetime64, end time of the event
-        - "duration": np.timedelta64, duration of the event
-        - "n_timesteps": int, number of valid timesteps in the event
-    """
-    # Open datasets in parallel
-    ds = open_netcdf_files(filepaths, variables=["time", "N"], parallel=parallel)
-    # Sort dataset by time
-    ds = ensure_sorted_by_time(ds)
-    # Define candidate timesteps to group into events
-    idx_valid = ds["N"].data > min_drops
-    timesteps = ds["time"].data[idx_valid]
-    # Define event list
-    event_list = group_timesteps_into_event(
-        timesteps=timesteps,
-        neighbor_min_size=neighbor_min_size,
-        neighbor_time_interval=neighbor_time_interval,
-        event_max_time_gap=event_max_time_gap,
-        event_min_duration=event_min_duration,
-        event_min_size=event_min_size,
-    )
-    return event_list
-
-
-def identify_time_partitions(filepaths: list[str], freq: str) -> list[dict]:
-    """Identify the set of time blocks covered by files.
-
-    The result is a minimal, sorted, and unique set of time partitions.
-
-    Parameters
-    ----------
-    filepaths : list of str
-        Paths to input files from which start and end times will be extracted
-        via `get_start_end_time_from_filepaths`.
-    freq : {'none', 'hour', 'day', 'month', 'quarter', 'season', 'year'}
-        Frequency determining the granularity of candidate blocks.
-        See `generate_time_blocks` for more details.
-
-    Returns
-    -------
-    list of dict
-        A list of dictionaries, each containing:
-
-        - `start_time` (numpy.datetime64[s])
-          Inclusive start of a time block.
-        - `end_time` (numpy.datetime64[s])
-          Inclusive end of a time block.
-
-        Only those blocks that overlap at least one file's interval are returned.
-        The list is sorted by `start_time` and contains no duplicate blocks.
-    """
-    # Define file start time and end time
-    start_times, end_times = get_start_end_time_from_filepaths(filepaths)
-
-    # Define files time coverage
-    start_time, end_time = start_times.min(), end_times.max()
-
-    # Compute candidate time blocks
-    blocks = generate_time_blocks(start_time, end_time, freq=freq)  # end_time non inclusive is correct?
-
-    # Select time blocks with files
-    mask = (blocks[:, 0][:, None] <= end_times) & (blocks[:, 1][:, None] >= start_times)
-    blocks = blocks[mask.any(axis=1)]
-
-    # Ensure sorted unique time blocks
-    order = np.argsort(blocks[:, 0])
-    blocks = np.unique(blocks[order], axis=0)
-
-    # Convert to list of dicts
-    list_time_blocks = [{"start_time": start_time, "end_time": end_time} for start_time, end_time in blocks]
-    return list_time_blocks
-
-
-def is_possible_product(accumulation_interval, sample_interval, rolling):
-    """Assess if production is possible given the requested accumulation interval and source sample_interval."""
-    # Avoid rolling product generation at source sample interval
-    if rolling and accumulation_interval == sample_interval:
-        return False
-    # Avoid product generation if the accumulation_interval is less than the sample interval
-    if accumulation_interval < sample_interval:
-        return False
-    # Avoid producti generation if accumulation_interval is not multiple of sample_interval
-    return accumulation_interval % sample_interval == 0
-
-
-def define_temporal_partitions(filepaths, strategy, parallel, strategy_options):
-    """Define temporal file processing partitions.
-
-    Parameters
-    ----------
-    filepaths : list
-        List of files paths to be processed
-
-    strategy : str
-        Which partitioning strategy to apply:
-
-        - ``'time_block'`` defines fixed time intervals (e.g. monthly) covering input files.
-        - ``'event'`` detect clusters of precipitation ("events").
-
-    parallel : bool
-        If True, parallel data loading is used to identify events.
-
-    strategy_options : dict
-        Dictionary with strategy-specific parameters:
-
-        If ``strategy == 'time_block'``, supported options are:
-
-        - ``freq``: Time unit for blocks. One of {'year', 'season', 'month', 'day'}.
-
-        See identify_time_partitions for more information.
-
-        If ``strategy == 'event'``, supported options are:
-
-        - ``min_drops`` : int
-          Minimum number of drops to consider a timestep.
-        - ``neighbor_min_size`` : int
-          Minimum cluster size for merging neighboring events.
-        - ``neighbor_time_interval`` : str
-          Time window (e.g. "5MIN") to merge adjacent clusters.
-        - ``event_max_time_gap`` : str
-          Maximum allowed gap (e.g. "6H") within a single event.
-        - ``event_min_duration`` : str
-          Minimum total duration (e.g. "5MIN") of an event.
-        - ``event_min_size`` : int
-          Minimum number of records in an event.
-
-        See identify_events for more information.
-
-    Returns
-    -------
-    list
-        A list of dictionaries, each containing:
-
-        - ``start_time`` (numpy.datetime64[s])
-          Inclusive start of an event or time block.
-        - ``end_time`` (numpy.datetime64[s])
-          Inclusive end of an event or time block.
-
-    Notes
-    -----
-    - The ``'event'`` strategy requires loading data into memory to identify clusters.
-    - The ``'time_block'`` strategy can operate on metadata alone, without full data loading.
-    - The ``'event'`` strategy implicitly performs data selection on which files to process !
-    - The ``'time_block'`` strategy does not performs data selection on which files to process !
-    """
-    if strategy not in ["time_block", "event"]:
-        raise ValueError(f"Unknown strategy: {strategy!r}. Must be 'time_block' or 'event'.")
-    if strategy == "event":
-        return identify_events(filepaths, parallel=parallel, **strategy_options)
-
-    return identify_time_partitions(filepaths, **strategy_options)
 
 
 class ProcessingOptions:
     """Define L2 products processing options."""

+    # TODO: TO MOVE ELSEWHERE (AFTER L1 REFACTORING !)
+
     def __init__(self, product, filepaths, parallel, temporal_resolutions=None):
         """Define L2 products processing options."""
         import disdrodb
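Note: the event/time-block partitioning helpers removed above now ship in disdrodb.utils.archiving (see the new import "from disdrodb.utils.archiving import define_temporal_partitions, get_files_partitions"). A minimal sketch of how the relocated helper could be called, assuming its signature and defaults are unchanged by the move; the file paths are hypothetical:

# Sketch only: file paths are placeholders; option values mirror the defaults shown in the removed code.
from disdrodb.utils.archiving import define_temporal_partitions

filepaths = ["/path/to/L1.file1.nc", "/path/to/L1.file2.nc"]

# Fixed time blocks (no data loading required)
time_blocks = define_temporal_partitions(
    filepaths,
    strategy="time_block",
    parallel=False,
    strategy_options={"freq": "month"},
)

# Precipitation events (loads "time" and "N" into memory)
events = define_temporal_partitions(
    filepaths,
    strategy="event",
    parallel=False,
    strategy_options={
        "min_drops": 5,
        "neighbor_min_size": 2,
        "neighbor_time_interval": "5MIN",
        "event_max_time_gap": "6H",
        "event_min_duration": "5MIN",
        "event_min_size": 3,
    },
)
# Both calls return a list of {"start_time": ..., "end_time": ...} dictionaries.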
@@ -319,10 +121,10 @@ class ProcessingOptions:
 
         # -------------------------------------------------------------------------.
         # Retrieve product options
-        product_options = dict_product_options[temporal_resolution]
+        product_options = dict_product_options[temporal_resolution].copy()

         # Retrieve accumulation_interval and rolling option
-        accumulation_interval, rolling =
+        accumulation_interval, rolling = get_sampling_information(temporal_resolution)

         # Extract processing options
         archive_options = product_options.pop("archive_options")
@@ -337,7 +139,7 @@ class ProcessingOptions:
 
         # -------------------------------------------------------------------------.
         # Define list of temporal partitions
-        # - [{start_time:
+        # - [{start_time: np.datetime64, end_time: np.datetime64}, ....]
         # - Either strategy: "event" or "time_block" or save_by_time_block"
         # - "event" requires loading data into memory to identify events
         #   --> Does some data filtering on what to process !
@@ -362,6 +164,7 @@ class ProcessingOptions:
         # some data after the actual event end_time to ensure that the resampled dataset
         # contains the event_end_time
         # --> get_files_partitions adjust the event end_time to accounts for the required "border" data.
+        # - ATTENTION: get_files_partitions returns start_time and end_time as datetime objects !
         files_partitions = [
             get_files_partitions(
                 list_partitions=list_partitions,
@@ -410,45 +213,19 @@ class ProcessingOptions:
         return self.dict_folder_partitioning[temporal_resolution]


-def precompute_scattering_tables(
-    frequency,
-    num_points,
-    diameter_max,
-    canting_angle_std,
-    axis_ratio_model,
-    permittivity_model,
-    water_temperature,
-    elevation_angle,
-    verbose=True,
-):
-    """Precompute the pyTMatrix scattering tables required for radar variables simulations."""
-    from disdrodb.scattering.routines import get_list_simulations_params, load_scatterer
-
-    # Define parameters for all requested simulations
-    list_params = get_list_simulations_params(
-        frequency=frequency,
-        num_points=num_points,
-        diameter_max=diameter_max,
-        canting_angle_std=canting_angle_std,
-        axis_ratio_model=axis_ratio_model,
-        permittivity_model=permittivity_model,
-        water_temperature=water_temperature,
-        elevation_angle=elevation_angle,
-    )
-
-    # Compute require scattering tables
-    for params in list_params:
-        # Initialize scattering table
-        _ = load_scatterer(
-            verbose=verbose,
-            **params,
-        )
-
-
 ####----------------------------------------------------------------------------.
 #### L2E


+def define_l2e_logs_filename(campaign_name, station_name, start_time, end_time, accumulation_interval, rolling):
+    """Define L2E logs filename."""
+    temporal_resolution = define_temporal_resolution(seconds=accumulation_interval, rolling=rolling)
+    starting_time = pd.to_datetime(start_time).strftime("%Y%m%d%H%M%S")
+    ending_time = pd.to_datetime(end_time).strftime("%Y%m%d%H%M%S")
+    logs_filename = f"L2E.{temporal_resolution}.{campaign_name}.{station_name}.s{starting_time}.e{ending_time}"
+    return logs_filename
+
+
 @delayed_if_parallel
 @single_threaded_if_parallel
 def _generate_l2e(
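Note: the new define_l2e_logs_filename helper builds a per-partition log name from the event window. An illustration of the resulting name, assuming define_temporal_resolution returns a token such as "5MIN" (its exact output is not shown in this diff) and using hypothetical campaign and station names:

logs_filename = define_l2e_logs_filename(
    campaign_name="EXAMPLE_CAMPAIGN",   # hypothetical
    station_name="STATION_1",           # hypothetical
    start_time="2021-07-01 00:00:00",
    end_time="2021-07-01 06:00:00",
    accumulation_interval=300,          # seconds
    rolling=False,
)
# -> "L2E.<temporal_resolution>.EXAMPLE_CAMPAIGN.STATION_1.s20210701000000.e20210701060000"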
@@ -457,6 +234,7 @@ def _generate_l2e(
     filepaths,
     data_dir,
     logs_dir,
+    logs_filename,
     folder_partitioning,
     campaign_name,
     station_name,
@@ -469,42 +247,42 @@ def _generate_l2e(
     verbose,
     parallel,  # this is used by the decorator and to initialize correctly the logger !
 ):
-
-    # Define product
+    """Generate the L2E product from the DISDRODB L1 netCDF file."""
+    # Define product
     product = "L2E"

-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Define product processing function
+    def core(
+        filepaths,
+        campaign_name,
+        station_name,
+        product_options,
+        # Processing options
+        logger,
+        parallel,
+        verbose,
+        force,
+        # Resampling arguments
+        start_time,
+        end_time,
+        accumulation_interval,
+        rolling,
+        # Archiving arguments
+        data_dir,
+        folder_partitioning,
+    ):
+        """Define L1 product processing."""
+        # Copy to avoid in-place replacement (outside this function)
+        product_options = product_options.copy()
+
+        # Open the dataset over the period of interest
         ds = open_netcdf_files(filepaths, start_time=start_time, end_time=end_time, parallel=False)
+        ds = ds.load()
+        ds.close()

-
-
-    # Define sample interval in seconds
+        # Resample dataset  # TODO: in future to perform in L1
+        # - Define sample interval in seconds
         sample_interval = ensure_sample_interval_in_seconds(ds["sample_interval"]).to_numpy().item()
-
         # - Resample dataset
         ds = resample_dataset(
             ds=ds,
@@ -518,71 +296,71 @@ def _generate_l2e(
         radar_enabled = product_options.get("radar_enabled")
         radar_options = product_options.get("radar_options")

-
-
-    # - Only if at least 2 timesteps available
-    if ds["time"].size > 2:
-
-        # Compute L2E variables
-        ds = generate_l2e(ds=ds, **l2e_options)
-
-        # Simulate L2M-based radar variables if asked
-        if radar_enabled:
-            ds_radar = generate_l2_radar(ds, parallel=not parallel, **radar_options)
-            ds.update(ds_radar)
-            ds.attrs = ds_radar.attrs.copy()
-
-        # Write netCDF4 dataset
-        if ds["time"].size > 1:
-            # Define filepath
-            filename = define_l2e_filename(
-                ds,
-                campaign_name=campaign_name,
-                station_name=station_name,
-                sample_interval=accumulation_interval,
-                rolling=rolling,
-            )
-            folder_path = define_file_folder_path(ds, data_dir=data_dir, folder_partitioning=folder_partitioning)
-            filepath = os.path.join(folder_path, filename)
-            # Write file
-            write_product(ds, filepath=filepath, force=force)
-
-            # Update log
-            log_info(logger=logger, msg=f"{product} creation of {filename} has ended.", verbose=verbose)
-        else:
-            log_info(logger=logger, msg="File not created. Less than one timesteps available.", verbose=verbose)
-    else:
+        # Ensure at least 2 timestep available
+        if ds["time"].size < 2:
             log_info(logger=logger, msg="File not created. Less than two timesteps available.", verbose=verbose)
+            return None

-
-
-        if folder_partitioning != "":
-            log_dst_dir = define_file_folder_path(ds, data_dir=logs_dir, folder_partitioning=folder_partitioning)
-            os.makedirs(log_dst_dir, exist_ok=True)
-
-    ##--------------------------------------------------------------------.
-    # Clean environment
-    del ds
+        # Compute L2E variables
+        ds = generate_l2e(ds=ds, **l2e_options)

-
+        # Ensure at least 2 timestep available
+        if ds["time"].size < 2:
+            log_info(logger=logger, msg="File not created. Less than two timesteps available.", verbose=verbose)
+            return None

-
-
-
-
-
-        log_error(logger, msg, verbose=verbose)
+        # Simulate L2M-based radar variables if asked
+        if radar_enabled:
+            ds_radar = generate_l2_radar(ds, parallel=not parallel, **radar_options)
+            ds.update(ds_radar)
+            ds.attrs = ds_radar.attrs.copy()

-
-
+        # Write L2E netCDF4 dataset
+        filename = define_l2e_filename(
+            ds,
+            campaign_name=campaign_name,
+            station_name=station_name,
+            sample_interval=accumulation_interval,
+            rolling=rolling,
+        )
+        folder_path = define_file_folder_path(ds, dir_path=data_dir, folder_partitioning=folder_partitioning)
+        filepath = os.path.join(folder_path, filename)
+        write_product(ds, filepath=filepath, force=force)

-
-
-        # Move logger file to correct partitioning directory
-        dst_filepath = os.path.join(log_dst_dir, os.path.basename(logger_filepath))
-        shutil.move(logger_filepath, dst_filepath)
-        logger_filepath = dst_filepath
+        # Return L2E dataset
+        return ds

+    # Define product processing function kwargs
+    core_func_kwargs = dict(  # noqa: C408
+        filepaths=filepaths,
+        campaign_name=campaign_name,
+        station_name=station_name,
+        product_options=product_options,
+        # Resampling arguments
+        start_time=start_time,
+        end_time=end_time,
+        accumulation_interval=accumulation_interval,
+        rolling=rolling,
+        # Archiving arguments
+        data_dir=data_dir,
+        folder_partitioning=folder_partitioning,
+        # Processing options
+        parallel=parallel,
+        verbose=verbose,
+        force=force,
+    )
+    # Run product generation
+    logger_filepath = run_product_generation(
+        product=product,
+        logs_dir=logs_dir,
+        logs_filename=logs_filename,
+        parallel=parallel,
+        verbose=verbose,
+        folder_partitioning=folder_partitioning,
+        core_func=core,
+        core_func_kwargs=core_func_kwargs,
+        pass_logger=True,
+    )
     # Return the logger file path
     return logger_filepath

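Note: both _generate_l2e (above) and _generate_l2m (below) now follow the same structure: the product-specific logic lives in a local core() function, and run_product_generation (imported from disdrodb.utils.routines) takes over the logger creation, error capture, and log-file archiving that the removed inline try/except blocks used to perform. A minimal, generic sketch of this division of responsibilities, not the actual disdrodb implementation:

import logging

def run_with_logging(core_func, core_func_kwargs, logs_filename):
    # Stand-in for run_product_generation: owns logging and error handling,
    # while the core function only produces (and returns) the dataset.
    logger = logging.getLogger(logs_filename)
    try:
        return core_func(logger=logger, **core_func_kwargs)
    except Exception as err:
        logger.error("%s: %s", type(err).__name__, err)
        return None

def core(logger, value):
    # Toy core function standing in for the local core() defined in _generate_l2e.
    logger.info("Processing %s", value)
    return value * 2

result = run_with_logging(core, {"value": 21}, logs_filename="L2E.example")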
@@ -672,33 +450,22 @@ def run_l2e_station(
     log_info(logger=logger, msg=msg, verbose=verbose)

     # -------------------------------------------------------------------------.
-    # List
+    # List files to process
+    # - If no data available, print error message and return None
     required_product = get_required_product(product)
-
-
-
-
-
-
-
-
-
-
-        )
-    except Exception as e:
-        print(str(e))  # Case where no file paths available
-        flag_not_available_data = True
-
-    # -------------------------------------------------------------------------.
-    # If no data available, print error message and return None
-    if flag_not_available_data:
-        msg = (
-            f"{product} processing of {data_source} {campaign_name} {station_name} "
-            + f"has not been launched because of missing {required_product} data."
-        )
-        print(msg)
+    filepaths = try_get_required_filepaths(
+        data_archive_dir=data_archive_dir,
+        data_source=data_source,
+        campaign_name=campaign_name,
+        station_name=station_name,
+        product=required_product,
+        # Processing options
+        debugging_mode=debugging_mode,
+    )
+    if filepaths is None:
         return

+    # -------------------------------------------------------------------------.
     # Retrieve L2E processing options
     l2e_processing_options = ProcessingOptions(product="L2E", filepaths=filepaths, parallel=parallel)

@@ -725,7 +492,7 @@ def run_l2e_station(
         product_options = l2e_processing_options.get_product_options(temporal_resolution)

         # Retrieve accumulation_interval and rolling option
-        accumulation_interval, rolling =
+        accumulation_interval, rolling = get_sampling_information(temporal_resolution)

         # Precompute required scattering tables
         if product_options["radar_enabled"]:
@@ -770,6 +537,14 @@ def run_l2e_station(
                 filepaths=event_info["filepaths"],
                 data_dir=data_dir,
                 logs_dir=logs_dir,
+                logs_filename=define_l2e_logs_filename(
+                    campaign_name=campaign_name,
+                    station_name=station_name,
+                    start_time=event_info["start_time"],
+                    end_time=event_info["end_time"],
+                    rolling=rolling,
+                    accumulation_interval=accumulation_interval,
+                ),
                 folder_partitioning=folder_partitioning,
                 campaign_name=campaign_name,
                 station_name=station_name,
@@ -784,7 +559,7 @@ def run_l2e_station(
             )
             for event_info in files_partitions
         ]
-        list_logs =
+        list_logs = execute_tasks_safely(list_tasks=list_tasks, parallel=parallel, logs_dir=logs_dir)

         # -----------------------------------------------------------------.
         # Define product summary logs
@@ -811,6 +586,15 @@ def run_l2e_station(
 
 ####----------------------------------------------------------------------------.
 #### L2M
+def define_l2m_logs_filename(campaign_name, station_name, start_time, end_time, model_name, sample_interval, rolling):
+    """Define L2M logs filename."""
+    temporal_resolution = define_temporal_resolution(seconds=sample_interval, rolling=rolling)
+    starting_time = pd.to_datetime(start_time).strftime("%Y%m%d%H%M%S")
+    ending_time = pd.to_datetime(end_time).strftime("%Y%m%d%H%M%S")
+    logs_filename = (
+        f"L2M_{model_name}.{temporal_resolution}.{campaign_name}.{station_name}.s{starting_time}.e{ending_time}"
+    )
+    return logs_filename


 @delayed_if_parallel
@@ -821,6 +605,7 @@ def _generate_l2m(
     filepaths,
     data_dir,
     logs_dir,
+    logs_filename,
     folder_partitioning,
     campaign_name,
     station_name,
@@ -834,34 +619,34 @@ def _generate_l2m(
     verbose,
     parallel,  # this is used only to initialize the correct logger !
 ):
-
-    # Define product
+    """Generate the L2M product from a DISDRODB L2E netCDF file."""
+    # Define product
     product = "L2M"

-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Define product processing function
+    def core(
+        start_time,
+        end_time,
+        filepaths,
+        campaign_name,
+        station_name,
+        # Processing options
+        logger,
+        verbose,
+        force,
+        # Product options
+        product_options,
+        sample_interval,
+        rolling,
+        model_name,
+        # Archiving arguments
+        data_dir,
+        folder_partitioning,
+    ):
+        """Define L1 product processing."""
+        # Copy to avoid in-place replacement (outside this function)
+        product_options = product_options.copy()

-    ##------------------------------------------------------------------------
-    ### Core computation
-    try:
         ##------------------------------------------------------------------------.
         # Extract L2M processing options
         l2m_options = product_options.get("product_options")
@@ -870,7 +655,10 @@ def _generate_l2m(
 
         # Define variables to load
         optimization_kwargs = l2m_options["optimization_kwargs"]
-        if "init_method" in optimization_kwargs:
+        if "init_method" in optimization_kwargs and optimization_kwargs["init_method"] is None:
+            optimization_kwargs["init_method"] = "None"
+
+        if optimization_kwargs.get("init_method", "None") != "None":
             init_method = optimization_kwargs["init_method"]
             moments = [f"M{order}" for order in init_method.replace("M", "")] + ["M1"]
         else:
@@ -887,8 +675,10 @@ def _generate_l2m(
         ]

         ##------------------------------------------------------------------------.
-        # Open the
+        # Open the netCDF files
         ds = open_netcdf_files(filepaths, start_time=start_time, end_time=end_time, variables=variables)
+        ds = ds.load()
+        ds.close()

         # Produce L2M dataset
         ds = generate_l2m(
@@ -902,54 +692,58 @@ def _generate_l2m(
             ds.update(ds_radar)
             ds.attrs = ds_radar.attrs.copy()  # ds_radar contains already all L2M attrs

-        #
-        if ds["time"].size
-
-
-                ds,
-                campaign_name=campaign_name,
-                station_name=station_name,
-                sample_interval=sample_interval,
-                rolling=rolling,
-                model_name=model_name,
-            )
-            folder_path = define_file_folder_path(ds, data_dir=data_dir, folder_partitioning=folder_partitioning)
-            filepath = os.path.join(folder_path, filename)
-            # Write to disk
-            write_product(ds, filepath=filepath, force=force)
-
-            ##--------------------------------------------------------------------.
-            #### - Define logger file final directory
-            if folder_partitioning != "":
-                log_dst_dir = define_file_folder_path(ds, data_dir=logs_dir, folder_partitioning=folder_partitioning)
-                os.makedirs(log_dst_dir, exist_ok=True)
-
-            ##--------------------------------------------------------------------.
-            # Clean environment
-            del ds
-
-            # Log end processing
-            msg = f"{product} creation of {filename} has ended."
-            log_info(logger=logger, msg=msg, verbose=verbose)
-            success_flag = True
-
-        ##--------------------------------------------------------------------.
-        # Otherwise log the error
-    except Exception as e:
-        error_type = str(type(e).__name__)
-        msg = f"{error_type}: {e}"
-        log_error(logger, msg, verbose=verbose)
-
-    # Close the file logger
-    close_logger(logger)
-
-    # Move logger file to correct partitioning directory
-    if success_flag and folder_partitioning != "" and logger_filepath is not None:
-        # Move logger file to correct partitioning directory
-        dst_filepath = os.path.join(log_dst_dir, os.path.basename(logger_filepath))
-        shutil.move(logger_filepath, dst_filepath)
-        logger_filepath = dst_filepath
+        # Ensure at least 2 timestep available
+        if ds["time"].size < 2:
+            log_info(logger=logger, msg="File not created. Less than two timesteps available.", verbose=verbose)
+            return None

+        # Write L2M netCDF4 dataset
+        filename = define_l2m_filename(
+            ds,
+            campaign_name=campaign_name,
+            station_name=station_name,
+            sample_interval=sample_interval,
+            rolling=rolling,
+            model_name=model_name,
+        )
+        folder_path = define_file_folder_path(ds, dir_path=data_dir, folder_partitioning=folder_partitioning)
+        filepath = os.path.join(folder_path, filename)
+        write_product(ds, filepath=filepath, force=force)
+
+        # Return L2M dataset
+        return ds
+
+    # Define product processing function kwargs
+    core_func_kwargs = dict(  # noqa: C408
+        filepaths=filepaths,
+        start_time=start_time,
+        end_time=end_time,
+        campaign_name=campaign_name,
+        station_name=station_name,
+        # Processing options
+        verbose=verbose,
+        force=force,
+        # Product options
+        product_options=product_options,
+        sample_interval=sample_interval,
+        rolling=rolling,
+        model_name=model_name,
+        # Archiving arguments
+        data_dir=data_dir,
+        folder_partitioning=folder_partitioning,
+    )
+    # Run product generation
+    logger_filepath = run_product_generation(
+        product=product,
+        logs_dir=logs_dir,
+        logs_filename=logs_filename,
+        parallel=parallel,
+        verbose=verbose,
+        folder_partitioning=folder_partitioning,
+        core_func=core,
+        core_func_kwargs=core_func_kwargs,
+        pass_logger=True,
+    )
     # Return the logger file path
     return logger_filepath

@@ -1045,11 +839,10 @@ def run_l2m_station(
     # temporal_resolution = "1MIN"
     # temporal_resolution = "10MIN"
     temporal_resolutions = get_product_temporal_resolutions("L2M")
-    print(temporal_resolutions)
     for temporal_resolution in temporal_resolutions:

         # Retrieve accumulation_interval and rolling option
-        accumulation_interval, rolling =
+        accumulation_interval, rolling = get_sampling_information(temporal_resolution)

         # ------------------------------------------------------------------.
         # Avoid generation of rolling products for source sample interval !
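Note: the guard that follows relies on is_possible_product, now imported from disdrodb.utils.routines; its logic is shown in the block removed near the top of this diff. A worked example of that check:

# True: a 300 s accumulation from 60 s samples (a clean multiple)
is_possible_product(accumulation_interval=300, sample_interval=60, rolling=False)
# False: rolling product requested at the source sample interval
is_possible_product(accumulation_interval=60, sample_interval=60, rolling=True)
# False: 90 s is not a multiple of 60 s
is_possible_product(accumulation_interval=90, sample_interval=60, rolling=False)
# False: accumulation interval finer than the source sample interval
is_possible_product(accumulation_interval=30, sample_interval=60, rolling=False)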
@@ -1062,33 +855,21 @@ def run_l2m_station(
 
         # -----------------------------------------------------------------.
         # List files to process
+        # - If no data available, print error message and try with other L2E accumulation intervals
         required_product = get_required_product(product)
-
-
-
-
-
-
-
-
-
-
-
-
-
-            debugging_mode=debugging_mode,
-        )
-        except Exception as e:
-            print(str(e))  # Case where no file paths available
-            flag_not_available_data = True
-
-        # If no data available, try with other L2E accumulation intervals
-        if flag_not_available_data:
-            msg = (
-                f"{product} processing of {data_source} {campaign_name} {station_name} "
-                + f"has not been launched because of missing {required_product} {temporal_resolution} data."
-            )
-            log_info(logger=logger, msg=msg, verbose=verbose)
+        filepaths = try_get_required_filepaths(
+            data_archive_dir=data_archive_dir,
+            data_source=data_source,
+            campaign_name=campaign_name,
+            station_name=station_name,
+            product=required_product,
+            # Processing options
+            debugging_mode=debugging_mode,
+            # Product options
+            sample_interval=accumulation_interval,
+            rolling=rolling,
+        )
+        if filepaths is None:
             continue

         # -------------------------------------------------------------------------.
@@ -1120,6 +901,7 @@ def run_l2m_station(
             # -----------------------------------------------------------------.
             # Loop over distributions to fit
             # model_name = "GAMMA_ML"
+            # model_name = "LOGNORMAL_GS_ND_MAE"
             # model_options = l2m_options["models"][model_name]
             # Retrieve list of models to fit
             models = global_product_options.pop("models")
@@ -1146,23 +928,31 @@ def run_l2m_station(
 
             # -------------------------------------------------------------.
             # Create product directory
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            try:
+                data_dir = create_product_directory(
+                    # DISDRODB root directories
+                    data_archive_dir=data_archive_dir,
+                    metadata_archive_dir=metadata_archive_dir,
+                    # Station arguments
+                    data_source=data_source,
+                    campaign_name=campaign_name,
+                    station_name=station_name,
+                    # Processing options
+                    product=product,
+                    force=force,
+                    # Option for L2E
+                    sample_interval=accumulation_interval,
+                    rolling=rolling,
+                    # Option for L2M
+                    model_name=model_name,
+                )
+            except Exception:
+                msg = (
+                    f"Production of L2M_{model_name} for sample interval {accumulation_interval} s has been "
+                    + "skipped because the product already exists and force=False."
+                )
+                log_info(logger=logger, msg=msg, verbose=verbose)
+                continue

             # Define logs directory
             logs_dir = create_logs_directory(
@@ -1189,6 +979,15 @@ def run_l2m_station(
                         filepaths=event_info["filepaths"],
                         data_dir=data_dir,
                         logs_dir=logs_dir,
+                        logs_filename=define_l2m_logs_filename(
+                            campaign_name=campaign_name,
+                            station_name=station_name,
+                            start_time=event_info["start_time"],
+                            end_time=event_info["end_time"],
+                            model_name=model_name,
+                            sample_interval=accumulation_interval,
+                            rolling=rolling,
+                        ),
                         folder_partitioning=folder_partitioning,
                         campaign_name=campaign_name,
                         station_name=station_name,
@@ -1204,7 +1003,7 @@ def run_l2m_station(
                     )
                     for event_info in files_partitions
                 ]
-                list_logs =
+                list_logs = execute_tasks_safely(list_tasks=list_tasks, parallel=parallel, logs_dir=logs_dir)

                 # -----------------------------------------------------------------.
                 # Define L2M summary logs
@@ -1218,7 +1017,7 @@ def run_l2m_station(
                     data_archive_dir=data_archive_dir,
                     # Product options
                     model_name=model_name,
-                    sample_interval=
+                    sample_interval=accumulation_interval,
                     rolling=rolling,
                     # Logs list
                     list_logs=list_logs,