disdrodb 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- disdrodb/__init__.py +4 -0
- disdrodb/_version.py +2 -2
- disdrodb/api/checks.py +70 -47
- disdrodb/api/configs.py +0 -2
- disdrodb/api/info.py +3 -3
- disdrodb/api/io.py +48 -8
- disdrodb/api/path.py +116 -133
- disdrodb/api/search.py +12 -3
- disdrodb/cli/disdrodb_create_summary.py +103 -0
- disdrodb/cli/disdrodb_create_summary_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
- disdrodb/cli/disdrodb_run_l0b_station.py +2 -2
- disdrodb/cli/disdrodb_run_l0c_station.py +2 -2
- disdrodb/cli/disdrodb_run_l1_station.py +2 -2
- disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
- disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
- disdrodb/data_transfer/download_data.py +123 -7
- disdrodb/issue/writer.py +2 -0
- disdrodb/l0/l0a_processing.py +10 -5
- disdrodb/l0/l0b_nc_processing.py +10 -6
- disdrodb/l0/l0b_processing.py +26 -61
- disdrodb/l0/l0c_processing.py +369 -251
- disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +7 -0
- disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
- disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
- disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +136 -0
- disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
- disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
- disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +3 -0
- disdrodb/l1/fall_velocity.py +46 -0
- disdrodb/l1/processing.py +1 -1
- disdrodb/l2/processing.py +1 -1
- disdrodb/metadata/checks.py +132 -125
- disdrodb/psd/fitting.py +172 -205
- disdrodb/psd/models.py +1 -1
- disdrodb/routines/__init__.py +54 -0
- disdrodb/{l0/routines.py → routines/l0.py} +288 -418
- disdrodb/{l1/routines.py → routines/l1.py} +60 -92
- disdrodb/{l2/routines.py → routines/l2.py} +249 -462
- disdrodb/{routines.py → routines/wrappers.py} +95 -7
- disdrodb/scattering/axis_ratio.py +5 -1
- disdrodb/scattering/permittivity.py +18 -0
- disdrodb/scattering/routines.py +56 -36
- disdrodb/summary/routines.py +110 -34
- disdrodb/utils/archiving.py +434 -0
- disdrodb/utils/cli.py +5 -5
- disdrodb/utils/dask.py +62 -1
- disdrodb/utils/decorators.py +31 -0
- disdrodb/utils/encoding.py +5 -1
- disdrodb/{l2 → utils}/event.py +1 -66
- disdrodb/utils/logger.py +1 -1
- disdrodb/utils/manipulations.py +22 -12
- disdrodb/utils/routines.py +166 -0
- disdrodb/utils/time.py +3 -291
- disdrodb/utils/xarray.py +3 -0
- disdrodb/viz/plots.py +85 -14
- {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/METADATA +2 -2
- {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/RECORD +62 -54
- {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/entry_points.txt +1 -0
- {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/WHEEL +0 -0
- {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/top_level.txt +0 -0
disdrodb/__init__.py
CHANGED
@@ -62,6 +62,8 @@ from disdrodb.metadata.checks import (
     check_station_metadata,
 )
 from disdrodb.routines import (
+    create_summary,
+    create_summary_station,
     run_l0,
     run_l0_station,
     run_l0a,
@@ -126,6 +128,8 @@ __all__ = [
     "check_metadata_archive",
     "check_metadata_archive_geolocation",
     "check_station_metadata",
+    "create_summary",
+    "create_summary_station",
     "decibel",
     "define_configs",
     "download_archive",
disdrodb/_version.py
CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID

-__version__ = version = '0.1.3'
-__version_tuple__ = version_tuple = (0, 1, 3)
+__version__ = version = '0.1.4'
+__version_tuple__ = version_tuple = (0, 1, 4)

 __commit_id__ = commit_id = None
disdrodb/api/checks.py
CHANGED
@@ -144,14 +144,14 @@ def check_measurement_intervals(measurement_intervals):

 def check_sample_interval(sample_interval):
     """Check sample_interval argument validity."""
-    if not isinstance(sample_interval, int):
-        raise
+    if not isinstance(sample_interval, int) or isinstance(sample_interval, bool):
+        raise TypeError("'sample_interval' must be an integer.")


 def check_rolling(rolling):
     """Check rolling argument validity."""
     if not isinstance(rolling, bool):
-        raise
+        raise TypeError("'rolling' must be a boolean.")


 def check_folder_partitioning(folder_partitioning):
@@ -163,12 +163,12 @@ def check_folder_partitioning(folder_partitioning):
     folder_partitioning : str or None
         Defines the subdirectory structure based on the dataset's start time.
         Allowed values are:
-        - "": No additional subdirectories, files are saved directly in
-        - "year": Files are stored under a subdirectory for the year (<
-        - "year/month": Files are stored under subdirectories by year and month (<
-        - "year/month/day": Files are stored under subdirectories by year, month and day (<
-        - "year/month_name": Files are stored under subdirectories by year and month name (<
-        - "year/quarter": Files are stored under subdirectories by year and quarter (<
+        - "" or None: No additional subdirectories, files are saved directly in dir.
+        - "year": Files are stored under a subdirectory for the year (<dir>/2025).
+        - "year/month": Files are stored under subdirectories by year and month (<dir>/2025/04).
+        - "year/month/day": Files are stored under subdirectories by year, month and day (<dir>/2025/04/01).
+        - "year/month_name": Files are stored under subdirectories by year and month name (<dir>/2025/April).
+        - "year/quarter": Files are stored under subdirectories by year and quarter (<dir>/2025/Q2).

     Returns
     -------
@@ -176,6 +176,8 @@ def check_folder_partitioning(folder_partitioning):
         The verified folder partitioning scheme.
     """
     valid_options = ["", "year", "year/month", "year/month/day", "year/month_name", "year/quarter"]
+    if folder_partitioning is None:
+        folder_partitioning = ""
     if folder_partitioning not in valid_options:
         raise ValueError(
             f"Invalid folder_partitioning scheme '{folder_partitioning}'. Valid options are: {valid_options}.",
@@ -331,16 +333,37 @@ def check_valid_fields(fields, available_fields, field_name, invalid_fields_policy):
         fields = [fields]
     fields = np.unique(np.array(fields))
     invalid_fields_policy = check_invalid_fields_policy(invalid_fields_policy)
+
     # Check for invalid fields
     fields = np.array(fields)
     is_valid = np.isin(fields, available_fields)
     invalid_fields_values = fields[~is_valid].tolist()
     fields = fields[is_valid].tolist()
+
+    # If invalid fields, suggest corrections using difflib
+    if invalid_fields_values:
+
+        # Format invalid fields nicely (avoid single-element lists)
+        if len(invalid_fields_values) == 1:
+            invalid_fields_str = f"'{invalid_fields_values[0]}'"
+        else:
+            invalid_fields_str = f"{invalid_fields_values}"
+
+        # Prepare suggestion string
+        suggestions = []
+        for invalid in invalid_fields_values:
+            matches = difflib.get_close_matches(invalid, available_fields, n=1, cutoff=0.4)
+            if matches:
+                suggestions.append(f"Did you mean '{matches[0]}' instead of '{invalid}'?")
+        suggestion_msg = " " + " ".join(suggestions) if suggestions else ""
+
     # Error handling for invalid fields were found
     if invalid_fields_policy == "warn" and invalid_fields_values:
-
+        msg = f"Ignoring invalid {field_name}: {invalid_fields_str}.{suggestion_msg}"
+        warnings.warn(msg, UserWarning, stacklevel=2)
     elif invalid_fields_policy == "raise" and invalid_fields_values:
-
+        msg = f"These {field_name} do not exist: {invalid_fields_str}.{suggestion_msg}"
+        raise ValueError(msg)
     else:  # "ignore" silently drop invalid entries
         pass
     # If no valid fields left, raise error
@@ -349,38 +372,6 @@ def check_valid_fields(fields, available_fields, field_name, invalid_fields_policy):
     return fields


-def has_available_data(
-    data_source,
-    campaign_name,
-    station_name,
-    product,
-    data_archive_dir=None,
-    # Product Options
-    **product_kwargs,
-):
-    """Return ``True`` if data are available for the given product and station."""
-    # Define product directory
-    data_dir = define_data_dir(
-        product=product,
-        data_archive_dir=data_archive_dir,
-        data_source=data_source,
-        campaign_name=campaign_name,
-        station_name=station_name,
-        # Directory options
-        check_exists=False,
-        # Product Options
-        **product_kwargs,
-    )
-    # If the product directory does not exists, return False
-    if not os.path.isdir(data_dir):
-        return False
-
-    # If no files, return False
-    filepaths = list_files(data_dir, recursive=True)
-    nfiles = len(filepaths)
-    return nfiles >= 1
-
-
 def check_station_inputs(
     data_source,
     campaign_name,
@@ -396,6 +387,7 @@ def check_station_inputs(
     matches = difflib.get_close_matches(data_source, valid_data_sources, n=1, cutoff=0.4)
     suggestion = f"Did you mean '{matches[0]}'?" if matches else ""
     raise ValueError(f"DISDRODB does not include a data source named {data_source}. {suggestion}")
+
     # Check campaign name
     valid_campaigns = disdrodb.available_campaigns(data_sources=data_source, metadata_archive_dir=metadata_archive_dir)
     if campaign_name not in valid_campaigns:
@@ -420,6 +412,38 @@ def check_station_inputs(
     )


+def has_available_data(
+    data_source,
+    campaign_name,
+    station_name,
+    product,
+    data_archive_dir=None,
+    # Product Options
+    **product_kwargs,
+):
+    """Return ``True`` if data are available for the given product and station."""
+    # Define product directory
+    data_dir = define_data_dir(
+        product=product,
+        data_archive_dir=data_archive_dir,
+        data_source=data_source,
+        campaign_name=campaign_name,
+        station_name=station_name,
+        # Directory options
+        check_exists=False,
+        # Product Options
+        **product_kwargs,
+    )
+    # If the product directory does not exists, return False
+    if not os.path.isdir(data_dir):
+        return False
+
+    # If no files, return False
+    filepaths = list_files(data_dir, recursive=True)
+    nfiles = len(filepaths)
+    return nfiles >= 1
+
+
 def check_data_availability(
     product,
     data_source,
@@ -480,10 +504,9 @@ def check_issue_dir(data_source, campaign_name, metadata_archive_dir=None):
         campaign_name=campaign_name,
         check_exists=False,
     )
-    if not os.path.exists(issue_dir)
-        msg = "The issue directory does not exist at {issue_dir}."
+    if not os.path.exists(issue_dir) or not os.path.isdir(issue_dir):
+        msg = f"The issue directory does not exist at {issue_dir}."
         logger.error(msg)
-        raise ValueError(msg)
     return issue_dir


@@ -504,7 +527,7 @@ def check_issue_file(data_source, campaign_name, station_name, metadata_archive_dir):
         station_name=station_name,
         check_exists=False,
     )
-    # Check existence
+    # Check existence. If not, create one !
     if not os.path.exists(issue_filepath):
         create_station_issue(
             metadata_archive_dir=metadata_archive_dir,
disdrodb/api/configs.py
CHANGED
@@ -54,8 +54,6 @@ def get_sensor_configs_dir(sensor_name: str, product: str) -> str:
     config_dir = define_config_dir(product=product)
     config_sensor_dir = os.path.join(config_dir, sensor_name)
     if not os.path.exists(config_sensor_dir):
-        list_sensors = sorted(list_directories(config_dir, recursive=False, return_paths=False))
-        print(f"Available sensor_name are {list_sensors}")
         raise ValueError(f"The config directory {config_sensor_dir} does not exist.")
     return config_sensor_dir
disdrodb/api/info.py
CHANGED
@@ -410,8 +410,8 @@ def get_time_component(time, component):
     return str(func_dict[component](time))


-def
-    """Return
+def get_groups_value(groups, filepath):
+    """Return a string associated to the groups keys.

     If multiple keys are specified, the value returned is a string of format: ``<group_value_1>/<group_value_2>/...``

@@ -463,5 +463,5 @@ def group_filepaths(filepaths, groups=None):
         return filepaths
     groups = check_groups(groups)
     filepaths_dict = defaultdict(list)
-    _ = [filepaths_dict[
+    _ = [filepaths_dict[get_groups_value(groups, filepath)].append(filepath) for filepath in filepaths]
     return dict(filepaths_dict)
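
Note: the comprehension restored above in group_filepaths is a standard bucket-by-key pattern. An equivalent, more explicit sketch (group_by_key and the lambda are illustrative, not disdrodb functions):

from collections import defaultdict

def group_by_key(filepaths, key_func):
    # Equivalent to: _ = [d[key_func(fp)].append(fp) for fp in filepaths]
    grouped = defaultdict(list)
    for filepath in filepaths:
        grouped[key_func(filepath)].append(filepath)
    return dict(grouped)

print(group_by_key(["a.nc", "b.nc", "c.txt"], lambda fp: fp.rsplit(".", 1)[-1]))
# {'nc': ['a.nc', 'b.nc'], 'txt': ['c.txt']}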
disdrodb/api/io.py
CHANGED
@@ -133,6 +133,7 @@ def find_files(
     product,
     debugging_mode: bool = False,
     data_archive_dir: Optional[str] = None,
+    metadata_archive_dir: Optional[str] = None,
     glob_pattern=None,
     start_time=None,
     end_time=None,
@@ -198,6 +199,7 @@ def find_files(
         data_source=data_source,
         campaign_name=campaign_name,
         station_name=station_name,
+        metadata_archive_dir=metadata_archive_dir,
     )
     glob_pattern = metadata.get("raw_data_glob_pattern", "")

@@ -232,7 +234,7 @@
 #### DISDRODB Open Product Files


-def open_raw_files(filepaths, data_source, campaign_name, station_name):
+def _open_raw_files(filepaths, data_source, campaign_name, station_name, metadata_archive_dir):
     """Open raw files to DISDRODB L0A or L0B format.

     Raw text files are opened into a DISDRODB L0A pandas Dataframe.
@@ -247,6 +249,7 @@ def open_raw_files(filepaths, data_source, campaign_name, station_name):
         data_source=data_source,
         campaign_name=campaign_name,
         station_name=station_name,
+        metadata_archive_dir=metadata_archive_dir,
     )
     sensor_name = metadata["sensor_name"]

@@ -256,6 +259,7 @@ def open_raw_files(filepaths, data_source, campaign_name, station_name):
             data_source=data_source,
             campaign_name=campaign_name,
             station_name=station_name,
+            metadata_archive_dir=metadata_archive_dir,
         )
     except Exception:
         issue_dict = None
@@ -265,6 +269,7 @@ def open_raw_files(filepaths, data_source, campaign_name, station_name):
         data_source=data_source,
         campaign_name=campaign_name,
         station_name=station_name,
+        metadata_archive_dir=metadata_archive_dir,
     )
     # Return DISDRODB L0A dataframe if raw text files
     if metadata["raw_data_format"] == "txt":
@@ -289,6 +294,35 @@
     return ds


+def filter_dataset_by_time(ds, start_time=None, end_time=None):
+    """Subset an xarray.Dataset by time, robust to duplicated/non-monotonic indices.
+
+    NOTE: ds.sel(time=slice(start_time, end_time)) fails in presence of duplicated
+    timesteps because time 'index is not monotonic increasing or decreasing'.
+
+    Parameters
+    ----------
+    ds : xr.Dataset
+        Dataset with a `time` coordinate.
+    start_time : np.datetime64 or None
+        Inclusive start bound. If None, no lower bound is applied.
+    end_time : np.datetime64 or None
+        Inclusive end bound. If None, no upper bound is applied.
+
+    Returns
+    -------
+    xr.Dataset
+        Subset dataset with the same ordering of timesteps (duplicates preserved).
+    """
+    time = ds["time"].to_numpy()
+    mask = np.ones(time.shape, dtype=bool)
+    if start_time is not None:
+        mask &= time >= np.array(start_time, dtype="datetime64[ns]")
+    if end_time is not None:
+        mask &= time <= np.array(end_time, dtype="datetime64[ns]")
+    return ds.isel(time=np.where(mask)[0])
+
+
 def open_netcdf_files(
     filepaths,
     chunks=-1,
@@ -299,7 +333,10 @@ def open_netcdf_files(
     compute=True,
     **open_kwargs,
 ):
-    """Open DISDRODB netCDF files using xarray."""
+    """Open DISDRODB netCDF files using xarray.
+
+    Using combine="nested" and join="outer" ensure that duplicated timesteps are not overwritten!
+    """
     import xarray as xr

     # Ensure variables is a list
@@ -313,6 +350,7 @@ def open_netcdf_files(
         filepaths,
         chunks=chunks,
         combine="nested",
+        join="outer",
         concat_dim="time",
         engine="netcdf4",
         parallel=parallel,
@@ -329,7 +367,8 @@ def open_netcdf_files(
     if variables is not None and preprocess is None:
         ds = ds[variables]
     # - Subset time
-
+    if start_time is not None or end_time is not None:
+        ds = filter_dataset_by_time(ds, start_time=start_time, end_time=end_time)
     # - If compute=True, load in memory and close connections to files
     if compute:
         dataset = ds.compute()
@@ -349,6 +388,7 @@ def open_dataset(
     product_kwargs=None,
     debugging_mode: bool = False,
     data_archive_dir: Optional[str] = None,
+    metadata_archive_dir: Optional[str] = None,
     chunks=-1,
     parallel=False,
     compute=False,
@@ -399,6 +439,7 @@ def open_dataset(
     # List product files
     filepaths = find_files(
         data_archive_dir=data_archive_dir,
+        metadata_archive_dir=metadata_archive_dir,
         data_source=data_source,
         campaign_name=campaign_name,
         station_name=station_name,
@@ -413,11 +454,12 @@ def open_dataset(
     # - For raw txt files return DISDRODB L0A dataframe
     # - For raw netCDF files return DISDRODB L0B dataframe
     if product == "RAW":
-        obj = open_raw_files(
+        obj = _open_raw_files(
             filepaths=filepaths,
             data_source=data_source,
             campaign_name=campaign_name,
             station_name=station_name,
+            metadata_archive_dir=metadata_archive_dir,
         )
         return obj

@@ -464,11 +506,9 @@ def remove_product(
         station_name=station_name,
         **product_kwargs,
     )
-
-    log_info(logger=logger, msg="Removal of {product} files started.", verbose=verbose)
+    log_info(logger=logger, msg="Removal of {product} files started.", verbose=verbose)
     shutil.rmtree(data_dir)
-
-    log_info(logger=logger, msg="Removal of {product} files ended.", verbose=verbose)
+    log_info(logger=logger, msg="Removal of {product} files ended.", verbose=verbose)


 ####--------------------------------------------------------------------------.