PyPI - disdrodb - Versions diffs - 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl - Mend

disdrodb 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62)

disdrodb/__init__.py +4 -0
disdrodb/_version.py +2 -2
disdrodb/api/checks.py +70 -47
disdrodb/api/configs.py +0 -2
disdrodb/api/info.py +3 -3
disdrodb/api/io.py +48 -8
disdrodb/api/path.py +116 -133
disdrodb/api/search.py +12 -3
disdrodb/cli/disdrodb_create_summary.py +103 -0
disdrodb/cli/disdrodb_create_summary_station.py +1 -1
disdrodb/cli/disdrodb_run_l0a_station.py +1 -1
disdrodb/cli/disdrodb_run_l0b_station.py +2 -2
disdrodb/cli/disdrodb_run_l0c_station.py +2 -2
disdrodb/cli/disdrodb_run_l1_station.py +2 -2
disdrodb/cli/disdrodb_run_l2e_station.py +2 -2
disdrodb/cli/disdrodb_run_l2m_station.py +2 -2
disdrodb/data_transfer/download_data.py +123 -7
disdrodb/issue/writer.py +2 -0
disdrodb/l0/l0a_processing.py +10 -5
disdrodb/l0/l0b_nc_processing.py +10 -6
disdrodb/l0/l0b_processing.py +26 -61
disdrodb/l0/l0c_processing.py +369 -251
disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +7 -0
disdrodb/l0/readers/PARSIVEL2/ARM/ARM_PARSIVEL2.py +4 -0
disdrodb/l0/readers/PARSIVEL2/CANADA/UQAM_NC.py +69 -0
disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +136 -0
disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +220 -0
disdrodb/l0/readers/PARSIVEL2/NASA/LPVEX.py +109 -0
disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +3 -0
disdrodb/l1/fall_velocity.py +46 -0
disdrodb/l1/processing.py +1 -1
disdrodb/l2/processing.py +1 -1
disdrodb/metadata/checks.py +132 -125
disdrodb/psd/fitting.py +172 -205
disdrodb/psd/models.py +1 -1
disdrodb/routines/__init__.py +54 -0
disdrodb/{l0/routines.py → routines/l0.py} +288 -418
disdrodb/{l1/routines.py → routines/l1.py} +60 -92
disdrodb/{l2/routines.py → routines/l2.py} +249 -462
disdrodb/{routines.py → routines/wrappers.py} +95 -7
disdrodb/scattering/axis_ratio.py +5 -1
disdrodb/scattering/permittivity.py +18 -0
disdrodb/scattering/routines.py +56 -36
disdrodb/summary/routines.py +110 -34
disdrodb/utils/archiving.py +434 -0
disdrodb/utils/cli.py +5 -5
disdrodb/utils/dask.py +62 -1
disdrodb/utils/decorators.py +31 -0
disdrodb/utils/encoding.py +5 -1
disdrodb/{l2 → utils}/event.py +1 -66
disdrodb/utils/logger.py +1 -1
disdrodb/utils/manipulations.py +22 -12
disdrodb/utils/routines.py +166 -0
disdrodb/utils/time.py +3 -291
disdrodb/utils/xarray.py +3 -0
disdrodb/viz/plots.py +85 -14
{disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/METADATA +2 -2
{disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/RECORD +62 -54
{disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/entry_points.txt +1 -0
{disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/WHEEL +0 -0
{disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/licenses/LICENSE +0 -0
{disdrodb-0.1.3.dist-info → disdrodb-0.1.4.dist-info}/top_level.txt +0 -0

disdrodb/data_transfer/download_data.py CHANGED Viewed

@@ -239,7 +239,7 @@ def check_consistent_station_name(metadata_filepath, station_name):
     return station_name
-def download_station_data(metadata_filepath: str, data_archive_dir: str, force: bool = False) -> None:
+def download_station_data(metadata_filepath: str, data_archive_dir: str, force: bool = False, verbose=True) -> None:
     """Download and unzip the station data .
     Parameters
@@ -275,17 +275,27 @@ def download_station_data(metadata_filepath: str, data_archive_dir: str, force:
         raise ValueError(f"Invalid disdrodb_data_url '{disdrodb_data_url}' for station {station_name}")
     # Download files
-    # - Option 1: Download Zip file containing all station raw data
+    # - Option 1: Download ZIP file containing all station raw data
     if disdrodb_data_url.startswith("https://zenodo.org/") or disdrodb_data_url.startswith("https://cloudnet.fmi.fi/"):
         download_zip_file(url=disdrodb_data_url, dst_dir=station_dir, force=force)
     # - Option 2: Recursive download from a web server via HTTP or HTTPS.
     elif disdrodb_data_url.startswith("http"):
-        download_web_server_data(url=disdrodb_data_url, dst_dir=station_dir, force=force, verbose=True)
+        download_web_server_data(url=disdrodb_data_url, dst_dir=station_dir, force=force, verbose=verbose)
+        # - Retry to be more sure that all data have been downloaded
+        download_web_server_data(url=disdrodb_data_url, dst_dir=station_dir, force=True, verbose=verbose)
+    # - Option 3: Recursive download from a ftp server
+    elif disdrodb_data_url.startswith("ftp"):
+        download_ftp_server_data(url=disdrodb_data_url, dst_dir=station_dir, force=force, verbose=verbose)
+        # - Retry to be more sure that all data have been downloaded
+        download_ftp_server_data(url=disdrodb_data_url, dst_dir=station_dir, force=True, verbose=verbose)
     else:
         raise NotImplementedError(f"Open a GitHub Issue to enable the download of data from {disdrodb_data_url}.")
-####-----------------------------------------------------------------------------------------.
+####--------------------------------------------------------------------.
 #### Download from Web Server via HTTP or HTTPS
@@ -301,9 +311,17 @@ def download_web_server_data(url: str, dst_dir: str, force=True, verbose=True) -
     3. Compute cut-dirs so that only the last segment of the path remains locally.
     4. Build and run the wget command.
-    Example:
-        download_with_wget("https://ruisdael.citg.tudelft.nl/parsivel/PAR001_Cabauw/2021/202101/")
-        # → Creates a local folder "202101/" with all files and subfolders.
+    Parameters
+    ----------
+    url : str
+        HTTPS URL pointing to webserver folder. Example: "https://ruisdael.citg.tudelft.nl/parsivel/PAR001_Cabauw/"
+    dst_dir : str
+         Local directory where to download the file (DISDRODB station data directory).
+    force : bool, optional
+        If ``True``, re-download new/updated files (skip unchanged ones).
+        If ``False``, keep existing files untouched.
+    verbose : bool, optional
+        Print wget output (default is True).
     """
     # 1. Ensure wget exists
     ensure_wget_available()
@@ -393,6 +411,104 @@ def build_webserver_wget_command(url: str, cut_dirs: int, dst_dir: str, force: b
     return cmd
+####--------------------------------------------------------------------.
+#### Download from FTP Server
+def build_ftp_server_wget_command(
+    url: str,
+    cut_dirs: int,
+    dst_dir: str,
+    force: bool,
+    verbose: bool,
+) -> list[str]:
+    """Construct the wget command list for FTP recursive download.
+    Parameters
+    ----------
+    url : str
+        FTP URL to download from.
+    cut_dirs : int
+        Number of leading path components to strip.
+    dst_dir : str
+        Local destination directory.
+    force : bool
+        If True, re-download newer files (--timestamping).
+        If False, keep existing files untouched (--no-clobber).
+    verbose : bool
+        If False, suppress wget output (-q).
+    """
+    cmd = ["wget"]  # base command
+    if not verbose:
+        cmd.append("-q")  # quiet mode --> no output except errors
+    cmd += [
+        "-r",  # recursive --> traverse into subdirectories
+        "-np",  # no parent --> don't ascend to higher-level dirs
+        "-nH",  # no host dirs --> avoid creating ftp.example.com/ locally
+        f"--cut-dirs={cut_dirs}",  # strip N leading path components
+    ]
+    if force:
+        cmd.append("--timestamping")  # download if remote file is newer
+    else:
+        cmd.append("--no-clobber")  # skip files that already exist
+    cmd += [
+        "-P",  # specify local destination directory
+        dst_dir,
+        f"ftp://anonymous:disdrodb@{url}",  # target FTP URL
+    ]
+    return cmd
+def download_ftp_server_data(url: str, dst_dir: str, force: bool = False, verbose: bool = True) -> None:
+    """Download data from an FTP server with anonymous login.
+    Parameters
+    ----------
+    url : str
+        FTP server URL pointing to a folder. Example: "ftp://ftp.example.com/path/to/data/"
+    dst_dir : str
+         Local directory where to download the file (DISDRODB station data directory).
+    force : bool, optional
+        If ``True``, re-download new/updated files (skip unchanged ones).
+        If ``False``, keep existing files untouched.
+    verbose : bool, optional
+        Print wget output (default is True).
+    """
+    ensure_wget_available()
+    # Ensure trailing slash
+    url = ensure_trailing_slash(url)
+    # Compute cut-dirs so files land directly in dst_dir
+    cut_dirs = compute_cut_dirs(url)
+    # Make destination directory
+    os.makedirs(dst_dir, exist_ok=True)
+    # Build wget command
+    cmd = build_ftp_server_wget_command(
+        url,
+        cut_dirs=cut_dirs,
+        dst_dir=dst_dir,
+        force=force,
+        verbose=verbose,
+    )
+    # Run wget
+    try:
+        subprocess.run(cmd, check=True)
+    except subprocess.CalledProcessError as e:
+        raise subprocess.CalledProcessError(
+            returncode=e.returncode,
+            cmd=e.cmd,
+            output=e.output,
+            stderr=e.stderr,
+        )
 ####--------------------------------------------------------------------.
 #### Download from Zenodo

disdrodb/issue/writer.py CHANGED Viewed

@@ -120,9 +120,11 @@ def create_station_issue(data_source, campaign_name, station_name, metadata_arch
     )
     if os.path.exists(issue_filepath):
         raise ValueError("A issue YAML file already exists at {issue_filepath}.")
     # Create issue dir if not existing
     issue_dir = os.path.dirname(issue_filepath)
     os.makedirs(issue_dir, exist_ok=True)
     # Write issue file
     write_issue(filepath=issue_filepath)
     print(f"An empty issue YAML file for station {station_name} has been created .")

disdrodb/l0/l0a_processing.py CHANGED Viewed

@@ -269,13 +269,15 @@ def remove_issue_timesteps(df, issue_dict, logger=None, verbose=False):
     # Retrieve timesteps and time_periods
     timesteps = issue_dict.get("timesteps", None)
     time_periods = issue_dict.get("time_periods", None)
+    timesteps = [] if timesteps is None else timesteps
+    time_periods = [] if time_periods is None else time_periods
     # Drop rows of specified timesteps
-    if timesteps:
+    if len(timesteps) > 0:
         df = drop_timesteps(df=df, timesteps=timesteps)
     # Drop rows within specified time_period
-    if time_periods:
+    if len(time_periods) > 0:
         df = drop_time_periods(df, time_periods=time_periods)
     # Report number of dropped rows
@@ -804,9 +806,6 @@ def read_l0a_dataframe(
     # Define the list of dataframe
     df = pq.ParquetDataset(filepaths).read().to_pandas()
-    # Ensure no index
-    df = df.reset_index(drop=True)
     # Reduce rows
     if debugging_mode:
         n_rows = min(100, len(df))
@@ -815,6 +814,12 @@ def read_l0a_dataframe(
     # Ensure time is in nanoseconds
     df["time"] = df["time"].astype("M8[ns]")
+    # Ensure sorted by time
+    df = df.sort_values(by="time")
+    # Ensure no index
+    df = df.reset_index(drop=True)
     # ---------------------------------------------------
     # Return dataframe
     return df

disdrodb/l0/l0b_nc_processing.py CHANGED Viewed

@@ -36,7 +36,6 @@ from disdrodb.l0.standards import (
 from disdrodb.utils.logger import (
     log_error,
     # log_warning,
-    # log_debug,
     log_info,
 )
@@ -347,7 +346,7 @@ def drop_timesteps(ds, timesteps: list):
     # Ensure there's at least one timestep left
     if ds_filtered.sizes.get("time", 0) == 0:
         raise ValueError(
-            "No timesteps left after removing problematic timesteps. " "Maybe you need to adjust the issue YAML file.",
+            "No timesteps left after removing problematic timesteps. Maybe you need to adjust the issue YAML file.",
         )
     return ds_filtered
@@ -423,16 +422,21 @@ def remove_issue_timesteps(
     ValueError
         If after removing specified timesteps/periods no data remains.
     """
+    # Retrieve number of initial rows
     n_initial = ds.sizes.get("time", 0)
-    timesteps = issue_dict.get("timesteps", []) or []
-    time_periods = issue_dict.get("time_periods", []) or []
+    # Retrieve timesteps and time_periods
+    timesteps = issue_dict.get("timesteps")
+    time_periods = issue_dict.get("time_periods")
+    timesteps = [] if timesteps is None else timesteps
+    time_periods = [] if time_periods is None else time_periods
     # Drop individual timesteps
-    if timesteps:
+    if len(timesteps) > 0:
         ds = drop_timesteps(ds, timesteps)
     # Drop intervals of time
-    if time_periods:
+    if len(time_periods) > 0:
         ds = drop_time_periods(ds, time_periods)
     # Report number dropped

disdrodb/l0/l0b_processing.py CHANGED Viewed

@@ -19,7 +19,6 @@
 """Functions to process DISDRODB L0A files into DISDRODB L0B netCDF files."""
 import logging
-import os
 import numpy as np
 import pandas as pd
@@ -43,13 +42,8 @@ from disdrodb.utils.attrs import (
     set_coordinate_attributes,
     set_disdrodb_attrs,
 )
-from disdrodb.utils.directories import create_directory, remove_if_exists
 from disdrodb.utils.encoding import set_encodings
-from disdrodb.utils.logger import (
-    # log_warning,
-    # log_debug,
-    log_info,
-)
+from disdrodb.utils.logger import log_info
 from disdrodb.utils.time import ensure_sorted_by_time
 logger = logging.getLogger(__name__)
@@ -246,12 +240,20 @@ def retrieve_l0b_arrays(
             unavailable_keys.append(key)
             continue
-        # Ensure is a string
-        df_series = df[key].astype(str)
+        # Ensure is a string, get a numpy array for each row and then stack
+        # - Option 1: Clear but lot of copies
+        # df_series = df[key].astype(str)
+        # list_arr = df_series.apply(_format_string_array, n_values=n_values)
+        # arr = np.stack(list_arr, axis=0)
+        # - Option 2: still copies
+        # arr = np.vstack(_format_string_array(s, n_values=n_values) for s in df_series.astype(str))
-        # Get a numpy array for each row and then stack
-        list_arr = df_series.apply(_format_string_array, n_values=n_values)
-        arr = np.stack(list_arr, axis=0)
+        # - Option 3: more memory efficient
+        n_timesteps = len(df[key])
+        arr = np.empty((n_timesteps, n_values), dtype=float)  # preallocates
+        for i, s in enumerate(df[key].astype(str)):
+            arr[i, :] = _format_string_array(s, n_values=n_values)
         # Retrieve dimensions
         dims_order = dims_order_dict[key]
@@ -333,18 +335,6 @@ def _set_variable_attributes(ds: xr.Dataset, sensor_name: str) -> xr.Dataset:
     return ds
-def _set_dataset_attrs(ds, sensor_name):
-    """Set variable and coordinates attributes."""
-    # - Add netCDF variable attributes
-    # --> Attributes: long_name, units, descriptions, valid_min, valid_max
-    ds = _set_variable_attributes(ds=ds, sensor_name=sensor_name)
-    # - Add netCDF coordinate attributes
-    ds = set_coordinate_attributes(ds=ds)
-    #  - Set DISDRODB global attributes
-    ds = set_disdrodb_attrs(ds=ds, product="L0B")
-    return ds
 def add_dataset_crs_coords(ds):
     """Add the CRS coordinate to the xr.Dataset."""
     # TODO: define CF-compliant CRS !
@@ -475,16 +465,25 @@ def finalize_dataset(ds, sensor_name, metadata):
     ds = add_dataset_crs_coords(ds)
     # Set netCDF dimension order
+    # --> Required for correct encoding !
     ds = ds.transpose("time", "diameter_bin_center", ...)
-    # Add netCDF variable and coordinate attributes
-    ds = _set_dataset_attrs(ds, sensor_name)
     # Ensure variables with dtype object are converted to string
     ds = _convert_object_variables_to_string(ds)
+    # Add netCDF variable and coordinate attributes
+    # - Add variable attributes: long_name, units, descriptions, valid_min, valid_max
+    ds = _set_variable_attributes(ds=ds, sensor_name=sensor_name)
+    # - Add netCDF coordinate attributes
+    ds = set_coordinate_attributes(ds=ds)
+    #  - Set DISDRODB global attributes
+    ds = set_disdrodb_attrs(ds=ds, product="L0B")
     # Check L0B standards
     check_l0b_standards(ds)
+    # Set L0B encodings
+    ds = set_l0b_encodings(ds=ds, sensor_name=sensor_name)
     return ds
@@ -508,38 +507,4 @@ def set_l0b_encodings(ds: xr.Dataset, sensor_name: str):
     return ds
-def write_l0b(ds: xr.Dataset, filepath: str, force=False) -> None:
-    """Save the xarray dataset into a NetCDF file.
-    Parameters
-    ----------
-    ds  : xarray.Dataset
-        Input xarray dataset.
-    filepath : str
-        Output file path.
-    sensor_name : str
-        Name of the sensor.
-    force : bool, optional
-        Whether to overwrite existing data.
-        If ``True``, overwrite existing data into destination directories.
-        If ``False``, raise an error if there are already data into destination directories. This is the default.
-    """
-    # Create station directory if does not exist
-    create_directory(os.path.dirname(filepath))
-    # Check if the file already exists
-    # - If force=True --> Remove it
-    # - If force=False --> Raise error
-    remove_if_exists(filepath, force=force)
-    # Get sensor name from dataset
-    sensor_name = ds.attrs.get("sensor_name")
-    # Set encodings
-    ds = set_l0b_encodings(ds=ds, sensor_name=sensor_name)
-    # Write netcdf
-    ds.to_netcdf(filepath, engine="netcdf4")
 ####--------------------------------------------------------------------------.

disdrodb 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

disdrodb 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl