disdrodb 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. disdrodb/__init__.py +64 -34
  2. disdrodb/_config.py +5 -4
  3. disdrodb/_version.py +16 -3
  4. disdrodb/accessor/__init__.py +20 -0
  5. disdrodb/accessor/methods.py +125 -0
  6. disdrodb/api/checks.py +139 -9
  7. disdrodb/api/configs.py +4 -2
  8. disdrodb/api/info.py +10 -10
  9. disdrodb/api/io.py +237 -18
  10. disdrodb/api/path.py +81 -75
  11. disdrodb/api/search.py +6 -6
  12. disdrodb/cli/disdrodb_create_summary_station.py +91 -0
  13. disdrodb/cli/disdrodb_run_l0.py +1 -1
  14. disdrodb/cli/disdrodb_run_l0_station.py +1 -1
  15. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  16. disdrodb/cli/disdrodb_run_l0b_station.py +1 -1
  17. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  18. disdrodb/cli/disdrodb_run_l0c_station.py +1 -1
  19. disdrodb/cli/disdrodb_run_l2e_station.py +1 -1
  20. disdrodb/configs.py +149 -4
  21. disdrodb/constants.py +61 -0
  22. disdrodb/data_transfer/download_data.py +5 -5
  23. disdrodb/etc/configs/attributes.yaml +339 -0
  24. disdrodb/etc/configs/encodings.yaml +473 -0
  25. disdrodb/etc/products/L1/global.yaml +13 -0
  26. disdrodb/etc/products/L2E/10MIN.yaml +12 -0
  27. disdrodb/etc/products/L2E/1MIN.yaml +1 -0
  28. disdrodb/etc/products/L2E/global.yaml +22 -0
  29. disdrodb/etc/products/L2M/10MIN.yaml +12 -0
  30. disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
  31. disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
  32. disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
  33. disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
  34. disdrodb/etc/products/L2M/global.yaml +26 -0
  35. disdrodb/l0/__init__.py +13 -0
  36. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
  37. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  38. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
  39. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  40. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +5 -5
  41. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +3 -3
  42. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +1 -1
  43. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
  44. disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
  45. disdrodb/l0/l0a_processing.py +30 -30
  46. disdrodb/l0/l0b_nc_processing.py +108 -2
  47. disdrodb/l0/l0b_processing.py +4 -4
  48. disdrodb/l0/l0c_processing.py +5 -13
  49. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
  50. disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
  51. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
  52. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +0 -2
  53. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +4 -1
  54. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  55. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  56. disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
  57. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
  58. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
  59. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
  60. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
  61. disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
  62. disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
  63. disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +1 -1
  64. disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
  65. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
  66. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
  67. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  68. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
  69. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
  70. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  71. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +20 -12
  72. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +2 -0
  73. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
  74. disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
  75. disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
  76. disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → USA/C3WE.py} +65 -85
  77. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
  78. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
  79. disdrodb/l0/routines.py +105 -14
  80. disdrodb/l1/__init__.py +5 -0
  81. disdrodb/l1/filters.py +34 -20
  82. disdrodb/l1/processing.py +45 -44
  83. disdrodb/l1/resampling.py +77 -66
  84. disdrodb/l1/routines.py +35 -43
  85. disdrodb/l1_env/routines.py +18 -3
  86. disdrodb/l2/__init__.py +7 -0
  87. disdrodb/l2/empirical_dsd.py +58 -10
  88. disdrodb/l2/event.py +27 -120
  89. disdrodb/l2/processing.py +267 -116
  90. disdrodb/l2/routines.py +618 -254
  91. disdrodb/metadata/standards.py +3 -1
  92. disdrodb/psd/fitting.py +463 -144
  93. disdrodb/psd/models.py +8 -5
  94. disdrodb/routines.py +3 -3
  95. disdrodb/scattering/__init__.py +16 -4
  96. disdrodb/scattering/axis_ratio.py +56 -36
  97. disdrodb/scattering/permittivity.py +486 -0
  98. disdrodb/scattering/routines.py +701 -159
  99. disdrodb/summary/__init__.py +17 -0
  100. disdrodb/summary/routines.py +4120 -0
  101. disdrodb/utils/attrs.py +68 -125
  102. disdrodb/utils/compression.py +30 -1
  103. disdrodb/utils/dask.py +59 -8
  104. disdrodb/utils/dataframe.py +61 -7
  105. disdrodb/utils/directories.py +35 -15
  106. disdrodb/utils/encoding.py +33 -19
  107. disdrodb/utils/logger.py +13 -6
  108. disdrodb/utils/manipulations.py +71 -0
  109. disdrodb/utils/subsetting.py +214 -0
  110. disdrodb/utils/time.py +165 -19
  111. disdrodb/utils/writer.py +20 -7
  112. disdrodb/utils/xarray.py +2 -4
  113. disdrodb/viz/__init__.py +13 -0
  114. disdrodb/viz/plots.py +327 -0
  115. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/METADATA +3 -2
  116. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/RECORD +121 -88
  117. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/entry_points.txt +1 -0
  118. disdrodb/l1/encoding_attrs.py +0 -642
  119. disdrodb/l2/processing_options.py +0 -213
  120. /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
  121. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/WHEEL +0 -0
  122. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/licenses/LICENSE +0 -0
  123. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/top_level.txt +0 -0
disdrodb/utils/attrs.py CHANGED
@@ -18,15 +18,26 @@
 # -----------------------------------------------------------------------------.
 """DISDRODB netCDF4 attributes utilities."""
 import datetime
+import os
 
-from disdrodb import ARCHIVE_VERSION, CONVENTIONS, SOFTWARE_VERSION
+from disdrodb.constants import ARCHIVE_VERSION, CONVENTIONS, COORDINATES, SOFTWARE_VERSION
+from disdrodb.utils.yaml import read_yaml
 
 ####---------------------------------------------------------------------.
-#### Variable attributes
+#### Variable and coordinates attributes
+
+
+def get_attrs_dict():
+    """Get attributes dictionary for DISDRODB product variables and coordinates."""
+    import disdrodb
+
+    configs_path = os.path.join(disdrodb.__root_path__, "disdrodb", "etc", "configs")
+    attrs_dict = read_yaml(os.path.join(configs_path, "attributes.yaml"))
+    return attrs_dict
 
 
 def set_attrs(ds, attrs_dict):
-    """Set attributes to the variables of the xr.Dataset."""
+    """Set attributes to the variables and coordinates of the xr.Dataset."""
     for var in attrs_dict:
         if var in ds:
             ds[var].attrs.update(attrs_dict[var])
@@ -37,104 +48,13 @@ def set_attrs(ds, attrs_dict):
 #### Coordinates attributes
 
 
-def get_coords_attrs_dict():
-    """Return dictionary with DISDRODB coordinates attributes."""
-    attrs_dict = {}
-    # Define diameter attributes
-    attrs_dict["diameter_bin_center"] = {
-        "name": "diameter_bin_center",
-        "standard_name": "diameter_bin_center",
-        "long_name": "diameter_bin_center",
-        "units": "mm",
-        "description": "Bin center drop diameter value",
-    }
-    attrs_dict["diameter_bin_width"] = {
-        "name": "diameter_bin_width",
-        "standard_name": "diameter_bin_width",
-        "long_name": "diameter_bin_width",
-        "units": "mm",
-        "description": "Drop diameter bin width",
-    }
-    attrs_dict["diameter_bin_upper"] = {
-        "name": "diameter_bin_upper",
-        "standard_name": "diameter_bin_upper",
-        "long_name": "diameter_bin_upper",
-        "units": "mm",
-        "description": "Bin upper bound drop diameter value",
-    }
-    attrs_dict["velocity_bin_lower"] = {
-        "name": "velocity_bin_lower",
-        "standard_name": "velocity_bin_lower",
-        "long_name": "velocity_bin_lower",
-        "units": "mm",
-        "description": "Bin lower bound drop diameter value",
-    }
-    # Define velocity attributes
-    attrs_dict["velocity_bin_center"] = {
-        "name": "velocity_bin_center",
-        "standard_name": "velocity_bin_center",
-        "long_name": "velocity_bin_center",
-        "units": "m/s",
-        "description": "Bin center drop fall velocity value",
-    }
-    attrs_dict["velocity_bin_width"] = {
-        "name": "velocity_bin_width",
-        "standard_name": "velocity_bin_width",
-        "long_name": "velocity_bin_width",
-        "units": "m/s",
-        "description": "Drop fall velocity bin width",
-    }
-    attrs_dict["velocity_bin_upper"] = {
-        "name": "velocity_bin_upper",
-        "standard_name": "velocity_bin_upper",
-        "long_name": "velocity_bin_upper",
-        "units": "m/s",
-        "description": "Bin upper bound drop fall velocity value",
-    }
-    attrs_dict["velocity_bin_lower"] = {
-        "name": "velocity_bin_lower",
-        "standard_name": "velocity_bin_lower",
-        "long_name": "velocity_bin_lower",
-        "units": "m/s",
-        "description": "Bin lower bound drop fall velocity value",
-    }
-    # Define geolocation attributes
-    attrs_dict["latitude"] = {
-        "name": "latitude",
-        "standard_name": "latitude",
-        "long_name": "Latitude",
-        "units": "degrees_north",
-    }
-    attrs_dict["longitude"] = {
-        "name": "longitude",
-        "standard_name": "longitude",
-        "long_name": "Longitude",
-        "units": "degrees_east",
-    }
-    attrs_dict["altitude"] = {
-        "name": "altitude",
-        "standard_name": "altitude",
-        "long_name": "Altitude",
-        "units": "m",
-        "description": "Elevation above sea level",
-    }
-    # Define time attributes
-    attrs_dict["time"] = {
-        "name": "time",
-        "standard_name": "time",
-        "long_name": "time",
-        "description": "UTC Time",
-    }
-
-    return attrs_dict
-
-
 def set_coordinate_attributes(ds):
     """Set coordinates attributes."""
     # Get attributes dictionary
-    attrs_dict = get_coords_attrs_dict()
+    attrs_dict = get_attrs_dict()
+    coords_dict = {coord: attrs_dict[coord] for coord in COORDINATES if coord in attrs_dict}
     # Set attributes
-    ds = set_attrs(ds, attrs_dict)
+    ds = set_attrs(ds, coords_dict)
     return ds
 
 
@@ -142,14 +62,14 @@ def set_coordinate_attributes(ds):
 #### DISDRODB Global Attributes
 
 
-def set_disdrodb_attrs(ds, product: str):
+def update_disdrodb_attrs(ds, product: str):
     """Add DISDRODB processing information to the netCDF global attributes.
 
     It assumes stations metadata are already added the dataset.
 
     Parameters
     ----------
-    ds : xarray.Dataset
+    ds : xarray dataset.
         Dataset
     product: str
         DISDRODB product.
@@ -159,30 +79,53 @@
     xarray dataset
         Dataset.
     """
-    # Add dataset conventions
-    ds.attrs["Conventions"] = CONVENTIONS
-
-    # Add featureType
-    if "platform_type" in ds.attrs:
-        platform_type = ds.attrs["platform_type"]
-        if platform_type == "fixed":
-            ds.attrs["featureType"] = "timeSeries"
-        else:
-            ds.attrs["featureType"] = "trajectory"
+    attrs = ds.attrs.copy()
+
+    # ----------------------------------------------
+    # Drop metadata not relevant for DISDRODB products
+    keys_to_drop = [
+        "disdrodb_reader",
+        "disdrodb_data_url",
+        "raw_data_glob_pattern",
+        "raw_data_format",
+    ]
+    for key in keys_to_drop:
+        _ = attrs.pop(key, None)
+
+    # ----------------------------------------------
+    # Add time_coverage_start and time_coverage_end
+    if "time" in ds.dims:
+        attrs["time_coverage_start"] = str(ds["time"].data[0])
+        attrs["time_coverage_end"] = str(ds["time"].data[-1])
 
-    # Update DISDRODDB attributes
-    ds = update_disdrodb_attrs(ds=ds, product=product)
+    # ----------------------------------------------
+    # Set DISDRODDB attributes
+    # - Add DISDRODB processing info
+    now = datetime.datetime.utcnow()
+    current_time = now.strftime("%Y-%m-%d %H:%M:%S")
+    attrs["disdrodb_processing_date"] = current_time
+    # - Add DISDRODB product and version
+    attrs["disdrodb_product_version"] = ARCHIVE_VERSION
+    attrs["disdrodb_software_version"] = SOFTWARE_VERSION
+    attrs["disdrodb_product"] = product
+
+    # ----------------------------------------------
+    # Finalize attributes dictionary
+    # - Sort attributes alphabetically
+    attrs = dict(sorted(attrs.items()))
+    # - Set attributes
+    ds.attrs = attrs
     return ds
 
 
-def update_disdrodb_attrs(ds, product: str):
+def set_disdrodb_attrs(ds, product: str):
     """Add DISDRODB processing information to the netCDF global attributes.
 
     It assumes stations metadata are already added the dataset.
 
     Parameters
     ----------
-    ds : xarray dataset.
+    ds : xarray.Dataset
         Dataset
     product: str
         DISDRODB product.
@@ -192,17 +135,17 @@ def update_disdrodb_attrs(ds, product: str):
     xarray dataset
         Dataset.
     """
-    # Add time_coverage_start and time_coverage_end
-    ds.attrs["time_coverage_start"] = str(ds["time"].data[0])
-    ds.attrs["time_coverage_end"] = str(ds["time"].data[-1])
+    # Add dataset conventions
+    ds.attrs["Conventions"] = CONVENTIONS
 
-    # DISDRODDB attributes
-    # - Add DISDRODB processing info
-    now = datetime.datetime.utcnow()
-    current_time = now.strftime("%Y-%m-%d %H:%M:%S")
-    ds.attrs["disdrodb_processing_date"] = current_time
-    # - Add DISDRODB product and version
-    ds.attrs["disdrodb_product_version"] = ARCHIVE_VERSION
-    ds.attrs["disdrodb_software_version"] = SOFTWARE_VERSION
-    ds.attrs["disdrodb_product"] = product
+    # Add featureType
+    if "platform_type" in ds.attrs:
+        platform_type = ds.attrs["platform_type"]
+        if platform_type == "fixed":
+            ds.attrs["featureType"] = "timeSeries"
+        else:
+            ds.attrs["featureType"] = "trajectory"
+
+    # Update DISDRODDB attributes
+    ds = update_disdrodb_attrs(ds=ds, product=product)
    return ds
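
Note: the hard-coded coordinate attribute dictionaries above are replaced by a lookup into the new etc/configs/attributes.yaml. A minimal usage sketch of the refactored helpers (the example dataset is hypothetical, and the expected "mm" units assume the YAML file carries over the old hard-coded values):

import xarray as xr

from disdrodb.utils.attrs import get_attrs_dict, set_attrs

# Hypothetical dataset holding one DISDRODB coordinate
ds = xr.Dataset(coords={"diameter_bin_center": [0.31, 0.44, 0.56]})

# Attributes are now read from disdrodb/etc/configs/attributes.yaml
attrs_dict = get_attrs_dict()

# set_attrs only updates variables/coordinates actually present in the dataset
ds = set_attrs(ds, attrs_dict)
print(ds["diameter_bin_center"].attrs)  # expected to include units "mm"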
disdrodb/utils/compression.py CHANGED
@@ -22,6 +22,7 @@ import bz2
 import gzip
 import os
 import shutil
+import subprocess
 import tempfile
 import zipfile
 from typing import Optional
@@ -53,6 +54,34 @@ def unzip_file(filepath: str, dest_path: str) -> None:
         zip_ref.extractall(dest_path)
 
 
+def unzip_file_on_terminal(filepath: str, dest_path: str) -> str:
+    """Unzip a file into a directory using the terminal command.
+
+    Parameters
+    ----------
+    filepath : str
+        Path of the file to unzip.
+    dest_path : str
+        Path of the destination directory.
+    """
+    os.makedirs(dest_path, exist_ok=True)
+
+    if os.name == "nt":
+        # Windows: use PowerShell Expand-Archive
+        cmd = [
+            "powershell.exe",
+            "-NoProfile",
+            "-NonInteractive",
+            "-Command",
+            f"Expand-Archive -LiteralPath '{filepath}' -DestinationPath '{dest_path}' -Force",
+        ]
+    else:
+        # macOS/Linux: use unzip
+        cmd = ["unzip", "-q", filepath, "-d", dest_path]
+
+    subprocess.run(cmd, check=True)
+
+
 def _zip_dir(dir_path: str) -> str:
     """Zip a directory into a file located in the same directory.
 
@@ -157,7 +186,7 @@ def compress_station_files(
         raise ValueError(f"Station data directory {station_dir} does not exist.")
 
     # Get list of files inside the station directory (in all nested directories)
-    filepaths = list_files(station_dir, glob_pattern="*", recursive=True)
+    filepaths = list_files(station_dir, recursive=True)
     for filepath in filepaths:
         _ = _compress_file(filepath, method, skip=skip)
 
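Usage sketch for the new unzip_file_on_terminal helper (paths are placeholders): on Windows it shells out to PowerShell's Expand-Archive; elsewhere it requires the unzip binary on the PATH, and subprocess.run(..., check=True) raises CalledProcessError if extraction fails:

from disdrodb.utils.compression import unzip_file_on_terminal

# Placeholder paths; the destination directory is created if missing
unzip_file_on_terminal("/tmp/station_archive.zip", "/tmp/station_archive")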
disdrodb/utils/dask.py CHANGED
@@ -16,31 +16,82 @@
 # You should have received a copy of the GNU General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
-"""Utilities for Dask Distributed computations."""
+"""Utilities for Dask Distributed Computations."""
 import logging
 import os
 
+import numpy as np
 
-def initialize_dask_cluster():
+
+def check_parallel_validity(parallel):
+    """Check validity of parallel option given Dask settings."""
+    import dask
+
+    scheduler = dask.config.get("scheduler", None)
+    if scheduler is None:
+        return parallel
+    if scheduler in ["synchronous", "threads"]:
+        return False
+    if scheduler == "distributed":
+        from dask.distributed import default_client
+
+        client = default_client()
+        info = client.scheduler_info()
+
+        # If ThreadWorker, only 1 pid
+        pids = list(client.run(os.getpid).values())
+        if len(np.unique(pids)) == 1:
+            return False
+
+        # If ProcessWorker
+        # - Check single thread per worker to avoid locks
+        nthreads_per_process = np.array([v["nthreads"] for v in info["workers"].values()])
+        if not np.all(nthreads_per_process == 1):
+            print(
+                "To open netCDFs in parallel with dask distributed (processes=True), please set threads_per_worker=1 !",
+            )
+            return False
+
+    # Otherwise let the user choose
+    return parallel
+
+
+def initialize_dask_cluster(minimum_memory=None):
     """Initialize Dask Cluster."""
     import dask
+    import psutil
+
+    # Silence dask warnings
+    # dask.config.set({"logging.distributed": "error"})
+    # Import dask.distributed after setting the config
     from dask.distributed import Client, LocalCluster
+    from dask.utils import parse_bytes
 
     # Set HDF5_USE_FILE_LOCKING to avoid going stuck with HDF
     os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"
-    # Retrieve the number of process to run
-    available_workers = os.cpu_count() - 2  # if not set, all CPUs
+
+    # Retrieve the number of processes to run
+    available_workers = os.cpu_count() - 2  # if not set, all CPUs minus 2
     num_workers = dask.config.get("num_workers", available_workers)
-    # Silence dask warnings
-    dask.config.set({"logging.distributed": "error"})
-    # dask.config.set({"distributed.admin.system-monitor.gil.enabled": False})
+
+    # If memory limit specified, ensure correct amount of workers
+    if minimum_memory is not None:
+        # Compute available memory (in bytes)
+        total_memory = psutil.virtual_memory().total
+        # Get minimum memory per worker (in bytes)
+        minimum_memory = parse_bytes(minimum_memory)
+        # Determine number of workers constrained by memory
+        maximum_workers_allowed = max(1, total_memory // minimum_memory)
+        # Respect both CPU and memory requirements
+        num_workers = min(maximum_workers_allowed, num_workers)
+
     # Create dask.distributed local cluster
     cluster = LocalCluster(
         n_workers=num_workers,
         threads_per_worker=1,
         processes=True,
         # memory_limit='8GB',
-        # silence_logs=False,
+        silence_logs=logging.ERROR,
     )
     client = Client(cluster)
     return cluster, client
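
A sketch of the new minimum_memory argument, which caps the worker count so each worker gets at least the requested RAM (the "4GB" value is illustrative; any string understood by dask.utils.parse_bytes works):

from disdrodb.utils.dask import initialize_dask_cluster

# On a 16 GB machine this caps the cluster at 4 workers, even when more
# CPUs are available: num_workers = min(16 GB // 4 GB, cpu-based default)
cluster, client = initialize_dask_cluster(minimum_memory="4GB")
try:
    pass  # submit work through the client here
finally:
    client.close()
    cluster.close()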
disdrodb/utils/dataframe.py CHANGED
@@ -20,6 +20,8 @@
 import numpy as np
 import pandas as pd
 
+from disdrodb.utils.warnings import suppress_warnings
+
 
 def log_arange(start, stop, log_step=0.1, base=10):
     """
@@ -47,7 +49,39 @@ def log_arange(start, stop, log_step=0.1, base=10):
     log_start = np.log(start) / np.log(base)
     log_stop = np.log(stop) / np.log(base)
 
-    log_values = np.arange(log_start, log_stop, log_step)
+    log_values = np.arange(log_start, log_stop + log_step / 2, log_step)
+    return base**log_values
+
+
+def log_linspace(start, stop, n_bins, base=10):
+    """
+    Return numbers spaced evenly on a log scale between start and stop.
+
+    Parameters
+    ----------
+    start : float
+        The starting value of the sequence (must be > 0).
+    stop : float
+        The end value of the sequence (must be > 0).
+    n_bins : int
+        The number of points to generate (including start and stop).
+    base : float
+        The logarithmic base (default is 10).
+
+    Returns
+    -------
+    np.ndarray
+        Array of values spaced evenly in log space.
+    """
+    if start <= 0 or stop <= 0:
+        raise ValueError("Both start and stop must be > 0 for log spacing.")
+    if n_bins < 2:
+        raise ValueError("n_bins must be >= 2 to include start and stop values.")
+
+    log_start = np.log(start) / np.log(base)
+    log_stop = np.log(stop) / np.log(base)
+
+    log_values = np.linspace(log_start, log_stop, n_bins)
     return base**log_values
 
 
@@ -100,6 +134,9 @@ def compute_1d_histogram(df, column, variables=None, bins=10, labels=None, prefi
     if len(df) == 0:
         raise ValueError("No valid data points after removing NaN values")
 
+    # Keep only data within bin range
+    df = df[(df[column] >= bins[0]) & (df[column] < bins[-1])]
+
     # Create binned columns with explicit handling of out-of-bounds values
     df[f"{column}_binned"] = pd.cut(df[column], bins=bins, include_lowest=True)
 
@@ -134,7 +171,7 @@
             (f"{prefix}std", "std"),
             (f"{prefix}min", "min"),
             (f"{prefix}max", "max"),
-            (f"{prefix}mad", lambda s: np.median(np.abs(s - np.median(s)))),
+            (f"{prefix}mad", lambda s: (s - s.median()).abs().median()),
         ]
         if i == 0:
             list_stats.append(("count", "count"))
@@ -142,7 +179,8 @@ def compute_1d_histogram(df, column, variables=None, bins=10, labels=None, prefi
             list_stats = [("count", "count")]
 
         # Compute statistics
-        df_stats = df_grouped[var].agg(list_stats)
+        with suppress_warnings():
+            df_stats = df_grouped[var].agg(list_stats)
 
         # Compute other variable statistics
         if variables_specified:
@@ -253,8 +291,18 @@ def compute_2d_histogram(
         raise ValueError("No valid data points after removing NaN values")
 
     # Create binned columns with explicit handling of out-of-bounds values
-    df[f"{x}_binned"] = pd.cut(df[x], bins=x_bins, include_lowest=True)
-    df[f"{y}_binned"] = pd.cut(df[y], bins=y_bins, include_lowest=True)
+    df[f"{x}_binned"] = pd.cut(
+        df[x],
+        bins=pd.IntervalIndex.from_breaks(x_bins, closed="right"),
+        include_lowest=True,
+        ordered=True,
+    )
+    df[f"{y}_binned"] = pd.cut(
+        df[y],
+        bins=pd.IntervalIndex.from_breaks(y_bins, closed="right"),
+        include_lowest=True,
+        ordered=True,
+    )
 
     # Create complete IntervalIndex for both dimensions
     x_intervals = df[f"{x}_binned"].cat.categories
@@ -318,8 +366,8 @@
     df_stats = df_stats.reindex(full_index)
 
     # Determine coordinates
-    x_centers = x_intervals.mid
-    y_centers = y_intervals.mid
+    x_centers = np.array(x_intervals.mid)
+    y_centers = np.array(y_intervals.mid)
 
     # Use provided labels if available
     x_coords = x_labels if x_labels is not None else x_centers
@@ -337,6 +385,12 @@
     # Convert to dataset
     ds = df_stats.to_xarray()
 
+    # Convert Categorical coordinates to float if possible
+    if np.issubdtype(x_coords.dtype, np.number):
+        ds[f"{x}"] = ds[f"{x}"].astype(float)
+    if np.issubdtype(y_coords.dtype, np.number):
+        ds[f"{y}"] = ds[f"{y}"].astype(float)
+
     # Transpose arrays
     ds = ds.transpose(y, x)
     return ds
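
The + log_step / 2 guard makes log_arange include the stop value whenever it falls on the log grid, consistent with the new log_linspace. A quick numerical check (assuming both helpers are imported from disdrodb.utils.dataframe as the diff suggests):

from disdrodb.utils.dataframe import log_arange, log_linspace

# Before this change, np.arange(-1, 1, 1) dropped the endpoint, yielding [0.1, 1.0]
print(log_arange(0.1, 10, log_step=1))   # [ 0.1  1.  10. ]

# Five points evenly spaced in log10, endpoints included
print(log_linspace(0.1, 10, n_bins=5))   # [ 0.1  0.316  1.  3.162  10. ]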
disdrodb/utils/directories.py CHANGED
@@ -98,18 +98,29 @@ def _recursive_glob(dir_path, glob_pattern):
     return [str(path) for path in dir_path.rglob(glob_pattern)]
 
 
-def _list_paths(dir_path, glob_pattern, recursive=False):
+def _is_hidden(path):
+    """Return True if any component of path is hidden."""
+    return any(part.startswith(".") for part in path.split(os.sep))
+
+
+def _list_paths(dir_path, glob_pattern, recursive=False, skip_hidden=True):
     """Return a list of filepaths and directory paths based on a single glob pattern."""
     # If glob pattern has separators, disable recursive option
     if "/" in glob_pattern and "**" not in glob_pattern:
         recursive = False
     # Search paths
     if not recursive:
-        return glob.glob(os.path.join(dir_path, glob_pattern))
-    return _recursive_glob(dir_path, glob_pattern)
+        matches = glob.glob(os.path.join(dir_path, glob_pattern))
+    else:
+        matches = _recursive_glob(dir_path, glob_pattern)
 
+    # Filter out anything with a hidden component
+    if skip_hidden:
+        matches = [p for p in matches if not _is_hidden(os.path.relpath(p, dir_path))]
+    return matches
 
-def list_paths(dir_path, glob_pattern, recursive=False):
+
+def list_paths(dir_path, glob_pattern, recursive=False, skip_hidden=True):
     """Return a list of filepaths and directory paths.
 
     This function accept also a list of glob patterns !
@@ -119,35 +130,41 @@ def list_paths(dir_path, glob_pattern, recursive=False):
     # Search path for specified glob patterns
     paths = flatten_list(
         [
-            _list_paths(dir_path=dir_path, glob_pattern=glob_pattern, recursive=recursive)
+            _list_paths(dir_path=dir_path, glob_pattern=glob_pattern, recursive=recursive, skip_hidden=skip_hidden)
             for glob_pattern in glob_patterns
         ],
     )
     return paths
 
 
-def list_files(dir_path, glob_pattern, recursive=False):
+def list_files(dir_path, glob_pattern="*", recursive=False, skip_hidden=True, return_paths=True):
     """Return a list of filepaths (exclude directory paths)."""
-    paths = list_paths(dir_path, glob_pattern, recursive=recursive)
+    paths = list_paths(dir_path, glob_pattern, recursive=recursive, skip_hidden=skip_hidden)
     filepaths = [f for f in paths if os.path.isfile(f)]
+    # If return_paths is False, return only files names
+    if not return_paths:
+        filepaths = [os.path.basename(f) for f in filepaths]
     return filepaths
 
 
-def list_directories(dir_path, glob_pattern, recursive=False):
+def list_directories(dir_path, glob_pattern="*", recursive=False, skip_hidden=True, return_paths=True):
     """Return a list of directory paths (exclude file paths)."""
-    paths = list_paths(dir_path, glob_pattern, recursive=recursive)
+    paths = list_paths(dir_path, glob_pattern, recursive=recursive, skip_hidden=skip_hidden)
     dir_paths = [f for f in paths if os.path.isdir(f)]
+    # If return_paths is False, return only directory names
+    if not return_paths:
+        dir_paths = [os.path.basename(f) for f in dir_paths]
     return dir_paths
 
 
-def count_files(dir_path, glob_pattern, recursive=False):
+def count_files(dir_path, glob_pattern="*", recursive=False, skip_hidden=True):
     """Return the number of files (exclude directories)."""
-    return len(list_files(dir_path, glob_pattern, recursive=recursive))
+    return len(list_files(dir_path, glob_pattern, recursive=recursive, skip_hidden=skip_hidden))
 
 
-def count_directories(dir_path, glob_pattern, recursive=False):
+def count_directories(dir_path, glob_pattern="*", recursive=False, skip_hidden=True):
     """Return the number of files (exclude directories)."""
-    return len(list_directories(dir_path, glob_pattern, recursive=recursive))
+    return len(list_directories(dir_path, glob_pattern, recursive=recursive, skip_hidden=skip_hidden))
 
 
 def check_directory_exists(dir_path):
@@ -177,7 +194,7 @@ def create_required_directory(dir_path, dir_name, exist_ok=True):
     create_directory(path=new_dir_path, exist_ok=exist_ok)
 
 
-def is_empty_directory(path):
+def is_empty_directory(path, skip_hidden=True):
     """Check if a directory path is empty.
 
     Return ``False`` if path is a file or non-empty directory.
@@ -187,8 +204,11 @@ def is_empty_directory(path):
         raise OSError(f"{path} does not exist.")
     if not os.path.isdir(path):
         return False
-
     paths = os.listdir(path)
+
+    # If skip_hidden is True, filter out hidden files/directories
+    if skip_hidden:
+        paths = [f for f in paths if not f.startswith(".")]
     return len(paths) == 0
 
 
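
Usage sketch of the reworked listing helpers (the directory path is a placeholder):

from disdrodb.utils.directories import count_files, list_files

# glob_pattern now defaults to "*", and hidden entries (any path
# component starting with ".") are skipped unless requested otherwise
filenames = list_files("/data/DISDRODB", recursive=True, return_paths=False)

# Hidden files can still be included explicitly
n_all = count_files("/data/DISDRODB", recursive=True, skip_hidden=False)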