disdrodb 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. disdrodb/__init__.py +64 -34
  2. disdrodb/_config.py +5 -4
  3. disdrodb/_version.py +16 -3
  4. disdrodb/accessor/__init__.py +20 -0
  5. disdrodb/accessor/methods.py +125 -0
  6. disdrodb/api/checks.py +139 -9
  7. disdrodb/api/configs.py +4 -2
  8. disdrodb/api/info.py +10 -10
  9. disdrodb/api/io.py +237 -18
  10. disdrodb/api/path.py +81 -75
  11. disdrodb/api/search.py +6 -6
  12. disdrodb/cli/disdrodb_create_summary_station.py +91 -0
  13. disdrodb/cli/disdrodb_run_l0.py +1 -1
  14. disdrodb/cli/disdrodb_run_l0_station.py +1 -1
  15. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  16. disdrodb/cli/disdrodb_run_l0b_station.py +1 -1
  17. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  18. disdrodb/cli/disdrodb_run_l0c_station.py +1 -1
  19. disdrodb/cli/disdrodb_run_l2e_station.py +1 -1
  20. disdrodb/configs.py +149 -4
  21. disdrodb/constants.py +61 -0
  22. disdrodb/data_transfer/download_data.py +5 -5
  23. disdrodb/etc/configs/attributes.yaml +339 -0
  24. disdrodb/etc/configs/encodings.yaml +473 -0
  25. disdrodb/etc/products/L1/global.yaml +13 -0
  26. disdrodb/etc/products/L2E/10MIN.yaml +12 -0
  27. disdrodb/etc/products/L2E/1MIN.yaml +1 -0
  28. disdrodb/etc/products/L2E/global.yaml +22 -0
  29. disdrodb/etc/products/L2M/10MIN.yaml +12 -0
  30. disdrodb/etc/products/L2M/GAMMA_ML.yaml +8 -0
  31. disdrodb/etc/products/L2M/NGAMMA_GS_LOG_ND_MAE.yaml +6 -0
  32. disdrodb/etc/products/L2M/NGAMMA_GS_ND_MAE.yaml +6 -0
  33. disdrodb/etc/products/L2M/NGAMMA_GS_Z_MAE.yaml +6 -0
  34. disdrodb/etc/products/L2M/global.yaml +26 -0
  35. disdrodb/l0/__init__.py +13 -0
  36. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +4 -4
  37. disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
  38. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +3 -3
  39. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
  40. disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +5 -5
  41. disdrodb/l0/configs/PARSIVEL2/l0b_encodings.yml +3 -3
  42. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +1 -1
  43. disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +4 -4
  44. disdrodb/l0/configs/PWS100/raw_data_format.yml +1 -1
  45. disdrodb/l0/l0a_processing.py +30 -30
  46. disdrodb/l0/l0b_nc_processing.py +108 -2
  47. disdrodb/l0/l0b_processing.py +4 -4
  48. disdrodb/l0/l0c_processing.py +5 -13
  49. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_LPM_NC.py +66 -0
  50. disdrodb/l0/readers/LPM/SLOVENIA/{CRNI_VRH.py → UL.py} +3 -0
  51. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +195 -0
  52. disdrodb/l0/readers/PARSIVEL/GPM/PIERS.py +0 -2
  53. disdrodb/l0/readers/PARSIVEL/JAPAN/JMA.py +4 -1
  54. disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
  55. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
  56. disdrodb/l0/readers/PARSIVEL2/BELGIUM/ILVO.py +168 -0
  57. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +165 -0
  58. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +69 -0
  59. disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +255 -134
  60. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +525 -0
  61. disdrodb/l0/readers/PARSIVEL2/FRANCE/SIRTA_PARSIVEL2.py +1 -1
  62. disdrodb/l0/readers/PARSIVEL2/GPM/GCPEX.py +9 -7
  63. disdrodb/l0/readers/PARSIVEL2/KIT/BURKINA_FASO.py +1 -1
  64. disdrodb/l0/readers/PARSIVEL2/KIT/TEAMX.py +123 -0
  65. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +120 -0
  66. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +1 -0
  67. disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
  68. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +126 -0
  69. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_PIPS.py +165 -0
  70. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
  71. disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_PIPS.py +20 -12
  72. disdrodb/l0/readers/PARSIVEL2/NETHERLANDS/DELFT_NC.py +2 -0
  73. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +144 -0
  74. disdrodb/l0/readers/PARSIVEL2/SPAIN/CR1000DL.py +201 -0
  75. disdrodb/l0/readers/PARSIVEL2/SPAIN/LIAISE.py +137 -0
  76. disdrodb/l0/readers/PARSIVEL2/{NETHERLANDS/DELFT.py → USA/C3WE.py} +65 -85
  77. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +105 -99
  78. disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100_SIRTA.py +151 -0
  79. disdrodb/l0/routines.py +105 -14
  80. disdrodb/l1/__init__.py +5 -0
  81. disdrodb/l1/filters.py +34 -20
  82. disdrodb/l1/processing.py +45 -44
  83. disdrodb/l1/resampling.py +77 -66
  84. disdrodb/l1/routines.py +35 -43
  85. disdrodb/l1_env/routines.py +18 -3
  86. disdrodb/l2/__init__.py +7 -0
  87. disdrodb/l2/empirical_dsd.py +58 -10
  88. disdrodb/l2/event.py +27 -120
  89. disdrodb/l2/processing.py +267 -116
  90. disdrodb/l2/routines.py +618 -254
  91. disdrodb/metadata/standards.py +3 -1
  92. disdrodb/psd/fitting.py +463 -144
  93. disdrodb/psd/models.py +8 -5
  94. disdrodb/routines.py +3 -3
  95. disdrodb/scattering/__init__.py +16 -4
  96. disdrodb/scattering/axis_ratio.py +56 -36
  97. disdrodb/scattering/permittivity.py +486 -0
  98. disdrodb/scattering/routines.py +701 -159
  99. disdrodb/summary/__init__.py +17 -0
  100. disdrodb/summary/routines.py +4120 -0
  101. disdrodb/utils/attrs.py +68 -125
  102. disdrodb/utils/compression.py +30 -1
  103. disdrodb/utils/dask.py +59 -8
  104. disdrodb/utils/dataframe.py +61 -7
  105. disdrodb/utils/directories.py +35 -15
  106. disdrodb/utils/encoding.py +33 -19
  107. disdrodb/utils/logger.py +13 -6
  108. disdrodb/utils/manipulations.py +71 -0
  109. disdrodb/utils/subsetting.py +214 -0
  110. disdrodb/utils/time.py +165 -19
  111. disdrodb/utils/writer.py +20 -7
  112. disdrodb/utils/xarray.py +2 -4
  113. disdrodb/viz/__init__.py +13 -0
  114. disdrodb/viz/plots.py +327 -0
  115. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/METADATA +3 -2
  116. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/RECORD +121 -88
  117. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/entry_points.txt +1 -0
  118. disdrodb/l1/encoding_attrs.py +0 -642
  119. disdrodb/l2/processing_options.py +0 -213
  120. /disdrodb/l0/readers/PARSIVEL/SLOVENIA/{UL_FGG.py → UL.py} +0 -0
  121. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/WHEEL +0 -0
  122. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/licenses/LICENSE +0 -0
  123. {disdrodb-0.1.2.dist-info → disdrodb-0.1.3.dist-info}/top_level.txt +0 -0
disdrodb/utils/time.py CHANGED
@@ -33,7 +33,7 @@ logger = logging.getLogger(__name__)
 #### Sampling Interval Acronyms
 
 
-def seconds_to_acronym(seconds):
+def seconds_to_temporal_resolution(seconds):
     """
     Convert a duration in seconds to a readable string format (e.g., "1H30MIN", "1D2H").
 
@@ -57,27 +57,27 @@ def seconds_to_acronym(seconds):
         parts.append(f"{components.minutes}MIN")
     if components.seconds > 0:
         parts.append(f"{components.seconds}S")
-    acronym = "".join(parts)
-    return acronym
+    temporal_resolution = "".join(parts)
+    return temporal_resolution
 
 
-def get_resampling_information(sample_interval_acronym):
+def get_resampling_information(temporal_resolution):
     """
-    Extract resampling information from the sample interval acronym.
+    Extract resampling information from the temporal_resolution string.
 
     Parameters
     ----------
-    sample_interval_acronym: str
-        A string representing the sample interval: e.g., "1H30MIN", "ROLL1H30MIN".
+    temporal_resolution: str
+        A string representing the product temporal resolution: e.g., "1H30MIN", "ROLL1H30MIN".
 
     Returns
     -------
     sample_interval_seconds, rolling: tuple
         Sample interval in seconds and whether rolling is enabled.
     """
-    rolling = sample_interval_acronym.startswith("ROLL")
+    rolling = temporal_resolution.startswith("ROLL")
     if rolling:
-        sample_interval_acronym = sample_interval_acronym[4:]  # Remove "ROLL"
+        temporal_resolution = temporal_resolution[4:]  # Remove "ROLL"
 
     # Allowed pattern: one or more occurrences of "<number><unit>"
     # where unit is exactly one of D, H, MIN, or S.
@@ -85,15 +85,15 @@ def get_resampling_information(sample_interval_acronym):
     pattern = r"^(\d+(?:D|H|MIN|S))+$"
 
     # Check if the entire string matches the pattern
-    if not re.match(pattern, sample_interval_acronym):
+    if not re.match(pattern, temporal_resolution):
         raise ValueError(
-            f"Invalid sample interval acronym '{sample_interval_acronym}'. "
+            f"Invalid temporal resolution '{temporal_resolution}'. "
             "Must be composed of one or more <number><unit> groups, where unit is D, H, MIN, or S.",
         )
 
     # Regular expression to match duration components and extract all (value, unit) pairs
     pattern = r"(\d+)(D|H|MIN|S)"
-    matches = re.findall(pattern, sample_interval_acronym)
+    matches = re.findall(pattern, temporal_resolution)
 
     # Conversion factors for each unit
     unit_to_seconds = {
@@ -112,21 +112,21 @@ def get_resampling_information(sample_interval_acronym):
     return sample_interval, rolling
 
 
-def acronym_to_seconds(acronym):
+def temporal_resolution_to_seconds(temporal_resolution):
     """
-    Extract the interval in seconds from the duration acronym.
+    Extract the measurement interval in seconds from the temporal resolution string.
 
     Parameters
     ----------
-    acronym: str
-        A string representing a duration: e.g., "1H30MIN", "ROLL1H30MIN".
+    temporal_resolution: str
+        A string representing the product measurement interval: e.g., "1H30MIN", "ROLL1H30MIN".
 
     Returns
     -------
     seconds
         Duration in seconds.
     """
-    seconds, _ = get_resampling_information(acronym)
+    seconds, _ = get_resampling_information(temporal_resolution)
     return seconds
 
 
@@ -262,6 +262,7 @@ def regularize_dataset(
         Regularized dataset.
 
     """
+    attrs = xr_obj.attrs.copy()
     xr_obj = _check_time_sorted(xr_obj, time_dim=time_dim)
     start_time, end_time = get_dataset_start_end_time(xr_obj, time_dim=time_dim)
 
@@ -289,11 +290,14 @@
         # tolerance=tolerance,  # mismatch in seconds
         fill_value=fill_value,
     )
+
+    # Ensure attributes are preserved
+    xr_obj.attrs = attrs
     return xr_obj
 
 
 ####------------------------------------------
-#### Sampling interval utilities
+#### Interval utilities
 
 
 def ensure_sample_interval_in_seconds(sample_interval):  # noqa: PLR0911
@@ -376,7 +380,7 @@ def ensure_sample_interval_in_seconds(sample_interval):  # noqa: PLR0911
             raise TypeError("Float array sample_interval must contain only whole numbers.")
         return sample_interval.astype(int)
 
-    # Deal with xarray.DataArrayy of floats that are all integer-valued (with optionally some NaN)
+    # Deal with xarray.DataArray of floats that are all integer-valued (with optionally some NaN)
     if isinstance(sample_interval, xr.DataArray) and np.issubdtype(sample_interval.dtype, np.floating):
         arr = sample_interval.copy()
         data = arr.data
@@ -397,6 +401,17 @@ def ensure_sample_interval_in_seconds(sample_interval):  # noqa: PLR0911
     )
 
 
+def ensure_timedelta_seconds_interval(interval):
+    """Return interval as numpy.timedelta64 in seconds."""
+    if isinstance(interval, (xr.DataArray, np.ndarray)):
+        return ensure_sample_interval_in_seconds(interval).astype("m8[s]")
+    return np.array(ensure_sample_interval_in_seconds(interval), dtype="m8[s]")
+
+
+####------------------------------------------
+#### Sample Interval Utilities
+
+
 def infer_sample_interval(ds, robust=False, verbose=False, logger=None):
     """Infer the sample interval of a dataset.
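A small sketch of the new `ensure_timedelta_seconds_interval` helper, assuming integer-second inputs (non-integer values are rejected by `ensure_sample_interval_in_seconds`):

    import numpy as np
    from disdrodb.utils.time import ensure_timedelta_seconds_interval

    ensure_timedelta_seconds_interval(60)                  # 60 seconds as a timedelta64[s] (0-d array)
    ensure_timedelta_seconds_interval(np.array([30, 60]))  # array([30, 60], dtype='timedelta64[s]')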
 
@@ -655,3 +670,134 @@ def regularize_timesteps(ds, sample_interval, robust=False, add_quality_flag=Tru
     ds = ds.isel(time=idx_valid_timesteps)
     # Return dataset
     return ds
+
+
+####---------------------------------------------------------------------------------
+#### Time blocks
+
+
+def check_freq(freq: str) -> str:
+    """Check validity of freq argument."""
+    valid_freq = ["none", "year", "season", "quarter", "month", "day", "hour"]
+    if not isinstance(freq, str):
+        raise TypeError("'freq' must be a string.")
+    if freq not in valid_freq:
+        raise ValueError(
+            f"'freq' '{freq}' is not valid. Must be one of: {valid_freq}.",
+        )
+    return freq
+
+
+def generate_time_blocks(start_time: np.datetime64, end_time: np.datetime64, freq: str) -> np.ndarray:  # noqa: PLR0911
+    """Generate time blocks between `start_time` and `end_time` for a given frequency.
+
+    Parameters
+    ----------
+    start_time : numpy.datetime64
+        Inclusive start of the overall time range.
+    end_time : numpy.datetime64
+        Inclusive end of the overall time range.
+    freq : str
+        Frequency specifier. Accepted values are:
+        - 'none' : return a single block [start_time, end_time]
+        - 'hour' : split into hourly blocks
+        - 'day' : split into daily blocks
+        - 'month' : split into calendar months
+        - 'quarter' : split into calendar quarters
+        - 'year' : split into calendar years
+        - 'season' : split into meteorological seasons (MAM, JJA, SON, DJF)
+
+    Returns
+    -------
+    numpy.ndarray
+        Array of shape (n, 2) with dtype datetime64[s], where each row is [block_start, block_end].
+
+    """
+    freq = check_freq(freq)
+    if freq == "none":
+        return np.array([[start_time, end_time]], dtype="datetime64[s]")
+
+    if freq == "hour":
+        periods = pd.period_range(start=start_time, end=end_time, freq="h")
+        blocks = np.array(
+            [
+                [
+                    period.start_time.to_datetime64().astype("datetime64[s]"),
+                    period.end_time.to_datetime64().astype("datetime64[s]"),
+                ]
+                for period in periods
+            ],
+            dtype="datetime64[s]",
+        )
+        return blocks
+
+    if freq == "day":
+        periods = pd.period_range(start=start_time, end=end_time, freq="d")
+        blocks = np.array(
+            [
+                [
+                    period.start_time.to_datetime64().astype("datetime64[s]"),
+                    period.end_time.to_datetime64().astype("datetime64[s]"),
+                ]
+                for period in periods
+            ],
+            dtype="datetime64[s]",
+        )
+        return blocks
+
+    if freq == "month":
+        periods = pd.period_range(start=start_time, end=end_time, freq="M")
+        blocks = np.array(
+            [
+                [
+                    period.start_time.to_datetime64().astype("datetime64[s]"),
+                    period.end_time.to_datetime64().astype("datetime64[s]"),
+                ]
+                for period in periods
+            ],
+            dtype="datetime64[s]",
+        )
+        return blocks
+
+    if freq == "year":
+        periods = pd.period_range(start=start_time, end=end_time, freq="Y")
+        blocks = np.array(
+            [
+                [
+                    period.start_time.to_datetime64().astype("datetime64[s]"),
+                    period.end_time.to_datetime64().astype("datetime64[s]"),
+                ]
+                for period in periods
+            ],
+            dtype="datetime64[s]",
+        )
+        return blocks
+
+    if freq == "quarter":
+        periods = pd.period_range(start=start_time, end=end_time, freq="Q")
+        blocks = np.array(
+            [
+                [
+                    period.start_time.to_datetime64().astype("datetime64[s]"),
+                    period.end_time.floor("s").to_datetime64().astype("datetime64[s]"),
+                ]
+                for period in periods
+            ],
+            dtype="datetime64[s]",
+        )
+        return blocks
+
+    if freq == "season":
+        # Fiscal quarter frequency ending in Feb → seasons DJF, MAM, JJA, SON
+        periods = pd.period_range(start=start_time, end=end_time, freq="Q-FEB")
+        blocks = np.array(
+            [
+                [
+                    period.start_time.to_datetime64().astype("datetime64[s]"),
+                    period.end_time.to_datetime64().astype("datetime64[s]"),
+                ]
+                for period in periods
+            ],
+            dtype="datetime64[s]",
+        )
+        return blocks
+    raise NotImplementedError(f"Frequency '{freq}' is not implemented.")
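A hedged usage sketch of `generate_time_blocks` (block boundaries follow pandas period semantics, truncated to second precision):

    import numpy as np
    from disdrodb.utils.time import generate_time_blocks

    start = np.datetime64("2023-01-01 00:00:00")
    end = np.datetime64("2023-03-15 12:00:00")
    blocks = generate_time_blocks(start, end, freq="month")
    # blocks.shape == (3, 2); the first row spans
    # 2023-01-01T00:00:00 .. 2023-01-31T23:59:59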
disdrodb/utils/writer.py CHANGED
@@ -22,11 +22,29 @@ import os
 
 import xarray as xr
 
-from disdrodb.utils.attrs import set_disdrodb_attrs
+from disdrodb.utils.attrs import get_attrs_dict, set_attrs, set_disdrodb_attrs
 from disdrodb.utils.directories import create_directory, remove_if_exists
+from disdrodb.utils.encoding import get_encodings_dict, set_encodings
 
 
-def write_product(ds: xr.Dataset, filepath: str, product: str, force: bool = False) -> None:
+def finalize_product(ds, product=None) -> xr.Dataset:
+    """Finalize DISDRODB product."""
+    # Add variables attributes
+    attrs_dict = get_attrs_dict()
+    ds = set_attrs(ds, attrs_dict=attrs_dict)
+
+    # Add variables encoding
+    encodings_dict = get_encodings_dict()
+    ds = set_encodings(ds, encodings_dict=encodings_dict)
+
+    # Add DISDRODB global attributes
+    # - e.g. in generate_l2_radar it inherits from the input dataset!
+    if product is not None:
+        ds = set_disdrodb_attrs(ds, product=product)
+    return ds
+
+
+def write_product(ds: xr.Dataset, filepath: str, force: bool = False) -> None:
     """Save the xarray dataset into a NetCDF file.
 
     Parameters
@@ -35,8 +53,6 @@ def write_product(ds: xr.Dataset, filepath: str, product: str, force: bool = Fal
         Input xarray dataset.
     filepath : str
        Output file path.
-    product: str
-        DISDRODB product name.
    force : bool, optional
        Whether to overwrite existing data.
        If ``True``, overwrite existing data into destination directories.
@@ -50,8 +66,5 @@ def write_product(ds: xr.Dataset, filepath: str, product: str, force: bool = Fal
     # - If force=False --> Raise error
     remove_if_exists(filepath, force=force)
 
-    # Update attributes
-    ds = set_disdrodb_attrs(ds, product=product)
-
     # Write netcdf
     ds.to_netcdf(filepath, engine="netcdf4")
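With this split, callers finalize a dataset before writing it. A minimal sketch (`ds` stands for an in-memory DISDRODB product dataset; the output path is hypothetical):

    from disdrodb.utils.writer import finalize_product, write_product

    ds = finalize_product(ds, product="L2E")  # attach variable attributes, encodings, global attrs
    write_product(ds, filepath="/tmp/example_l2e.nc", force=True)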
disdrodb/utils/xarray.py CHANGED
@@ -21,6 +21,8 @@ import numpy as np
 import xarray as xr
 from xarray.core import dtypes
 
+from disdrodb.constants import DIAMETER_COORDS, VELOCITY_COORDS
+
 
 def xr_get_last_valid_idx(da_condition, dim, fill_value=None):
     """
@@ -246,13 +248,9 @@ def define_fill_value_dictionary(xr_obj):
 
 def remove_diameter_coordinates(xr_obj):
     """Drop diameter coordinates from xarray object."""
-    from disdrodb import DIAMETER_COORDS
-
     return xr_obj.drop_vars(DIAMETER_COORDS, errors="ignore")
 
 
 def remove_velocity_coordinates(xr_obj):
     """Drop velocity coordinates from xarray object."""
-    from disdrodb import VELOCITY_COORDS
-
     return xr_obj.drop_vars(VELOCITY_COORDS, errors="ignore")
disdrodb/viz/__init__.py CHANGED
@@ -15,3 +15,16 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
 """DISDRODB Visualization Module."""
+from disdrodb.viz.plots import (
+    compute_dense_lines,
+    max_blend_images,
+    plot_nd,
+    to_rgba,
+)
+
+__all__ = [
+    "compute_dense_lines",
+    "max_blend_images",
+    "plot_nd",
+    "to_rgba",
+]
disdrodb/viz/plots.py CHANGED
@@ -15,3 +15,330 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
 """DISDRODB Plotting Tools."""
+import matplotlib.pyplot as plt
+import numpy as np
+import xarray as xr
+from matplotlib.colors import LogNorm, Normalize
+
+
+def plot_nd(ds, var="drop_number_concentration", cmap=None, norm=None):
+    """Plot drop number concentration N(D) timeseries."""
+    # Check inputs
+    if var not in ds:
+        raise ValueError(f"{var} is not an xarray Dataset variable!")
+    # Check only time and diameter dimensions are specified
+    # TODO: DIAMETER_DIMENSION, "time"
+
+    # Select N(D)
+    ds_var = ds[[var]].compute()
+
+    # Regularize input
+    ds_var = ds_var.disdrodb.regularize()
+
+    # Set 0 values to np.nan
+    ds_var = ds_var.where(ds_var[var] > 0)
+
+    # Define cmap and norm
+    if cmap is None:
+        cmap = plt.get_cmap("Spectral_r").copy()
+
+    vmin = ds_var[var].min().item()
+    norm = LogNorm(vmin, None) if norm is None else norm
+
+    # Plot N(D)
+    p = ds_var[var].plot.pcolormesh(x="time", norm=norm, cmap=cmap)
+    p.axes.set_title("Drop number concentration (N(D))")
+    p.axes.set_ylabel("Drop diameter (mm)")
+    return p
+
+
+def normalize_array(arr, method="max"):
+    """Normalize a NumPy array according to the chosen method.
+
+    Parameters
+    ----------
+    arr : np.ndarray
+        Input array.
+    method : str
+        Normalization method. Options:
+        - 'max'   : Divide by the maximum value.
+        - 'minmax': Scale to [0, 1] range.
+        - 'zscore': Standardize to mean 0, std 1.
+        - 'log'   : Apply log10 transform (shifted if min <= 0).
+        - 'none'  : No normalization (return original array).
+
+    Returns
+    -------
+    np.ndarray
+        Normalized array.
+    """
+    arr = np.asarray(arr, dtype=float)
+
+    if method == "max":
+        max_val = np.nanmax(arr)
+        return arr / max_val if max_val != 0 else arr
+
+    if method == "minmax":
+        min_val = np.nanmin(arr)
+        max_val = np.nanmax(arr)
+        return (arr - min_val) / (max_val - min_val) if max_val != min_val else np.zeros_like(arr)
+
+    if method == "zscore":
+        mean_val = np.nanmean(arr)
+        std_val = np.nanstd(arr)
+        return (arr - mean_val) / std_val if std_val != 0 else np.zeros_like(arr)
+
+    if method == "log":
+        min_val = np.nanmin(arr)
+        shifted = arr - min_val + 1e-12  # Shift to avoid log(0) or log of negative
+        return np.log10(shifted)
+
+    if method == "none":
+        return arr
+
+    raise ValueError(f"Unknown normalization method: {method}")
+
+
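A quick numeric sketch of `normalize_array`:

    import numpy as np
    from disdrodb.viz.plots import normalize_array

    arr = np.array([1.0, 5.0, 10.0])
    normalize_array(arr, method="max")     # array([0.1, 0.5, 1. ])
    normalize_array(arr, method="minmax")  # array([0.        , 0.44444444, 1.        ])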
+def _np_to_rgba_alpha(arr, cmap="viridis", cmap_norm=None, scaling="linear"):
+    """Convert a numpy array to an RGBA array with alpha based on array value.
+
+    Parameters
+    ----------
+    arr : numpy.ndarray
+        Array of counts or frequencies.
+    cmap : str or Colormap, optional
+        Matplotlib colormap to use for RGB channels.
+    cmap_norm : matplotlib.colors.Normalize
+        Norm used to scale data before assigning cmap colors.
+        The default is Normalize(vmin, vmax).
+    scaling : str, optional
+        Scaling type for alpha mapping:
+        - "linear" : min-max normalization
+        - "log" : logarithmic normalization (positive values only)
+        - "sqrt" : square-root (power-law with exponent=0.5)
+        - "exp" : exponential scaling
+        - "quantile" : percentile-based scaling
+        - "none" : full opacity (alpha=1)
+
+    Returns
+    -------
+    rgba : 3D numpy array (ny, nx, 4)
+        RGBA array.
+    """
+    # Ensure numpy array
+    arr = np.asarray(arr, dtype=float)
+    # Define mask with NaN pixels
+    mask_na = np.isnan(arr)
+    # Retrieve array shape
+    ny, nx = arr.shape
+
+    # Define colormap norm
+    if cmap_norm is None:
+        cmap_norm = Normalize(vmin=np.nanmin(arr), vmax=np.nanmax(arr))
+
+    # Define alpha
+    if scaling == "linear":
+        norm = Normalize(vmin=np.nanmin(arr), vmax=np.nanmax(arr))
+        alpha = norm(arr)
+    elif scaling == "log":
+        vals = np.where(arr > 0, arr, np.nan)  # mask non-positive
+        norm = LogNorm(vmin=np.nanmin(vals), vmax=np.nanmax(vals))
+        alpha = norm(arr)
+        alpha = np.nan_to_num(alpha, nan=0.0)
+    elif scaling == "sqrt":
+        alpha = np.sqrt(np.clip(arr, 0, None) / np.nanmax(arr))
+    elif scaling == "exp":
+        normed = np.clip(arr / np.nanmax(arr), 0, 1)
+        alpha = np.expm1(normed) / np.expm1(1)
+    elif scaling == "quantile":
+        flat = arr.ravel()
+        ranks = np.argsort(np.argsort(flat))  # rankdata without scipy
+        alpha = ranks / (len(flat) - 1)
+        alpha = alpha.reshape(arr.shape)
+    elif scaling == "none":
+        alpha = np.ones_like(arr, dtype=float)
+    else:
+        raise ValueError(f"Unknown scaling type: {scaling}")
+
+    # Map values to colors
+    cmap = plt.get_cmap(cmap).copy()
+    rgba = cmap(cmap_norm(arr))
+
+    # Set alpha channel
+    alpha[mask_na] = 0  # where input was NaN
+    rgba[..., -1] = np.clip(alpha, 0, 1)
+    return rgba
+
+
+def to_rgba(obj, cmap="viridis", norm=None, scaling="none"):
+    """Map an xarray DataArray (or numpy array) to RGBA with optional alpha-scaling."""
+    input_is_xarray = False
+    if isinstance(obj, xr.DataArray):
+        # Define template for RGBA DataArray
+        da_rgba = obj.copy()
+        da_rgba = da_rgba.expand_dims({"rgba": 4}).transpose(..., "rgba")
+        input_is_xarray = True
+
+        # Extract numpy array
+        obj = obj.to_numpy()
+
+    # Apply transparency
+    arr = _np_to_rgba_alpha(obj, cmap=cmap, cmap_norm=norm, scaling=scaling)
+
+    # Return xarray.DataArray
+    if input_is_xarray:
+        da_rgba.data = arr
+        return da_rgba
+    # Or numpy array otherwise
+    return arr
+
+
+def max_blend_images(ds_rgb, dim):
+    """Max-blend an RGBA DataArray across a sample dimension."""
+    # Ensure dimension to blend is in first position
+    ds_rgb = ds_rgb.transpose(dim, ...)
+    # Extract numpy array
+    stack = ds_rgb.data  # (N, H, W, 4)
+    # Extract alpha array
+    alphas = stack[..., 3]  # (N, H, W)
+    # Select the winning RGBA per pixel
+    idx = np.argmax(alphas, axis=0)  # (H, W), index of image with max alpha
+    idx4 = np.repeat(idx[np.newaxis, ..., np.newaxis], 4, axis=-1)  # (1, H, W, 4)
+    out = np.take_along_axis(stack, idx4, axis=0)[0]  # (H, W, 4)
+    # Create output RGBA array
+    da = ds_rgb.isel({dim: 0}).copy()
+    da.data = out
+    return da
+
+
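`_np_to_rgba_alpha` expects a single 2D field (it unpacks `ny, nx = arr.shape`), so blending several fields means converting each 2D slice with `to_rgba` and concatenating before `max_blend_images`. A minimal sketch with synthetic data:

    import numpy as np
    import xarray as xr
    from disdrodb.viz.plots import max_blend_images, to_rgba

    fields = xr.DataArray(np.random.rand(3, 32, 32), dims=("sample", "y", "x"))
    rgba = xr.concat(
        [to_rgba(fields.isel(sample=i), cmap="viridis", scaling="linear") for i in range(3)],
        dim="sample",
    )
    blended = max_blend_images(rgba, dim="sample")  # (y, x, rgba): per-pixel max-alpha winner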
+def compute_dense_lines(
+    da: xr.DataArray,
+    coord: str,
+    x_bins: list,
+    y_bins: list,
+    normalization="max",
+):
+    """
+    Compute a 2D density-of-lines histogram from an xarray.DataArray.
+
+    Parameters
+    ----------
+    da : xarray.DataArray
+        Input data array. One of its dimensions (named by ``coord``) is taken
+        as the horizontal coordinate. All other dimensions are collapsed into
+        “series,” so that each combination of the remaining dimension values
+        produces one 1D line along ``coord``.
+    coord : str
+        The name of the coordinate/dimension of the DataArray to bin over.
+        ``da.coords[coord]`` must be a 1D numeric array (monotonic is recommended).
+    x_bins : array_like of shape (nx+1,)
+        Bin edges to bin the coordinate/dimension.
+        Must be monotonically increasing.
+        The number of x-bins will be ``nx = len(x_bins) - 1``.
+    y_bins : array_like of shape (ny+1,)
+        Bin edges for the DataArray values.
+        Must be monotonically increasing.
+        The number of y-bins will be ``ny = len(y_bins) - 1``.
+    normalization : str, optional
+        If 'none', returns the raw histogram.
+        By default, the histogram is normalized by its global maximum ('max').
+        Log-normalization ('log') is also available.
+
+    Returns
+    -------
+    xr.DataArray
+        2D histogram of shape ``(ny, nx)``. Dimensions are ``(da.name, coord)``, where:
+
+        - ``coord``: the bin-center coordinate of ``x_bins`` (length ``nx``)
+        - ``da.name``: the bin-center coordinate of ``y_bins`` (length ``ny``)
+
+        Each element ``out.values[y_i, x_j]`` is the count (or normalized count) of how
+        many “series-values” from ``da`` fell into the rectangular bin
+        ``x_bins[j] ≤ x_value < x_bins[j+1]`` and
+        ``y_bins[i] ≤ data_value < y_bins[i+1]``.
+
+    References
+    ----------
+    Moritz, D., Fisher, D. (2018).
+    Visualizing a Million Time Series with the Density Line Chart.
+    https://doi.org/10.48550/arXiv.1808.06019
+    """
+    # Check DataArray name
+    if da.name is None or da.name == "":
+        raise ValueError("The DataArray must have a name.")
+
+    # Validate x_bins and y_bins
+    x_bins = np.asarray(x_bins)
+    y_bins = np.asarray(y_bins)
+    if x_bins.ndim != 1 or x_bins.size < 2:
+        raise ValueError("`x_bins` must be a 1D array with at least two edges.")
+    if y_bins.ndim != 1 or y_bins.size < 2:
+        raise ValueError("`y_bins` must be a 1D array with at least two edges.")
+    if not np.all(np.diff(x_bins) > 0):
+        raise ValueError("`x_bins` must be strictly increasing.")
+    if not np.all(np.diff(y_bins) > 0):
+        raise ValueError("`y_bins` must be strictly increasing.")
+
+    # Verify that `coord` exists as either a dimension or a coordinate
+    if coord not in (list(da.coords) + list(da.dims)):
+        raise ValueError(f"'{coord}' is not a dimension or coordinate of the DataArray.")
+    if coord not in da.dims:
+        if da[coord].ndim != 1:
+            raise ValueError(f"Coordinate '{coord}' must be 1D. Instead has dimensions {da[coord].dims}")
+        x_dim = da[coord].dims[0]
+    else:
+        x_dim = coord
+
+    # Define the x bin centers
+    x_values = (x_bins[0:-1] + x_bins[1:]) / 2
+
+    # Extract the array (samples, x)
+    other_dims = [d for d in da.dims if d != x_dim]
+    if len(other_dims) == 1:
+        arr = da.transpose(*other_dims, x_dim).to_numpy()
+    else:
+        arr = da.stack({"sample": other_dims}).transpose("sample", x_dim).to_numpy()
+
+    # Define the y bin centers
+    y_center = (y_bins[0:-1] + y_bins[1:]) / 2
+
+    # Prepare the 2D count grid of shape (ny, nx)
+    # - ny corresponds to the value of the timeseries at the nx points
+    nx = len(x_bins) - 1
+    ny = len(y_bins) - 1
+    nsamples = arr.shape[0]
+    grid = np.zeros((ny, nx), dtype=float)
+
+    # For each (series, x-index), find which y-bin it falls into:
+    # - np.searchsorted(y_bins, value) gives the insertion index in y_bins;
+    #   --> subtracting 1 yields the bin index.
+    # If a value is below the first edge, searchsorted returns 0, so idx = -1
+    indices = np.searchsorted(y_bins, arr) - 1  # (samples, nx)
+
+    # Assign 1 where a line passes through a bin
+    valid = (indices >= 0) & (indices < ny)
+    s_idx, x_idx = np.nonzero(valid)
+    y_idx = indices[valid]
+    grid_3d = np.zeros((nsamples, ny, nx), dtype=int)
+    grid_3d[s_idx, y_idx, x_idx] = 1
+
+    # Normalize by columns
+    col_sums = grid_3d.sum(axis=1, keepdims=True)
+    col_sums[col_sums == 0] = 1  # Avoid division by zero
+    grid_3d = grid_3d / col_sums
+
+    # Normalize over samples
+    grid = grid_3d.sum(axis=0)
+
+    # Normalize grid
+    grid = normalize_array(grid, method=normalization)
+
+    # Create DataArray
+    name = da.name
+    out = xr.DataArray(grid, dims=[name, coord], coords={coord: (coord, x_values), name: (name, y_center)})
+
+    # Mask values which are 0 with NaN
+    out = out.where(out > 0)
+
+    # Return 2D histogram
+    return out
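A usage sketch of `compute_dense_lines` on synthetic series. Because the implementation above indexes histogram columns by position along `coord`, the sketch picks `len(x_bins) - 1` equal to the number of points along `coord`:

    import numpy as np
    import xarray as xr
    from disdrodb.viz.plots import compute_dense_lines

    t = np.linspace(0, 2 * np.pi, 100)
    data = np.sin(t)[None, :] + 0.1 * np.random.randn(500, 100)
    da = xr.DataArray(data, dims=("series", "time"), coords={"time": t}, name="signal")

    hist = compute_dense_lines(
        da,
        coord="time",
        x_bins=np.linspace(0, 2 * np.pi, 101),  # 100 x-bins, one per time step
        y_bins=np.linspace(-1.5, 1.5, 41),      # 40 y-bins
    )
    hist.plot.pcolormesh(x="time")  # density-of-lines view of the 500 series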