cloudnetpy 1.49.9__py3-none-any.whl → 1.87.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. cloudnetpy/categorize/__init__.py +1 -2
  2. cloudnetpy/categorize/atmos_utils.py +297 -67
  3. cloudnetpy/categorize/attenuation.py +31 -0
  4. cloudnetpy/categorize/attenuations/__init__.py +37 -0
  5. cloudnetpy/categorize/attenuations/gas_attenuation.py +30 -0
  6. cloudnetpy/categorize/attenuations/liquid_attenuation.py +84 -0
  7. cloudnetpy/categorize/attenuations/melting_attenuation.py +78 -0
  8. cloudnetpy/categorize/attenuations/rain_attenuation.py +84 -0
  9. cloudnetpy/categorize/categorize.py +332 -156
  10. cloudnetpy/categorize/classify.py +127 -125
  11. cloudnetpy/categorize/containers.py +107 -76
  12. cloudnetpy/categorize/disdrometer.py +40 -0
  13. cloudnetpy/categorize/droplet.py +23 -21
  14. cloudnetpy/categorize/falling.py +53 -24
  15. cloudnetpy/categorize/freezing.py +25 -12
  16. cloudnetpy/categorize/insects.py +35 -23
  17. cloudnetpy/categorize/itu.py +243 -0
  18. cloudnetpy/categorize/lidar.py +36 -41
  19. cloudnetpy/categorize/melting.py +34 -26
  20. cloudnetpy/categorize/model.py +84 -37
  21. cloudnetpy/categorize/mwr.py +18 -14
  22. cloudnetpy/categorize/radar.py +215 -102
  23. cloudnetpy/cli.py +578 -0
  24. cloudnetpy/cloudnetarray.py +43 -89
  25. cloudnetpy/concat_lib.py +218 -78
  26. cloudnetpy/constants.py +28 -10
  27. cloudnetpy/datasource.py +61 -86
  28. cloudnetpy/exceptions.py +49 -20
  29. cloudnetpy/instruments/__init__.py +5 -0
  30. cloudnetpy/instruments/basta.py +29 -12
  31. cloudnetpy/instruments/bowtie.py +135 -0
  32. cloudnetpy/instruments/ceilo.py +138 -115
  33. cloudnetpy/instruments/ceilometer.py +164 -80
  34. cloudnetpy/instruments/cl61d.py +21 -5
  35. cloudnetpy/instruments/cloudnet_instrument.py +74 -36
  36. cloudnetpy/instruments/copernicus.py +108 -30
  37. cloudnetpy/instruments/da10.py +54 -0
  38. cloudnetpy/instruments/disdrometer/common.py +126 -223
  39. cloudnetpy/instruments/disdrometer/parsivel.py +453 -94
  40. cloudnetpy/instruments/disdrometer/thies.py +254 -87
  41. cloudnetpy/instruments/fd12p.py +201 -0
  42. cloudnetpy/instruments/galileo.py +65 -23
  43. cloudnetpy/instruments/hatpro.py +123 -49
  44. cloudnetpy/instruments/instruments.py +113 -1
  45. cloudnetpy/instruments/lufft.py +39 -17
  46. cloudnetpy/instruments/mira.py +268 -61
  47. cloudnetpy/instruments/mrr.py +187 -0
  48. cloudnetpy/instruments/nc_lidar.py +19 -8
  49. cloudnetpy/instruments/nc_radar.py +109 -55
  50. cloudnetpy/instruments/pollyxt.py +135 -51
  51. cloudnetpy/instruments/radiometrics.py +313 -59
  52. cloudnetpy/instruments/rain_e_h3.py +171 -0
  53. cloudnetpy/instruments/rpg.py +321 -189
  54. cloudnetpy/instruments/rpg_reader.py +74 -40
  55. cloudnetpy/instruments/toa5.py +49 -0
  56. cloudnetpy/instruments/vaisala.py +95 -343
  57. cloudnetpy/instruments/weather_station.py +774 -105
  58. cloudnetpy/metadata.py +90 -19
  59. cloudnetpy/model_evaluation/file_handler.py +55 -52
  60. cloudnetpy/model_evaluation/metadata.py +46 -20
  61. cloudnetpy/model_evaluation/model_metadata.py +1 -1
  62. cloudnetpy/model_evaluation/plotting/plot_tools.py +32 -37
  63. cloudnetpy/model_evaluation/plotting/plotting.py +327 -117
  64. cloudnetpy/model_evaluation/products/advance_methods.py +92 -83
  65. cloudnetpy/model_evaluation/products/grid_methods.py +88 -63
  66. cloudnetpy/model_evaluation/products/model_products.py +43 -35
  67. cloudnetpy/model_evaluation/products/observation_products.py +41 -35
  68. cloudnetpy/model_evaluation/products/product_resampling.py +17 -7
  69. cloudnetpy/model_evaluation/products/tools.py +29 -20
  70. cloudnetpy/model_evaluation/statistics/statistical_methods.py +30 -20
  71. cloudnetpy/model_evaluation/tests/e2e/conftest.py +3 -3
  72. cloudnetpy/model_evaluation/tests/e2e/process_cf/main.py +9 -5
  73. cloudnetpy/model_evaluation/tests/e2e/process_cf/tests.py +15 -14
  74. cloudnetpy/model_evaluation/tests/e2e/process_iwc/main.py +9 -5
  75. cloudnetpy/model_evaluation/tests/e2e/process_iwc/tests.py +15 -14
  76. cloudnetpy/model_evaluation/tests/e2e/process_lwc/main.py +9 -5
  77. cloudnetpy/model_evaluation/tests/e2e/process_lwc/tests.py +15 -14
  78. cloudnetpy/model_evaluation/tests/unit/conftest.py +42 -41
  79. cloudnetpy/model_evaluation/tests/unit/test_advance_methods.py +41 -48
  80. cloudnetpy/model_evaluation/tests/unit/test_grid_methods.py +216 -194
  81. cloudnetpy/model_evaluation/tests/unit/test_model_products.py +23 -21
  82. cloudnetpy/model_evaluation/tests/unit/test_observation_products.py +37 -38
  83. cloudnetpy/model_evaluation/tests/unit/test_plot_tools.py +43 -40
  84. cloudnetpy/model_evaluation/tests/unit/test_plotting.py +30 -36
  85. cloudnetpy/model_evaluation/tests/unit/test_statistical_methods.py +68 -31
  86. cloudnetpy/model_evaluation/tests/unit/test_tools.py +33 -26
  87. cloudnetpy/model_evaluation/utils.py +2 -1
  88. cloudnetpy/output.py +170 -111
  89. cloudnetpy/plotting/__init__.py +2 -1
  90. cloudnetpy/plotting/plot_meta.py +562 -822
  91. cloudnetpy/plotting/plotting.py +1142 -704
  92. cloudnetpy/products/__init__.py +1 -0
  93. cloudnetpy/products/classification.py +370 -88
  94. cloudnetpy/products/der.py +85 -55
  95. cloudnetpy/products/drizzle.py +77 -34
  96. cloudnetpy/products/drizzle_error.py +15 -11
  97. cloudnetpy/products/drizzle_tools.py +79 -59
  98. cloudnetpy/products/epsilon.py +211 -0
  99. cloudnetpy/products/ier.py +27 -50
  100. cloudnetpy/products/iwc.py +55 -48
  101. cloudnetpy/products/lwc.py +96 -70
  102. cloudnetpy/products/mwr_tools.py +186 -0
  103. cloudnetpy/products/product_tools.py +170 -128
  104. cloudnetpy/utils.py +455 -240
  105. cloudnetpy/version.py +2 -2
  106. {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info}/METADATA +44 -40
  107. cloudnetpy-1.87.3.dist-info/RECORD +127 -0
  108. {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info}/WHEEL +1 -1
  109. cloudnetpy-1.87.3.dist-info/entry_points.txt +2 -0
  110. docs/source/conf.py +2 -2
  111. cloudnetpy/categorize/atmos.py +0 -361
  112. cloudnetpy/products/mwr_multi.py +0 -68
  113. cloudnetpy/products/mwr_single.py +0 -75
  114. cloudnetpy-1.49.9.dist-info/RECORD +0 -112
  115. {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info/licenses}/LICENSE +0 -0
  116. {cloudnetpy-1.49.9.dist-info → cloudnetpy-1.87.3.dist-info}/top_level.txt +0 -0
cloudnetpy/utils.py CHANGED
@@ -1,31 +1,35 @@
1
- """ This module contains general helper functions. """
1
+ """This module contains general helper functions."""
2
+
2
3
  import datetime
3
4
  import logging
4
5
  import os
5
6
  import re
7
+ import textwrap
6
8
  import uuid
7
9
  import warnings
8
- from datetime import timezone
9
- from typing import Iterator
10
+ from collections.abc import Callable, Iterator
11
+ from os import PathLike
12
+ from typing import Literal, TypeVar
10
13
 
11
14
  import netCDF4
12
15
  import numpy as np
13
- import requests
16
+ import numpy.typing as npt
14
17
  from numpy import ma
15
18
  from scipy import ndimage, stats
16
- from scipy.interpolate import RectBivariateSpline, RegularGridInterpolator, griddata
17
-
19
+ from scipy import ndimage as ndi
20
+ from scipy.interpolate import (
21
+ RectBivariateSpline,
22
+ RegularGridInterpolator,
23
+ griddata,
24
+ interp1d,
25
+ )
26
+
27
+ from cloudnetpy.cloudnetarray import CloudnetArray
28
+ from cloudnetpy.constants import SEC_IN_DAY, SEC_IN_HOUR, SEC_IN_MINUTE
18
29
  from cloudnetpy.exceptions import ValidTimeStampError
19
30
 
20
- Epoch = tuple[int, int, int]
21
- Date = tuple[str, str, str]
22
-
23
- SECONDS_PER_MINUTE = 60
24
- SECONDS_PER_HOUR = 3600
25
- SECONDS_PER_DAY = 86400
26
-
27
31
 
28
- def seconds2hours(time_in_seconds: np.ndarray) -> np.ndarray:
32
+ def seconds2hours(time_in_seconds: npt.NDArray) -> npt.NDArray:
29
33
  """Converts seconds since some epoch to fraction hour.
30
34
 
31
35
  Args:
@@ -38,32 +42,19 @@ def seconds2hours(time_in_seconds: np.ndarray) -> np.ndarray:
38
42
  Excludes leap seconds.
39
43
 
40
44
  """
41
- seconds_since_midnight = np.mod(time_in_seconds, SECONDS_PER_DAY)
42
- fraction_hour = seconds_since_midnight / SECONDS_PER_HOUR
45
+ seconds_since_midnight = np.mod(time_in_seconds, SEC_IN_DAY)
46
+ fraction_hour = seconds_since_midnight / SEC_IN_HOUR
43
47
  if fraction_hour[-1] == 0:
44
48
  fraction_hour[-1] = 24
45
49
  return fraction_hour
46
50
 
47
51
 
48
- def seconds2time(time_in_seconds: float) -> list:
49
- """Converts seconds since some epoch to time of day.
50
-
51
- Args:
52
- time_in_seconds: seconds since some epoch.
53
-
54
- Returns:
55
- list: [hours, minutes, seconds] formatted as '05' etc.
56
-
57
- """
58
- seconds_since_midnight = np.mod(time_in_seconds, SECONDS_PER_DAY)
59
- hours = seconds_since_midnight // SECONDS_PER_HOUR
60
- minutes = seconds_since_midnight % SECONDS_PER_HOUR // SECONDS_PER_MINUTE
61
- seconds = seconds_since_midnight % SECONDS_PER_MINUTE
62
- time = [hours, minutes, seconds]
63
- return [str(t).zfill(2) for t in time]
64
-
65
-
66
- def seconds2date(time_in_seconds: float, epoch: Epoch = (2001, 1, 1)) -> list:
52
+ def seconds2date(
53
+ time_in_seconds: float,
54
+ epoch: datetime.datetime = datetime.datetime(
55
+ 2001, 1, 1, tzinfo=datetime.timezone.utc
56
+ ),
57
+ ) -> datetime.datetime:
67
58
  """Converts seconds since some epoch to datetime (UTC).
68
59
 
69
60
  Args:
@@ -71,31 +62,23 @@ def seconds2date(time_in_seconds: float, epoch: Epoch = (2001, 1, 1)) -> list:
71
62
  epoch: Epoch, default is (2001, 1, 1) (UTC).
72
63
 
73
64
  Returns:
74
- [year, month, day, hours, minutes, seconds] formatted as '05' etc (UTC).
65
+ Datetime
75
66
 
76
67
  """
77
- epoch_in_seconds = datetime.datetime.timestamp(
78
- datetime.datetime(*epoch, tzinfo=timezone.utc)
79
- )
80
- timestamp = time_in_seconds + epoch_in_seconds
81
- return (
82
- datetime.datetime.utcfromtimestamp(timestamp)
83
- .strftime("%Y %m %d %H %M %S")
84
- .split()
85
- )
68
+ return epoch + datetime.timedelta(seconds=float(time_in_seconds))
86
69
 
87
70
 
88
- def datetime2decimal_hours(data: np.ndarray | list) -> np.ndarray:
89
- """Converts array of datetime to decimal_hours"""
71
+ def datetime2decimal_hours(data: npt.NDArray | list) -> npt.NDArray:
72
+ """Converts array of datetime to decimal_hours."""
90
73
  output = []
91
74
  for timestamp in data:
92
75
  t = timestamp.time()
93
- decimal_hours = t.hour + t.minute / 60 + t.second / 3600
76
+ decimal_hours = t.hour + t.minute / SEC_IN_MINUTE + t.second / SEC_IN_HOUR
94
77
  output.append(decimal_hours)
95
78
  return np.array(output)
96
79
 
97
80
 
98
- def time_grid(time_step: int = 30) -> np.ndarray:
81
+ def time_grid(time_step: int = 30) -> npt.NDArray:
99
82
  """Returns decimal hour array between 0 and 24.
100
83
 
101
84
  Computes fraction hour time vector 0-24 with user-given
@@ -112,12 +95,13 @@ def time_grid(time_step: int = 30) -> np.ndarray:
112
95
 
113
96
  """
114
97
  if time_step < 1:
115
- raise ValueError("Time resolution should be >= 1 seconds")
116
- half_step = time_step / SECONDS_PER_HOUR / 2
98
+ msg = "Time resolution should be >= 1 seconds"
99
+ raise ValueError(msg)
100
+ half_step = time_step / SEC_IN_HOUR / 2
117
101
  return np.arange(half_step, 24 + half_step, half_step * 2)
118
102
 
119
103
 
120
- def binvec(x: np.ndarray | list) -> np.ndarray:
104
+ def binvec(x: npt.NDArray | list) -> npt.NDArray:
121
105
  """Converts 1-D center points to bins with even spacing.
122
106
 
123
107
  Args:
@@ -136,60 +120,55 @@ def binvec(x: np.ndarray | list) -> np.ndarray:
136
120
  return np.linspace(edge1, edge2, len(x) + 1)
137
121
 
138
122
 
123
+ REBIN_STAT = Literal["mean", "std", "max"]
124
+ REBIN_STAT_FN: dict[REBIN_STAT, Callable] = {
125
+ "mean": ma.mean,
126
+ "std": ma.std,
127
+ "max": ma.max,
128
+ }
129
+
130
+
139
131
  def rebin_2d(
140
- x_in: np.ndarray,
141
- array: ma.MaskedArray,
142
- x_new: np.ndarray,
143
- statistic: str = "mean",
132
+ x_in: npt.NDArray,
133
+ array: npt.NDArray,
134
+ x_new: npt.NDArray,
135
+ statistic: REBIN_STAT = "mean",
144
136
  n_min: int = 1,
145
- ) -> tuple[ma.MaskedArray, list]:
146
- """Rebins 2-D data in one dimension.
137
+ *,
138
+ keepdim: bool = False,
139
+ mask_zeros: bool = False,
140
+ ) -> tuple[ma.MaskedArray, npt.NDArray]:
141
+ edges = binvec(x_new)
142
+ binn = np.digitize(x_in, edges) - 1
143
+ n_bins = len(x_new)
144
+ counts = np.bincount(binn[binn >= 0], minlength=n_bins)
147
145
 
148
- Args:
149
- x_in: 1-D array with shape (n,).
150
- array: 2-D input data with shape (n, m).
151
- x_new: 1-D target vector (center points) with shape (N,).
152
- statistic: Statistic to be calculated. Possible statistics are 'mean', 'std'.
153
- Default is 'mean'.
154
- n_min: Minimum number of points to have good statistics in a bin. Default is 1.
146
+ stat_fn = REBIN_STAT_FN[statistic]
155
147
 
156
- Returns:
157
- tuple: Rebinned data with shape (N, m) and indices of bins without enough data.
148
+ shape = array.shape if keepdim else (n_bins, array.shape[1])
149
+ result: ma.MaskedArray = ma.masked_array(np.ones(shape, dtype="float32"), mask=True)
158
150
 
159
- Notes:
160
- 0-values are masked in the returned array.
151
+ for bin_ind in range(n_bins):
152
+ if counts[bin_ind] < n_min:
153
+ continue
154
+ mask = binn == bin_ind
155
+ block = array[mask, :]
156
+ x_ind = mask if keepdim else bin_ind
157
+ result[x_ind, :] = stat_fn(block, axis=0)
161
158
 
162
- """
163
- edges = binvec(x_new)
164
- result = np.zeros((len(x_new), array.shape[1]))
165
- array_screened = ma.masked_invalid(array, copy=True) # data may contain nan-values
166
- for ind, values in enumerate(array_screened.T):
167
- mask = ~values.mask
168
- if ma.any(values[mask]):
169
- result[:, ind], _, _ = stats.binned_statistic(
170
- x_in[mask], values[mask], statistic=statistic, bins=edges
171
- )
172
- result[~np.isfinite(result)] = 0
173
- masked_result = ma.masked_equal(result, 0)
159
+ empty_bins = np.where(counts < n_min)[0]
174
160
 
175
- # Fill bins with not enough profiles
176
- empty_indices = []
177
- for ind in range(len(edges) - 1):
178
- is_data = np.where((x_in > edges[ind]) & (x_in <= edges[ind + 1]))[0]
179
- if len(is_data) < n_min:
180
- masked_result[ind, :] = ma.masked
181
- empty_indices.append(ind)
182
- if len(empty_indices) > 0:
183
- logging.debug(f"No radar data in {len(empty_indices)} bins")
161
+ if mask_zeros:
162
+ result[result == 0] = ma.masked
184
163
 
185
- return masked_result, empty_indices
164
+ return result, empty_bins
186
165
 
187
166
 
188
167
  def rebin_1d(
189
- x_in: np.ndarray,
190
- array: np.ndarray | ma.MaskedArray,
191
- x_new: np.ndarray,
192
- statistic: str = "mean",
168
+ x_in: npt.NDArray,
169
+ array: npt.NDArray | ma.MaskedArray,
170
+ x_new: npt.NDArray,
171
+ statistic: REBIN_STAT = "mean",
193
172
  ) -> ma.MaskedArray:
194
173
  """Rebins 1D array.
195
174
 
@@ -201,22 +180,24 @@ def rebin_1d(
201
180
  Default is 'mean'.
202
181
 
203
182
  Returns:
204
- Rebinned data with shape (N,).
183
+ Re-binned data with shape (N,).
205
184
 
206
185
  """
207
186
  edges = binvec(x_new)
208
- result = np.zeros(len(x_new))
187
+ result = ma.zeros(len(x_new))
209
188
  array_screened = ma.masked_invalid(array, copy=True) # data may contain nan-values
210
- mask = ~array_screened.mask # pylint: disable=E1101
189
+ mask = ~array_screened.mask
211
190
  if ma.any(array_screened[mask]):
212
191
  result, _, _ = stats.binned_statistic(
213
- x_in[mask], array_screened[mask], statistic=statistic, bins=edges
192
+ x_in[mask],
193
+ array_screened[mask],
194
+ statistic=statistic,
195
+ bins=edges,
214
196
  )
215
- result[~np.isfinite(result)] = 0
216
- return ma.masked_equal(result, 0)
197
+ return ma.masked_invalid(result, copy=True)
217
198
 
218
199
 
219
- def filter_isolated_pixels(array: np.ndarray) -> np.ndarray:
200
+ def filter_isolated_pixels(array: npt.NDArray) -> npt.NDArray:
220
201
  """From a 2D boolean array, remove completely isolated single cells.
221
202
 
222
203
  Args:
@@ -236,7 +217,7 @@ def filter_isolated_pixels(array: np.ndarray) -> np.ndarray:
236
217
  return _filter(array, structure)
237
218
 
238
219
 
239
- def filter_x_pixels(array: np.ndarray) -> np.ndarray:
220
+ def filter_x_pixels(array: npt.NDArray) -> npt.NDArray:
240
221
  """From a 2D boolean array, remove cells isolated in x-direction.
241
222
 
242
223
  Args:
@@ -259,7 +240,7 @@ def filter_x_pixels(array: np.ndarray) -> np.ndarray:
259
240
  return _filter(array, structure)
260
241
 
261
242
 
262
- def _filter(array: np.ndarray, structure: np.ndarray) -> np.ndarray:
243
+ def _filter(array: npt.NDArray, structure: npt.NDArray) -> npt.NDArray:
263
244
  filtered_array = np.copy(array)
264
245
  id_regions, num_ids = ndimage.label(filtered_array, structure=structure)
265
246
  id_sizes = np.array(ndimage.sum(array, id_regions, range(num_ids + 1))).astype(int)
@@ -268,8 +249,19 @@ def _filter(array: np.ndarray, structure: np.ndarray) -> np.ndarray:
268
249
  return filtered_array
269
250
 
270
251
 
271
- def isbit(array: np.ndarray, nth_bit: int) -> np.ndarray:
272
- """Tests if nth bit (0,1,2..) is set.
252
+ def remove_small_objects(
253
+ mask: npt.NDArray, max_size: int, connectivity: int
254
+ ) -> npt.NDArray:
255
+ """Removes small connected components from boolean mask."""
256
+ structure = ndi.generate_binary_structure(mask.ndim, connectivity)
257
+ labels, num = ndi.label(mask, structure=structure)
258
+ sizes = ndi.sum(mask, labels, index=np.arange(1, num + 1))
259
+ keep_labels = np.where(sizes > max_size)[0] + 1
260
+ return np.isin(labels, keep_labels)
261
+
262
+
263
+ def isbit(array: npt.NDArray, nth_bit: int) -> npt.NDArray:
264
+ """Tests if nth bit (0,1,2,...) is set.
273
265
 
274
266
  Args:
275
267
  array: Integer array.
@@ -287,17 +279,18 @@ def isbit(array: np.ndarray, nth_bit: int) -> np.ndarray:
287
279
  >>> isbit(4, 2)
288
280
  True
289
281
 
290
- See also:
282
+ See Also:
291
283
  utils.setbit()
292
284
 
293
285
  """
294
286
  if nth_bit < 0:
295
- raise ValueError("Negative bit number")
287
+ msg = "Negative bit number"
288
+ raise ValueError(msg)
296
289
  mask = 1 << nth_bit
297
290
  return array & mask > 0
298
291
 
299
292
 
300
- def setbit(array: np.ndarray, nth_bit: int) -> np.ndarray:
293
+ def setbit(array: npt.NDArray, nth_bit: int) -> npt.NDArray:
301
294
  """Sets nth bit (0, 1, 2, ...) on number.
302
295
 
303
296
  Args:
@@ -316,24 +309,25 @@ def setbit(array: np.ndarray, nth_bit: int) -> np.ndarray:
316
309
  >>> setbit(0, 2)
317
310
  4
318
311
 
319
- See also:
312
+ See Also:
320
313
  utils.isbit()
321
314
 
322
315
  """
323
316
  if nth_bit < 0:
324
- raise ValueError("Negative bit number")
317
+ msg = "Negative bit number"
318
+ raise ValueError(msg)
325
319
  mask = 1 << nth_bit
326
320
  array |= mask
327
321
  return array
328
322
 
329
323
 
330
324
  def interpolate_2d(
331
- x: np.ndarray,
332
- y: np.ndarray,
333
- z: np.ndarray,
334
- x_new: np.ndarray,
335
- y_new: np.ndarray,
336
- ) -> np.ndarray:
325
+ x: npt.NDArray,
326
+ y: npt.NDArray,
327
+ z: npt.NDArray,
328
+ x_new: npt.NDArray,
329
+ y_new: npt.NDArray,
330
+ ) -> npt.NDArray:
337
331
  """Linear interpolation of gridded 2d data.
338
332
 
339
333
  Args:
@@ -355,11 +349,11 @@ def interpolate_2d(
355
349
 
356
350
 
357
351
  def interpolate_2d_mask(
358
- x: np.ndarray,
359
- y: np.ndarray,
352
+ x: npt.NDArray,
353
+ y: npt.NDArray,
360
354
  z: ma.MaskedArray,
361
- x_new: np.ndarray,
362
- y_new: np.ndarray,
355
+ x_new: npt.NDArray,
356
+ y_new: npt.NDArray,
363
357
  ) -> ma.MaskedArray:
364
358
  """2D linear interpolation preserving the mask.
365
359
 
@@ -378,32 +372,34 @@ def interpolate_2d_mask(
378
372
  interpolation. Input data may contain nan-values.
379
373
 
380
374
  """
381
- z = ma.array(ma.masked_invalid(z, copy=True)) # ma.array() to avoid pylint nag
375
+ z = ma.array(ma.masked_invalid(z, copy=True))
382
376
  # Interpolate ignoring masked values:
383
- valid_points = np.logical_not(z.mask) # ~z.mask causes pylint nag
377
+ valid_points = np.logical_not(z.mask)
384
378
  xx, yy = np.meshgrid(y, x)
385
379
  x_valid = xx[valid_points]
386
380
  y_valid = yy[valid_points]
387
381
  z_valid = z[valid_points]
388
382
  xx_new, yy_new = np.meshgrid(y_new, x_new)
389
383
  data = griddata(
390
- (x_valid, y_valid), z_valid.ravel(), (xx_new, yy_new), method="linear"
384
+ (x_valid, y_valid),
385
+ z_valid.ravel(),
386
+ (xx_new, yy_new),
387
+ method="linear",
391
388
  )
392
389
  # Preserve mask:
393
- mask_fun = RectBivariateSpline(x, y, z.mask[:], kx=1, ky=1)
390
+ mask_fun = RectBivariateSpline(x, y, ma.getmaskarray(z), kx=1, ky=1)
394
391
  mask = mask_fun(x_new, y_new)
395
392
  mask[mask < 0.5] = 0
396
393
  masked_array = ma.array(data, mask=mask.astype(bool))
397
- masked_array = ma.masked_invalid(masked_array)
398
- return masked_array
394
+ return ma.masked_invalid(masked_array)
399
395
 
400
396
 
401
397
  def interpolate_2d_nearest(
402
- x: np.ndarray,
403
- y: np.ndarray,
404
- z: np.ndarray,
405
- x_new: np.ndarray,
406
- y_new: np.ndarray,
398
+ x: npt.NDArray,
399
+ y: npt.NDArray,
400
+ z: ma.MaskedArray,
401
+ x_new: npt.NDArray,
402
+ y_new: npt.NDArray,
407
403
  ) -> ma.MaskedArray:
408
404
  """2D nearest neighbor interpolation preserving mask.
409
405
 
@@ -421,25 +417,111 @@ def interpolate_2d_nearest(
421
417
  Points outside the original range will be interpolated but masked.
422
418
 
423
419
  """
424
- data = ma.copy(z)
420
+ data = ma.filled(z, np.nan)
425
421
  fun = RegularGridInterpolator(
426
422
  (x, y),
427
423
  data,
428
424
  method="nearest",
429
425
  bounds_error=False,
430
- fill_value=ma.masked,
431
426
  )
432
427
  xx, yy = np.meshgrid(x_new, y_new)
433
- return fun((xx, yy)).T
428
+ zz = fun((xx, yy)).T
429
+ return ma.masked_where(np.isnan(zz), zz)
430
+
431
+
432
+ def interpolate_2D_along_y(
433
+ y: npt.NDArray,
434
+ z: npt.NDArray | ma.MaskedArray,
435
+ y_new: npt.NDArray,
436
+ ) -> ma.MaskedArray:
437
+ """Fast 1D nearest-neighbor interpolation along y for each x.
434
438
 
439
+ Args:
440
+ y: 1D numpy array of y-coordinates (length M).
441
+ z: 2D array of shape (N, M).
442
+ y_new: 1D numpy array of new y-coordinates.
443
+
444
+ Returns:
445
+ Masked 2D masked array interpolated along y.
435
446
 
436
- def calc_relative_error(reference: np.ndarray, array: np.ndarray) -> np.ndarray:
447
+ Notes:
448
+ Only interpolates along y. Points outside range are masked.
449
+ """
450
+ idx = np.searchsorted(y, y_new, side="left")
451
+ idx = np.clip(idx, 0, len(y) - 1)
452
+ left = np.maximum(idx - 1, 0)
453
+ choose_right = (idx == 0) | (
454
+ (idx < len(y)) & (np.abs(y[idx] - y_new) < np.abs(y_new - y[left]))
455
+ )
456
+ idx[~choose_right] = left[~choose_right]
457
+ z_interp = ma.array(z[:, idx])
458
+ z_mask = ma.getmaskarray(z_interp)
459
+ mask = (y_new < y.min()) | (y_new > y.max())
460
+ z_mask[:, mask] = True
461
+ return ma.MaskedArray(z_interp, mask=z_mask)
462
+
463
+
464
+ def interpolate_1d(
465
+ time: npt.NDArray,
466
+ y: ma.MaskedArray,
467
+ time_new: npt.NDArray,
468
+ max_time: float,
469
+ method: str = "linear",
470
+ ) -> ma.MaskedArray:
471
+ """1D linear interpolation preserving the mask.
472
+
473
+ Args:
474
+ time: 1D array in fraction hour.
475
+ y: 1D array, data values.
476
+ time_new: 1D array, new time coordinates.
477
+ max_time: Maximum allowed gap in minutes. Values outside this gap will
478
+ be masked.
479
+ method: Interpolation method, 'linear' (default) or 'nearest'.
480
+ """
481
+ if np.max(time) > 24 or np.min(time) < 0:
482
+ msg = "Time vector must be in fraction hours between 0 and 24"
483
+ raise ValueError(msg)
484
+ if ma.is_masked(y):
485
+ if y.mask.all():
486
+ return ma.masked_all(time_new.shape)
487
+ time = time[~y.mask]
488
+ y = y[~y.mask]
489
+ fun = interp1d(time, y, kind=method, fill_value=(y[0], y[-1]), bounds_error=False)
490
+ interpolated = ma.array(fun(time_new))
491
+ bad_idx = get_gap_ind(time, time_new, max_time / 60)
492
+
493
+ if len(bad_idx) > 0:
494
+ msg = f"Unable to interpolate for {len(bad_idx)} time steps"
495
+ logging.warning(msg)
496
+ interpolated[bad_idx] = ma.masked
497
+
498
+ return interpolated
499
+
500
+
501
+ def get_gap_ind(
502
+ grid: npt.NDArray, new_grid: npt.NDArray, threshold: float
503
+ ) -> list[int]:
504
+ """Finds indices in new_grid that are too far from grid."""
505
+ if grid.size == 0:
506
+ return list(range(len(new_grid)))
507
+ idxs = np.searchsorted(grid, new_grid)
508
+ left_dist = np.where(idxs > 0, np.abs(new_grid - grid[idxs - 1]), np.inf)
509
+ right_dist = np.where(
510
+ idxs < len(grid),
511
+ np.abs(new_grid - grid[np.clip(idxs, 0, len(grid) - 1)]),
512
+ np.inf,
513
+ )
514
+ nearest = np.minimum(left_dist, right_dist)
515
+ return np.where(nearest > threshold)[0].tolist()
516
+
517
+
518
+ def calc_relative_error(reference: npt.NDArray, array: npt.NDArray) -> npt.NDArray:
437
519
  """Calculates relative error (%)."""
438
520
  return ((array - reference) / reference) * 100
439
521
 
440
522
 
441
- def db2lin(array: float | np.ndarray, scale: int = 10) -> np.ndarray:
442
- """dB to linear conversion."""
523
+ def db2lin(array: float | npt.NDArray, scale: int = 10) -> npt.NDArray:
524
+ """DB to linear conversion."""
443
525
  data = array / scale
444
526
  with warnings.catch_warnings():
445
527
  warnings.simplefilter("ignore", category=RuntimeWarning)
@@ -448,19 +530,19 @@ def db2lin(array: float | np.ndarray, scale: int = 10) -> np.ndarray:
448
530
  return np.power(10, data)
449
531
 
450
532
 
451
- def lin2db(array: np.ndarray, scale: int = 10) -> np.ndarray:
533
+ def lin2db(array: npt.NDArray, scale: int = 10) -> npt.NDArray:
452
534
  """Linear to dB conversion."""
453
535
  if ma.isMaskedArray(array):
454
536
  return scale * ma.log10(array)
455
537
  return scale * np.log10(array)
456
538
 
457
539
 
458
- def mdiff(array: np.ndarray) -> float:
540
+ def mdiff(array: npt.NDArray) -> float:
459
541
  """Returns median difference of 1-D array."""
460
542
  return float(ma.median(ma.diff(array)))
461
543
 
462
544
 
463
- def l2norm(*args) -> ma.MaskedArray:
545
+ def l2norm(*args: npt.NDArray | float) -> ma.MaskedArray:
464
546
  """Returns l2 norm.
465
547
 
466
548
  Args:
@@ -470,20 +552,23 @@ def l2norm(*args) -> ma.MaskedArray:
470
552
  The l2 norm.
471
553
 
472
554
  """
473
- ss = 0
555
+ arg_cpy: float | npt.NDArray
556
+ ss: float | npt.NDArray = 0
474
557
  for arg in args:
475
558
  if isinstance(arg, ma.MaskedArray):
476
559
  # Raise only non-masked values, not sure if this is needed...
477
- arg = ma.copy(arg)
478
- arg[~arg.mask] = arg[~arg.mask] ** 2
560
+ arg_cpy = ma.copy(arg)
561
+ arg_cpy[~arg.mask] = arg_cpy[~arg.mask] ** 2
479
562
  else:
480
- arg = arg**2
481
- ss = ss + arg
563
+ arg_cpy = arg**2
564
+ ss = ss + arg_cpy
482
565
  return ma.sqrt(ss)
483
566
 
484
567
 
485
568
  def l2norm_weighted(
486
- values: tuple, overall_scale: float, term_weights: tuple
569
+ values: tuple,
570
+ overall_scale: float,
571
+ term_weights: tuple,
487
572
  ) -> ma.MaskedArray:
488
573
  """Calculates scaled and weighted Euclidean distance.
489
574
 
@@ -503,12 +588,12 @@ def l2norm_weighted(
503
588
  TODO: Use masked arrays instead of tuples.
504
589
 
505
590
  """
506
- generic_values = ma.array(values, dtype=object)
591
+ generic_values: ma.MaskedArray = ma.array(values, dtype=object)
507
592
  weighted_values = ma.multiply(generic_values, term_weights)
508
593
  return overall_scale * l2norm(*weighted_values)
509
594
 
510
595
 
511
- def cumsumr(array: np.ndarray, axis: int = 0) -> np.ndarray:
596
+ def cumsumr(array: npt.NDArray, axis: int = 0) -> npt.NDArray:
512
597
  """Finds cumulative sum that resets on 0.
513
598
 
514
599
  Args:
@@ -525,12 +610,10 @@ def cumsumr(array: np.ndarray, axis: int = 0) -> np.ndarray:
525
610
 
526
611
  """
527
612
  cums = array.cumsum(axis=axis)
528
- return cums - np.maximum.accumulate(
529
- cums * (array == 0), axis=axis
530
- ) # pylint: disable=E1101
613
+ return cums - np.maximum.accumulate(cums * (array == 0), axis=axis)
531
614
 
532
615
 
533
- def ffill(array: np.ndarray, value: int = 0) -> np.ndarray:
616
+ def ffill(array: npt.NDArray, value: int = 0) -> npt.NDArray:
534
617
  """Forward fills an array.
535
618
 
536
619
  Args:
@@ -552,15 +635,19 @@ def ffill(array: np.ndarray, value: int = 0) -> np.ndarray:
552
635
  ndims = len(array.shape)
553
636
  ran = np.arange(array.shape[ndims - 1])
554
637
  idx = np.where((array != value), ran, 0)
555
- idx = np.maximum.accumulate(idx, axis=ndims - 1) # pylint: disable=E1101
638
+ idx = np.maximum.accumulate(idx, axis=ndims - 1)
556
639
  if ndims == 2:
557
640
  return array[np.arange(idx.shape[0])[:, None], idx]
558
641
  return array[idx]
559
642
 
560
643
 
561
644
  def init(
562
- n_vars: int, shape: tuple, dtype: type = float, masked: bool = True
563
- ) -> Iterator[np.ndarray | ma.MaskedArray]:
645
+ n_vars: int,
646
+ shape: tuple,
647
+ dtype: type = float,
648
+ *,
649
+ masked: bool = True,
650
+ ) -> Iterator[npt.NDArray | ma.MaskedArray]:
564
651
  """Initializes several numpy arrays.
565
652
 
566
653
  Args:
@@ -590,14 +677,14 @@ def init(
590
677
  yield np.zeros(shape, dtype=dtype)
591
678
 
592
679
 
593
- def n_elements(array: np.ndarray, dist: float, var: str | None = None) -> int:
680
+ def n_elements(array: npt.NDArray, dist: float, var: str | None = None) -> int:
594
681
  """Returns the number of elements that cover certain distance.
595
682
 
596
683
  Args:
597
684
  array: Input array with arbitrary units or time in fraction hour. *x* should
598
685
  be evenly spaced or at least close to.
599
686
  dist: Distance to be covered. If x is fraction time, *dist* is in minutes.
600
- Otherwise *x* and *dist* should have the same units.
687
+ Otherwise, *x* and *dist* should have the same units.
601
688
  var: If 'time', input is fraction hour and distance in minutes, else inputs
602
689
  have the same units. Default is None (same units).
603
690
 
@@ -625,11 +712,11 @@ def n_elements(array: np.ndarray, dist: float, var: str | None = None) -> int:
625
712
  """
626
713
  n = dist / mdiff(array)
627
714
  if var == "time":
628
- n = n / 60
715
+ n = n / SEC_IN_MINUTE
629
716
  return int(np.round(n))
630
717
 
631
718
 
632
def isscalar(array: npt.NDArray | float | list | netCDF4.Variable) -> bool:
    """Tests if input is scalar.

    By "scalar" we mean that array has a single value.
    """
    wrapped: ma.MaskedArray = ma.array(array)
    if not hasattr(wrapped, "__len__"):
        return True
    return wrapped.shape == () or len(wrapped) == 1
652
737
 
653
738
 
654
739
def get_time() -> str:
    """Returns current UTC-time."""
    now_utc = datetime.datetime.now(tz=datetime.timezone.utc)
    return now_utc.strftime("%Y-%m-%d %H:%M:%S") + " +00:00"
657
744
 
658
745
 
659
746
def date_range(
    start_date: datetime.date,
    end_date: datetime.date,
) -> Iterator[datetime.date]:
    """Returns range between two dates (datetimes)."""
    n_days = int((end_date - start_date).days)
    one_day = datetime.timedelta(days=1)
    current = start_date
    for _ in range(n_days):
        yield current
        current += one_day
665
753
 
666
754
 
667
- def get_uuid() -> str:
668
- """Returns unique identifier."""
669
- return str(uuid.uuid4())
755
+ def get_uuid(input_uuid: str | uuid.UUID | None) -> uuid.UUID:
756
+ """Parse or generate unique identifier."""
757
+ if input_uuid is None:
758
+ return uuid.uuid4()
759
+ if isinstance(input_uuid, str):
760
+ return uuid.UUID(input_uuid)
761
+ return input_uuid
670
762
 
671
763
 
672
def get_wl_band(radar_frequency: float) -> Literal["X", "Ka", "W"]:
    """Returns IEEE radar band corresponding to radar frequency.

    Args:
        radar_frequency: Radar frequency (GHz).

    Returns:
        IEEE radar band as string.

    Raises:
        ValueError: If the frequency falls outside the known bands.

    """
    bands: tuple[tuple[float, float, Literal["X", "Ka", "W"]], ...] = (
        (8, 12, "X"),
        (27, 40, "Ka"),
        (75, 110, "W"),
    )
    for lower, upper, band in bands:
        if lower < radar_frequency < upper:
            return band
    msg = f"Unknown band: {radar_frequency} GHz"
    raise ValueError(msg)
688
782
 
689
783
 
690
def transpose(data: npt.NDArray) -> npt.NDArray:
    """Transposes numpy array of (n, ) to (n, 1)."""
    is_valid_shape = data.ndim == 1 and len(data) > 1
    if not is_valid_shape:
        msg = "Invalid input array shape"
        raise ValueError(msg)
    return data.reshape(-1, 1)
695
790
 
696
791
 
@@ -713,8 +808,12 @@ def del_dict_keys(data: dict, keys: tuple | list) -> dict:
713
808
 
714
809
 
715
810
  def array_to_probability(
716
- array: np.ndarray, loc: float, scale: float, invert: bool = False
717
- ) -> np.ndarray:
811
+ array: npt.NDArray,
812
+ loc: float,
813
+ scale: float,
814
+ *,
815
+ invert: bool = False,
816
+ ) -> npt.NDArray:
718
817
  """Converts continuous variable into 0-1 probability.
719
818
 
720
819
  Args:
@@ -740,7 +839,7 @@ def array_to_probability(
740
839
  return prob
741
840
 
742
841
 
743
- def range_to_height(range_los: np.ndarray, tilt_angle: float) -> np.ndarray:
842
+ def range_to_height(range_los: npt.NDArray, tilt_angle: float) -> npt.NDArray:
744
843
  """Converts distances from a tilted instrument to height above the ground.
745
844
 
746
845
  Args:
@@ -759,27 +858,21 @@ def range_to_height(range_los: np.ndarray, tilt_angle: float) -> np.ndarray:
759
858
 
760
859
def is_empty_line(line: str) -> bool:
    """Tests if a line (of a text file) is empty."""
    return line == "\n" or line == "\r\n"
765
862
 
766
863
 
767
864
def is_timestamp(timestamp: str) -> bool:
    """Tests if the input string is formatted as -yyyy-mm-dd hh:mm:ss."""
    # The `re` module caches compiled patterns, so matching directly is fine.
    pattern = r"-\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}"
    return re.match(pattern, timestamp) is not None
773
868
 
774
869
 
775
- def get_sorted_filenames(file_path: str, extension: str) -> list:
870
+ def get_sorted_filenames(file_path: str | PathLike, extension: str) -> list[str]:
776
871
  """Returns full paths of files with some extension, sorted by filename."""
777
872
  extension = extension.lower()
778
873
  all_files = os.listdir(file_path)
779
874
  files = [
780
- "/".join((file_path, file))
781
- for file in all_files
782
- if file.lower().endswith(extension)
875
+ f"{file_path}/{file}" for file in all_files if file.lower().endswith(extension)
783
876
  ]
784
877
  files.sort()
785
878
  return files
@@ -793,23 +886,16 @@ def str_to_numeric(value: str) -> int | float:
793
886
  return float(value)
794
887
 
795
888
 
796
- def fetch_cloudnet_model_types() -> list:
797
- """Finds different model types."""
798
- url = "https://cloudnet.fmi.fi/api/models"
799
- data = requests.get(url=url, timeout=60).json()
800
- models = [model["id"] for model in data]
801
- model_types = [model.split("-")[0] for model in models]
802
- return list(set(model_types))
803
-
804
-
805
- def get_epoch(units: str) -> Epoch:
889
+ def get_epoch(units: str) -> datetime.datetime:
806
890
  """Finds epoch from units string."""
807
- fallback = (2001, 1, 1)
891
+ fallback = datetime.datetime(2001, 1, 1, tzinfo=datetime.timezone.utc)
808
892
  try:
809
893
  date = units.split()[2]
810
894
  except IndexError:
811
895
  return fallback
812
896
  date = date.replace(",", "")
897
+ if "T" in date:
898
+ date = date[: date.index("T")]
813
899
  try:
814
900
  date_components = [int(x) for x in date.split("-")]
815
901
  except ValueError:
@@ -818,13 +904,15 @@ def get_epoch(units: str) -> Epoch:
818
904
  except ValueError:
819
905
  return fallback
820
906
  year, month, day = date_components
821
- current_year = datetime.datetime.today().year
907
+ current_year = datetime.datetime.now(tz=datetime.timezone.utc).year
822
908
  if (1900 < year <= current_year) and (0 < month < 13) and (0 < day < 32):
823
- return year, month, day
909
+ return datetime.datetime(year, month, day, tzinfo=datetime.timezone.utc)
824
910
  return fallback
825
911
 
826
912
 
827
- def screen_by_time(data_in: dict, epoch: Epoch, expected_date: str) -> dict:
913
+ def screen_by_time(
914
+ data_in: dict, epoch: datetime.datetime, expected_date: datetime.date
915
+ ) -> dict:
828
916
  """Screen data by time.
829
917
 
830
918
  Args:
@@ -861,7 +949,9 @@ def screen_by_time(data_in: dict, epoch: Epoch, expected_date: str) -> dict:
861
949
  return data
862
950
 
863
951
 
864
- def find_valid_time_indices(time: np.ndarray, epoch: Epoch, expected_date: str) -> list:
952
+ def find_valid_time_indices(
953
+ time: npt.NDArray, epoch: datetime.datetime, expected_date: datetime.date
954
+ ) -> list[int]:
865
955
  """Finds valid time array indices for the given date.
866
956
 
867
957
  Args:
@@ -884,15 +974,15 @@ def find_valid_time_indices(time: np.ndarray, epoch: Epoch, expected_date: str)
884
974
  ind_sorted = np.argsort(time)
885
975
  ind_valid: list[int] = []
886
976
  for ind in ind_sorted:
887
- date_str = "-".join(seconds2date(time[ind], epoch=epoch)[:3])
888
- if date_str == expected_date and time[ind] not in time[ind_valid]:
977
+ date = seconds2date(time[ind], epoch=epoch).date()
978
+ if date == expected_date and time[ind] not in time[ind_valid]:
889
979
  ind_valid.append(ind)
890
980
  if not ind_valid:
891
981
  raise ValidTimeStampError
892
982
  return ind_valid
893
983
 
894
984
 
895
- def append_data(data_in: dict, key: str, array: np.ndarray) -> dict:
985
+ def append_data(data_in: dict, key: str, array: npt.NDArray) -> dict:
896
986
  """Appends data to a dictionary field (creates the field if not yet present).
897
987
 
898
988
  Args:
@@ -909,7 +999,7 @@ def append_data(data_in: dict, key: str, array: np.ndarray) -> dict:
909
999
  return data
910
1000
 
911
1001
 
912
- def edges2mid(data: np.ndarray, reference: str) -> np.ndarray:
1002
+ def edges2mid(data: npt.NDArray, reference: Literal["upper", "lower"]) -> npt.NDArray:
913
1003
  """Shifts values half bin towards up or down.
914
1004
 
915
1005
  Args:
@@ -920,8 +1010,6 @@ def edges2mid(data: np.ndarray, reference: str) -> np.ndarray:
920
1010
  Shifted values.
921
1011
 
922
1012
  """
923
- if reference not in ("lower", "upper"):
924
- raise ValueError
925
1013
  gaps = (data[1:] - data[0:-1]) / 2
926
1014
  if reference == "lower":
927
1015
  gaps = np.append(gaps, gaps[-1])
@@ -930,29 +1018,156 @@ def edges2mid(data: np.ndarray, reference: str) -> np.ndarray:
930
1018
  return data - gaps
931
1019
 
932
1020
 
933
- def get_file_type(filename: str) -> str:
934
- """Returns cloudnet file type from new and legacy files."""
935
- with netCDF4.Dataset(filename) as nc:
936
- if hasattr(nc, "cloudnet_file_type"):
937
- file_type = nc.cloudnet_file_type
938
- return file_type
939
- product = filename.split("_")[-1][:-3]
940
- if product in ("categorize", "classification", "drizzle"):
941
- return product
942
- if product[:3] in ("lwc", "iwc"):
943
- return product[:3]
944
- raise ValueError("Unknown file type")
945
-
946
-
947
- def get_files_with_common_range(files: list) -> list:
948
- """Returns files with the same (most common) number of range gates."""
949
- n_range = []
950
- for file in files:
1021
def get_files_with_variables(filenames: list, variables: list[str]) -> list:
    """Returns files where all variables exist."""
    valid_files = []
    for file in filenames:
        with netCDF4.Dataset(file) as nc:
            has_all_variables = all(
                variable in nc.variables for variable in variables
            )
        if has_all_variables:
            valid_files.append(file)
    return valid_files
1032
+
1033
+
1034
+ def is_all_masked(array: npt.NDArray) -> bool:
1035
+ """Tests if all values are masked."""
1036
+ return bool(ma.isMaskedArray(array) and hasattr(array, "mask") and array.mask.all())
1037
+
1038
+
1039
+ def find_masked_profiles_indices(array: ma.MaskedArray) -> list:
1040
+ """Finds indices of masked profiles in a 2-D array."""
1041
+ non_masked_counts = np.ma.count(array, axis=1)
1042
+ masked_profiles_indices = np.where(non_masked_counts == 0)[0]
1043
+ return list(masked_profiles_indices)
1044
+
1045
+
1046
+ T = TypeVar("T", int, str)
1047
+
1048
+
1049
+ def _format_definition(kind: str, definitions: dict[T, str]) -> str:
1050
+ lines = [""]
1051
+ for key, value in definitions.items():
1052
+ prefix = f"{kind} {key}: "
1053
+ indent = " " * len(prefix)
1054
+ text = " ".join(value.split())
1055
+ wrapped = textwrap.wrap(prefix + text, subsequent_indent=indent)
1056
+ lines.extend(wrapped)
1057
+ return "\n".join(lines)
1058
+
1059
+
1060
+ def status_field_definition(definitions: dict[T, str]) -> str:
1061
+ return _format_definition("Value", definitions)
1062
+
1063
+
1064
+ def bit_field_definition(definitions: dict[T, str]) -> str:
1065
+ return _format_definition("Bit", definitions)
1066
+
1067
+
1068
+ def path_lengths_from_ground(height_agl: npt.NDArray) -> npt.NDArray:
1069
+ return np.diff(height_agl, prepend=0)
1070
+
1071
+
1072
+ def add_site_geolocation(
1073
+ data: dict,
1074
+ *,
1075
+ gps: bool,
1076
+ site_meta: dict | None = None,
1077
+ dataset: netCDF4.Dataset | None = None,
1078
+ ) -> None:
1079
+ tmp_data = {}
1080
+ tmp_source = {}
1081
+ value: npt.NDArray | float | None
1082
+
1083
+ for key in ("latitude", "longitude", "altitude"):
1084
+ value = None
1085
+ source = None
1086
+ # Prefer accurate GPS coordinates. Don't trust altitude because its less
1087
+ # accurate and at least in Lindenberg BASTA there are large jumps.
1088
+ if gps and key != "altitude":
1089
+ values = None
1090
+ if isinstance(dataset, netCDF4.Dataset) and key in dataset.variables:
1091
+ values = dataset[key][:]
1092
+ elif key in data:
1093
+ values = data[key].data
1094
+ if (
1095
+ values is not None
1096
+ and not np.all(ma.getmaskarray(values))
1097
+ and np.any(values != 0)
1098
+ ):
1099
+ value = ma.masked_where(values == 0, values)
1100
+ source = "GPS"
1101
+ # User-supplied site coordinate.
1102
+ if value is None and site_meta is not None and key in site_meta:
1103
+ value = np.array(float(site_meta[key]))
1104
+ source = "site coordinates"
1105
+ # From source data (CHM15k, CL61, MRR-PRO, Copernicus, Galileo...).
1106
+ # Assume value is manually set, so cannot trust it.
1107
+ if (
1108
+ value is None
1109
+ and isinstance(dataset, netCDF4.Dataset)
1110
+ and key in dataset.variables
1111
+ and not np.all(ma.getmaskarray(dataset[key][:]))
1112
+ ):
1113
+ value = dataset[key][:]
1114
+ source = "raw file"
1115
+ # From source global attributes (MIRA).
1116
+ # Seems to be manually set, so cannot trust it.
1117
+ if (
1118
+ value is None
1119
+ and isinstance(dataset, netCDF4.Dataset)
1120
+ and hasattr(dataset, key.capitalize())
1121
+ ):
1122
+ value = _parse_global_attribute_numeral(dataset, key.capitalize())
1123
+ source = "raw file"
1124
+ if value is not None:
1125
+ tmp_data[key] = value
1126
+ tmp_source[key] = source
1127
+
1128
+ if "latitude" in tmp_data and "longitude" in tmp_data:
1129
+ lat = np.atleast_1d(tmp_data["latitude"])
1130
+ lon = np.atleast_1d(tmp_data["longitude"])
1131
+ lon[lon > 180] - 360
1132
+ if _are_stationary(lat, lon):
1133
+ tmp_data["latitude"] = float(ma.mean(lat))
1134
+ tmp_data["longitude"] = float(ma.mean(lon))
1135
+ else:
1136
+ tmp_data["latitude"] = lat
1137
+ tmp_data["longitude"] = lon
1138
+
1139
+ if "altitude" in tmp_data:
1140
+ alt = np.atleast_1d(tmp_data["altitude"])
1141
+ if ma.max(alt) - ma.min(alt) < 100:
1142
+ tmp_data["altitude"] = float(ma.mean(alt))
1143
+
1144
+ for key in ("latitude", "longitude", "altitude"):
1145
+ if key in tmp_data:
1146
+ data[key] = CloudnetArray(
1147
+ tmp_data[key],
1148
+ key,
1149
+ source=tmp_source[key],
1150
+ dimensions=None if isinstance(tmp_data[key], float) else ("time",),
1151
+ )
1152
+
1153
+
1154
+ def _parse_global_attribute_numeral(dataset: netCDF4.Dataset, key: str) -> float | None:
1155
+ new_str = ""
1156
+ attr = getattr(dataset, key)
1157
+ if attr == "Unknown":
1158
+ return None
1159
+ for char in attr:
1160
+ if char.isdigit() or char == ".":
1161
+ new_str += char
1162
+ return float(new_str)
1163
+
1164
+
1165
+ def _are_stationary(latitude: npt.NDArray, longitude: npt.NDArray) -> bool:
1166
+ min_lat, max_lat = np.min(latitude), np.max(latitude)
1167
+ min_lon, max_lon = np.min(longitude), np.max(longitude)
1168
+ lat_threshold = 0.01 # deg, around 1 km
1169
+ avg_lat = (min_lat + max_lat) / 2
1170
+ lon_threshold = lat_threshold / np.cos(np.radians(avg_lat))
1171
+ lat_diff = max_lat - min_lat
1172
+ lon_diff = max_lon - min_lon
1173
+ return lat_diff <= lat_threshold and lon_diff <= lon_threshold