disdrodb 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. disdrodb/__init__.py +1 -5
  2. disdrodb/_version.py +2 -2
  3. disdrodb/accessor/methods.py +14 -3
  4. disdrodb/api/checks.py +10 -0
  5. disdrodb/api/create_directories.py +0 -2
  6. disdrodb/api/io.py +14 -17
  7. disdrodb/api/path.py +42 -77
  8. disdrodb/api/search.py +89 -23
  9. disdrodb/cli/disdrodb_create_summary.py +11 -1
  10. disdrodb/cli/disdrodb_create_summary_station.py +10 -0
  11. disdrodb/cli/disdrodb_run_l0.py +1 -1
  12. disdrodb/cli/disdrodb_run_l0a.py +1 -1
  13. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  14. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  15. disdrodb/cli/disdrodb_run_l1.py +1 -1
  16. disdrodb/cli/disdrodb_run_l2e.py +1 -1
  17. disdrodb/cli/disdrodb_run_l2m.py +1 -1
  18. disdrodb/configs.py +30 -83
  19. disdrodb/constants.py +4 -3
  20. disdrodb/data_transfer/download_data.py +4 -2
  21. disdrodb/docs.py +2 -2
  22. disdrodb/etc/products/L1/1MIN.yaml +13 -0
  23. disdrodb/etc/products/L1/LPM/1MIN.yaml +13 -0
  24. disdrodb/etc/products/L1/PARSIVEL/1MIN.yaml +13 -0
  25. disdrodb/etc/products/L1/PARSIVEL2/1MIN.yaml +13 -0
  26. disdrodb/etc/products/L1/PWS100/1MIN.yaml +13 -0
  27. disdrodb/etc/products/L1/RD80/1MIN.yaml +13 -0
  28. disdrodb/etc/products/L1/SWS250/1MIN.yaml +13 -0
  29. disdrodb/etc/products/L1/global.yaml +7 -1
  30. disdrodb/etc/products/L2E/10MIN.yaml +1 -12
  31. disdrodb/etc/products/L2E/5MIN.yaml +1 -0
  32. disdrodb/etc/products/L2E/global.yaml +1 -1
  33. disdrodb/etc/products/L2M/MODELS/GAMMA_GS_ND_MAE.yaml +6 -0
  34. disdrodb/etc/products/L2M/{GAMMA_ML.yaml → MODELS/GAMMA_ML.yaml} +1 -1
  35. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml +6 -0
  36. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_GS_ND_MAE.yaml +6 -0
  37. disdrodb/etc/products/L2M/MODELS/LOGNORMAL_ML.yaml +8 -0
  38. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +6 -0
  39. disdrodb/etc/products/L2M/global.yaml +11 -3
  40. disdrodb/l0/check_configs.py +49 -16
  41. disdrodb/l0/configs/LPM/l0a_encodings.yml +2 -2
  42. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +2 -2
  43. disdrodb/l0/configs/LPM/l0b_encodings.yml +2 -2
  44. disdrodb/l0/configs/LPM/raw_data_format.yml +2 -2
  45. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +1 -1
  46. disdrodb/l0/configs/PWS100/l0b_encodings.yml +1 -0
  47. disdrodb/l0/configs/SWS250/bins_diameter.yml +108 -0
  48. disdrodb/l0/configs/SWS250/bins_velocity.yml +83 -0
  49. disdrodb/l0/configs/SWS250/l0a_encodings.yml +18 -0
  50. disdrodb/l0/configs/SWS250/l0b_cf_attrs.yml +72 -0
  51. disdrodb/l0/configs/SWS250/l0b_encodings.yml +155 -0
  52. disdrodb/l0/configs/SWS250/raw_data_format.yml +148 -0
  53. disdrodb/l0/l0_reader.py +2 -2
  54. disdrodb/l0/l0b_processing.py +70 -15
  55. disdrodb/l0/l0c_processing.py +7 -3
  56. disdrodb/l0/readers/LPM/ARM/ARM_LPM.py +1 -1
  57. disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +2 -2
  58. disdrodb/l0/readers/LPM/BELGIUM/ULIEGE.py +256 -0
  59. disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +2 -2
  60. disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +2 -2
  61. disdrodb/l0/readers/LPM/GERMANY/DWD.py +491 -0
  62. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +2 -2
  63. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  64. disdrodb/l0/readers/LPM/KIT/CHWALA.py +2 -2
  65. disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +107 -12
  66. disdrodb/l0/readers/LPM/SLOVENIA/UL.py +3 -3
  67. disdrodb/l0/readers/LPM/SWITZERLAND/INNERERIZ_LPM.py +2 -2
  68. disdrodb/l0/readers/PARSIVEL/BASQUECOUNTRY/EUSKALMET_OTT.py +227 -0
  69. disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/LPVEX.py +1 -1
  70. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010.py +5 -14
  71. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +8 -17
  72. disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL.py +117 -8
  73. disdrodb/l0/readers/PARSIVEL2/BASQUECOUNTRY/EUSKALMET_OTT2.py +232 -0
  74. disdrodb/l0/readers/PARSIVEL2/BRAZIL/CHUVA_PARSIVEL2.py +10 -14
  75. disdrodb/l0/readers/PARSIVEL2/BRAZIL/GOAMAZON_PARSIVEL2.py +10 -14
  76. disdrodb/l0/readers/PARSIVEL2/DENMARK/DTU.py +8 -14
  77. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +382 -0
  78. disdrodb/l0/readers/PARSIVEL2/FINLAND/FMI_PARSIVEL2.py +4 -0
  79. disdrodb/l0/readers/PARSIVEL2/FRANCE/OSUG.py +1 -1
  80. disdrodb/l0/readers/PARSIVEL2/GREECE/NOA.py +127 -0
  81. disdrodb/l0/readers/PARSIVEL2/ITALY/HYDROX.py +239 -0
  82. disdrodb/l0/readers/PARSIVEL2/NCAR/FARM_PARSIVEL2.py +5 -11
  83. disdrodb/l0/readers/PARSIVEL2/NCAR/PERILS_MIPS.py +4 -17
  84. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +5 -14
  85. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_PJ.py +10 -13
  86. disdrodb/l0/readers/PARSIVEL2/NCAR/SNOWIE_SB.py +10 -13
  87. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/PAGASA.py +232 -0
  88. disdrodb/l0/readers/PARSIVEL2/SPAIN/CENER.py +6 -18
  89. disdrodb/l0/readers/PARSIVEL2/{NASA/LPVEX.py → SPAIN/GRANADA.py} +46 -35
  90. disdrodb/l0/readers/PARSIVEL2/SWEDEN/SMHI.py +189 -0
  91. disdrodb/l0/readers/PARSIVEL2/USA/{C3WE.py → CW3E.py} +10 -28
  92. disdrodb/l0/readers/PWS100/AUSTRIA/HOAL.py +321 -0
  93. disdrodb/l0/readers/SW250/BELGIUM/KMI.py +239 -0
  94. disdrodb/l1/beard_model.py +31 -129
  95. disdrodb/l1/fall_velocity.py +136 -83
  96. disdrodb/l1/filters.py +25 -28
  97. disdrodb/l1/processing.py +16 -17
  98. disdrodb/l1/resampling.py +101 -38
  99. disdrodb/l1_env/routines.py +46 -17
  100. disdrodb/l2/empirical_dsd.py +6 -0
  101. disdrodb/l2/processing.py +6 -5
  102. disdrodb/metadata/geolocation.py +0 -2
  103. disdrodb/metadata/search.py +3 -4
  104. disdrodb/psd/fitting.py +16 -13
  105. disdrodb/routines/l0.py +2 -2
  106. disdrodb/routines/l1.py +173 -60
  107. disdrodb/routines/l2.py +148 -284
  108. disdrodb/routines/options.py +345 -0
  109. disdrodb/routines/wrappers.py +14 -1
  110. disdrodb/scattering/axis_ratio.py +90 -84
  111. disdrodb/scattering/permittivity.py +6 -0
  112. disdrodb/summary/routines.py +735 -670
  113. disdrodb/utils/archiving.py +51 -44
  114. disdrodb/utils/attrs.py +3 -1
  115. disdrodb/utils/dask.py +4 -4
  116. disdrodb/utils/dict.py +33 -0
  117. disdrodb/utils/encoding.py +6 -1
  118. disdrodb/utils/routines.py +9 -8
  119. disdrodb/utils/time.py +11 -3
  120. disdrodb/viz/__init__.py +0 -13
  121. disdrodb/viz/plots.py +231 -1
  122. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/METADATA +2 -1
  123. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/RECORD +135 -103
  124. /disdrodb/etc/products/L2M/{NGAMMA_GS_LOG_ND_MAE.yaml → MODELS/NGAMMA_GS_LOG_ND_MAE.yaml} +0 -0
  125. /disdrodb/etc/products/L2M/{NGAMMA_GS_ND_MAE.yaml → MODELS/NGAMMA_GS_ND_MAE.yaml} +0 -0
  126. /disdrodb/etc/products/L2M/{NGAMMA_GS_Z_MAE.yaml → MODELS/NGAMMA_GS_Z_MAE.yaml} +0 -0
  127. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/IFLOODS.py +0 -0
  128. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/MC3E.py +0 -0
  129. /disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/PIERS.py +0 -0
  130. /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/GCPEX.py +0 -0
  131. /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/NSSTC.py +0 -0
  132. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/WHEEL +0 -0
  133. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/entry_points.txt +0 -0
  134. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/licenses/LICENSE +0 -0
  135. {disdrodb-0.1.4.dist-info → disdrodb-0.2.0.dist-info}/top_level.txt +0 -0
disdrodb/l1/processing.py CHANGED
@@ -19,8 +19,8 @@
 import xarray as xr
 
 from disdrodb.constants import DIAMETER_DIMENSION, VELOCITY_DIMENSION
-from disdrodb.l1.fall_velocity import get_raindrop_fall_velocity
-from disdrodb.l1.filters import define_spectrum_mask, filter_diameter_bins, filter_velocity_bins
+from disdrodb.l1.fall_velocity import get_raindrop_fall_velocity_from_ds
+from disdrodb.l1.filters import define_raindrop_spectrum_mask, filter_diameter_bins, filter_velocity_bins
 from disdrodb.l1.resampling import add_sample_interval
 from disdrodb.l1_env.routines import load_env_dataset
 from disdrodb.l2.empirical_dsd import (  # TODO: maybe move out of L2
@@ -34,7 +34,7 @@ from disdrodb.utils.writer import finalize_product
 def generate_l1(
     ds,
     # Fall velocity option
-    fall_velocity_method="Beard1976",
+    fall_velocity_model="Beard1976",
     # Diameter-Velocity Filtering Options
     minimum_diameter=0,
     maximum_diameter=10,
@@ -54,7 +54,7 @@ def generate_l1(
     ----------
     ds : xarray.Dataset
         DISDRODB L0C dataset.
-    fall_velocity_method : str, optional
+    fall_velocity_model : str, optional
         Method to compute fall velocity.
         The default method is ``"Beard1976"``.
     minimum_diameter : float, optional
@@ -106,7 +106,9 @@
 
     # ---------------------------------------------------------------------------
     # Retrieve ENV dataset or take defaults
-    # --> Used only for Beard fall velocity currently !
+    # - Used only for Beard fall velocity currently !
+    # - It checks and includes default geolocation if missing
+    # - For mobile disdrometer, infill missing geolocation with backward and forward filling
     ds_env = load_env_dataset(ds)
 
     # ---------------------------------------------------------------------------
@@ -119,16 +121,18 @@
     # Add sample interval as coordinate (in seconds)
     ds_l1 = add_sample_interval(ds_l1, sample_interval=sample_interval)
 
-    # Add L0C coordinates that might got lost
-    if "time_qc" in ds_l1:
-        ds_l1 = ds_l1.assign_coords({"time_qc": ds["time_qc"]})
+    # Add optional variables to L1 dataset
+    optional_variables = ["time_qc", "qc_resampling"]
+    for var in optional_variables:
+        if var in ds:
+            ds_l1[var] = ds[var]
 
     # -------------------------------------------------------------------------------------------
     # Filter dataset by diameter and velocity bins
     if sensor_name in ["PARSIVEL", "PARSIVEL2"]:
         # - Remove first two bins because never reports data !
         # - If not removed, can alter e.g. L2M model fitting
-        ds_l1 = filter_diameter_bins(ds=ds_l1, minimum_diameter=0.312)  # it includes the 0.2495-0.3745 bin
+        ds_l1 = filter_diameter_bins(ds=ds_l1, minimum_diameter=0.2495)  # it includes the 0.2495-0.3745 bin
 
     # - Filter diameter bins
     ds_l1 = filter_diameter_bins(ds=ds_l1, minimum_diameter=minimum_diameter, maximum_diameter=maximum_diameter)
@@ -138,16 +142,12 @@
 
     # -------------------------------------------------------------------------------------------
     # Compute fall velocity
-    ds_l1["fall_velocity"] = get_raindrop_fall_velocity(
-        diameter=ds_l1["diameter_bin_center"],
-        method=fall_velocity_method,
-        ds_env=ds_env,  # mm
-    )
+    ds_l1["fall_velocity"] = get_raindrop_fall_velocity_from_ds(ds=ds_l1, ds_env=ds_env, model=fall_velocity_model)
 
     # -------------------------------------------------------------------------------------------
     # Define filtering mask according to fall velocity
     if has_velocity_dimension:
-        mask = define_spectrum_mask(
+        mask = define_raindrop_spectrum_mask(
             drop_number=ds_l1["raw_drop_number"],
             fall_velocity=ds_l1["fall_velocity"],
             above_velocity_fraction=above_velocity_fraction,
@@ -162,10 +162,9 @@
     # -------------------------------------------------------------------------------------------
     # Retrieve drop number and drop_counts arrays
     if has_velocity_dimension:
-        drop_number = ds_l1["raw_drop_number"].where(mask)  # 2D (diameter, velocity)
+        drop_number = ds_l1["raw_drop_number"].where(mask, 0)  # 2D (diameter, velocity)
        drop_counts = drop_number.sum(dim=VELOCITY_DIMENSION)  # 1D (diameter)
        drop_counts_raw = ds_l1["raw_drop_number"].sum(dim=VELOCITY_DIMENSION)  # 1D (diameter)
-
     else:
         drop_number = ds_l1["raw_drop_number"]  # 1D (diameter)
         drop_counts = ds_l1["raw_drop_number"]  # 1D (diameter)
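Note the API break above: the `generate_l1` keyword `fall_velocity_method` is now `fall_velocity_model`. A minimal caller sketch against the signature shown in this diff (the file path is illustrative; everything else follows the hunks above):

    import xarray as xr

    from disdrodb.l1.processing import generate_l1

    # Illustrative L0C file path; any DISDRODB L0C dataset applies
    ds = xr.open_dataset("L0C.EXAMPLE_STATION.nc")

    # 0.2.0: pass fall_velocity_model=... (the old fall_velocity_method=... keyword no longer exists)
    ds_l1 = generate_l1(ds, fall_velocity_model="Beard1976", minimum_diameter=0, maximum_diameter=10)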
disdrodb/l1/resampling.py CHANGED
@@ -19,9 +19,12 @@ import numpy as np
 import pandas as pd
 import xarray as xr
 
-from disdrodb.utils.time import ensure_sample_interval_in_seconds, regularize_dataset
-
-DEFAULT_ACCUMULATIONS = ["10s", "30s", "1min", "2min", "5min", "10min", "30min", "1hour"]
+from disdrodb.utils.time import (
+    ensure_sample_interval_in_seconds,
+    get_dataset_start_end_time,
+    get_sampling_information,
+    regularize_dataset,
+)
 
 
 def add_sample_interval(ds, sample_interval):
@@ -95,6 +98,27 @@ def define_window_size(sample_interval, accumulation_interval):
     return window_size
 
 
+def _finalize_qc_resampling(ds, sample_interval, accumulation_interval):
+    # Compute qc_resampling
+    # - 0 if not missing timesteps
+    # - 1 if all timesteps missing
+    n_timesteps = accumulation_interval / sample_interval
+    ds["qc_resampling"] = np.round(1 - ds["qc_resampling"] / n_timesteps, 1)
+    ds["qc_resampling"].attrs = {
+        "long_name": "Resampling Quality Control Flag",
+        "standard_name": "quality_flag",
+        "units": "",
+        "valid_min": 0.0,
+        "valid_max": 1.0,
+        "description": (
+            "Fraction of timesteps missing when resampling the data."
+            "0 = No timesteps missing; 1 = All timesteps missing;"
+            "Intermediate values indicate partial data coverage."
+        ),
+    }
+    return ds
+
+
 def _resample(ds, variables, accumulation, op):
     if not variables:
         return {}
@@ -113,23 +137,24 @@ def _rolling(ds, variables, window_size, op):
     return ds_subset
 
 
-def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
+def resample_dataset(ds, sample_interval, temporal_resolution):
     """
     Resample the dataset to a specified accumulation interval.
 
+    The output timesteps correspond to the starts of the periods over which
+    the resampling operation has been performed !
+
     Parameters
     ----------
     ds : xarray.Dataset
         The input dataset to be resampled.
     sample_interval : int
-        The sample interval of the input dataset.
-    accumulation_interval : int
-        The interval in seconds over which to accumulate the data.
-    rolling : bool, optional
-        If True, apply a rolling window before resampling. Default is True.
-        If True, forward rolling is performed.
-        The output timesteps correspond to the starts of the periods over which
-        the resampling operation has been performed !
+        The sample interval (in seconds) of the input dataset.
+    temporal_resolution : str
+        The desired temporal resolution for resampling.
+        It should be a string representing the accumulation interval,
+        e.g., "5MIN" for 5 minutes, "1H" for 1 hour, "30S" for 30 seconds, etc.
+        Prefixed with "ROLL" for rolling resampling, e.g., "ROLL5MIN".
 
     Returns
     -------
@@ -149,6 +174,9 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
     # Ensure sample interval in seconds
     sample_interval = int(ensure_sample_interval_in_seconds(sample_interval))
 
+    # Retrieve accumulation_interval and rolling option
+    accumulation_interval, rolling = get_sampling_information(temporal_resolution)
+
     # --------------------------------------------------------------------------.
     # Raise error if the accumulation_interval is less than the sample interval
     if accumulation_interval < sample_interval:
@@ -157,51 +185,78 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
     if not accumulation_interval % sample_interval == 0:
         raise ValueError("The accumulation_interval is not a multiple of sample interval.")
 
+    # Retrieve input dataset start_time and end_time
+    start_time, end_time = get_dataset_start_end_time(ds, time_dim="time")
+
+    # Initialize qc_resampling
+    ds["qc_resampling"] = xr.ones_like(ds["time"], dtype="float")
+
+    # Retrieve dataset attributes
+    attrs = ds.attrs.copy()
+
+    # If no resampling, return as it is
+    if sample_interval == accumulation_interval:
+        attrs["disdrodb_aggregated_product"] = "False"
+        attrs["disdrodb_rolled_product"] = "False"
+        attrs["disdrodb_temporal_resolution"] = temporal_resolution
+
+        ds = _finalize_qc_resampling(ds, sample_interval=sample_interval, accumulation_interval=accumulation_interval)
+        ds = add_sample_interval(ds, sample_interval=accumulation_interval)
+        ds.attrs = attrs
+        return ds
+
     # --------------------------------------------------------------------------.
     #### Preprocess the dataset
-    # Here we set NaN in the raw_drop_number to 0
-    # - We assume that NaN corresponds to 0
-    # - When we regularize, we infill with NaN
+    # - Set timesteps with NaN in drop_number to zero (and set qc_resampling to 0)
     # - When we aggregate with sum, we don't skip NaN
-    # --> Aggregation with original missing timesteps currently results in NaN !
+    # --> Resampling over missing timesteps will result in NaN drop_number and qc_resampling = 1
+    # --> Resampling over timesteps with NaN in drop_number will result in finite drop_number but qc_resampling > 0
+    # - qc_resampling will inform on the amount of timesteps missing
 
-    # Infill NaN values with zeros for drop_number and raw_drop_number
-    # - This might alter integrated statistics if NaN in spectrum does not actually correspond to 0 !
-    # - TODO: NaN should not be set as 0 !
-    for var in ["drop_number", "raw_drop_number"]:
+    for var in ["drop_number", "raw_drop_number", "drop_counts", "drop_number_concentration"]:
         if var in ds:
-            ds[var] = xr.where(np.isnan(ds[var]), 0, ds[var])
+            dims = set(ds[var].dims) - {"time"}
+            invalid_timesteps = np.isnan(ds[var]).any(dim=dims)
+            ds[var] = ds[var].where(~invalid_timesteps, 0)
+            ds["qc_resampling"] = ds["qc_resampling"].where(~invalid_timesteps, 0)
+
+            if np.all(invalid_timesteps).item():
+                raise ValueError("No timesteps with valid spectrum.")
 
     # Ensure regular dataset without missing timesteps
     # --> This adds NaN values for missing timesteps
-    ds = regularize_dataset(ds, freq=f"{sample_interval}s")
+    ds = regularize_dataset(ds, freq=f"{sample_interval}s", start_time=start_time, end_time=end_time)
+    ds["qc_resampling"] = ds["qc_resampling"].where(~np.isnan(ds["qc_resampling"]), 0)
 
     # --------------------------------------------------------------------------.
     # Define dataset attributes
-    attrs = ds.attrs.copy()
     if rolling:
         attrs["disdrodb_rolled_product"] = "True"
     else:
         attrs["disdrodb_rolled_product"] = "False"
 
-    if sample_interval == accumulation_interval:
-        attrs["disdrodb_aggregated_product"] = "False"
-        ds = add_sample_interval(ds, sample_interval=accumulation_interval)
-        ds.attrs = attrs
-        return ds
-
-    # --------------------------------------------------------------------------.
-    # Resample the dataset
     attrs["disdrodb_aggregated_product"] = "True"
+    attrs["disdrodb_temporal_resolution"] = temporal_resolution
 
+    # --------------------------------------------------------------------------.
     # Initialize resample dataset
     ds_resampled = xr.Dataset()
 
     # Retrieve variables to average/sum
+    # - ATTENTION: it will not resample non-dimensional time coordinates of the dataset !
     var_to_average = ["fall_velocity"]
-    var_to_cumulate = ["raw_drop_number", "drop_number", "drop_counts", "N", "Nraw", "Nremoved"]
+    var_to_cumulate = [
+        "raw_drop_number",
+        "drop_number",
+        "drop_counts",
+        "drop_number_concentration",
+        "N",
+        "Nraw",
+        "Nremoved",
+        "qc_resampling",
+    ]
     var_to_min = ["Dmin"]
-    var_to_max = ["Dmax"]
+    var_to_max = ["Dmax", "time_qc"]
 
     # Retrieve available variables
     var_to_average = [var for var in var_to_average if var in ds]
@@ -209,11 +264,6 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
     var_to_min = [var for var in var_to_min if var in ds]
     var_to_max = [var for var in var_to_max if var in ds]
 
-    # TODO Define custom processing
-    # - quality_flag --> take worst
-    # - skipna if less than fraction (to not waste lot of data when aggregating over i.e. hours)
-    # - Add tolerance on fraction of missing timesteps for large accumulation_intervals
-
     # Resample the dataset
     # - Rolling currently does not allow direct rolling forward.
     # - We currently use center=False which means search for data backward (right-aligned) !
@@ -239,6 +289,19 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
         {"time": ds_resampled["time"].data[: -window_size + 1]},
     )
 
+    # Finalize qc_resampling
+    ds_resampled = _finalize_qc_resampling(
+        ds_resampled,
+        sample_interval=sample_interval,
+        accumulation_interval=accumulation_interval,
+    )
+    # Set to NaN timesteps where qc_resampling == 1
+    # --> This occurs for missing timesteps in input dataset or all NaN drop_number arrays
+    variables = list(set(ds_resampled.data_vars) - {"qc_resampling"})
+    mask_missing_timesteps = ds_resampled["qc_resampling"] != 1
+    for var in variables:
+        ds_resampled[var] = ds_resampled[var].where(mask_missing_timesteps)
+
     # Add attributes
     ds_resampled.attrs = attrs
 
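Two things are worth calling out in the hunks above: `resample_dataset` now takes a `temporal_resolution` string instead of `accumulation_interval`/`rolling`, and the new `qc_resampling` variable encodes the fraction of missing input timesteps. A worked sketch of both, following the arithmetic in `_finalize_qc_resampling` (the dataset `ds` is assumed to be a 60 s DISDRODB dataset already in memory):

    from disdrodb.l1.resampling import resample_dataset

    # Block resampling to 5 minutes; "ROLL5MIN" would request rolling resampling instead
    ds_5min = resample_dataset(ds, sample_interval=60, temporal_resolution="5MIN")

    # qc_resampling per output timestep: 1 - (valid input timesteps) / (expected input timesteps)
    n_timesteps = 300 / 60               # 5 expected 60 s samples per 5 min window
    qc = round(1 - 3 / n_timesteps, 1)   # 3 of 5 samples valid --> 0.4, i.e. 40% of timesteps missing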
disdrodb/l1_env/routines.py CHANGED
@@ -15,39 +15,68 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
 """Core functions for DISDRODB ENV production."""
+import numpy as np
 import xarray as xr
 
 from disdrodb.constants import GEOLOCATION_COORDS
+from disdrodb.l0.l0b_processing import ensure_valid_geolocation
+from disdrodb.utils.logger import log_warning
+
+DEFAULT_GEOLOCATION = {
+    "latitude": 46.159346,
+    "longitude": 8.774586,
+    "altitude": 0,
+}
 
 
 def get_default_environment_dataset():
     """Define defaults values for the ENV dataset."""
     ds_env = xr.Dataset()
-    ds_env["sea_level_air_pressure"] = 101_325
-    ds_env["gas_constant_dry_air"] = 287.04
-    ds_env["lapse_rate"] = 0.0065
-    ds_env["relative_humidity"] = 0.95  # Value between 0 and 1 !
-    ds_env["temperature"] = 20 + 273.15
+    ds_env["sea_level_air_pressure"] = 101_325  # Pa
+    ds_env["gas_constant_dry_air"] = 287.04  # J kg⁻¹ K⁻¹
+    ds_env["lapse_rate"] = 0.0065  # K m⁻¹
+    ds_env["relative_humidity"] = 0.95  # 0-1 !
+    ds_env["temperature"] = 20 + 273.15  # K
+    ds_env["water_density"] = 1000  # kg m⁻³ (T == 10 --> 999.7, T == 20 --> 998.2)
+    # get_water_density(temperature=temperature, air_pressure=air_pressure
     return ds_env
 
 
-def _assign_geolocation(ds_src, dst_dst):
+def _assign_geolocation(ds_src, dst_dst, logger=None):
+    dict_coords = {}
+    for coord in GEOLOCATION_COORDS:
+        if coord in ds_src:
+            # Check geolocation validity
+            ds_src = ensure_valid_geolocation(ds_src, coord=coord, errors="coerce")
+            # Assign valid geolocation (or default one if invalid)
+            if "time" not in ds_src[coord].dims:
+                dict_coords[coord] = ds_src[coord] if not np.isnan(ds_src[coord]) else DEFAULT_GEOLOCATION[coord]
+            else:  # If coordinates varies over time, infill NaN over time with forward and backward filling
+                dict_coords[coord] = ds_src[coord].ffill(dim="time").bfill(dim="time")
+        else:
+            dict_coords[coord] = DEFAULT_GEOLOCATION[coord]
+            log_warning(
+                logger=logger,
+                msg=f"{coord} not available. Setting {coord}={DEFAULT_GEOLOCATION[coord]}",
+                verbose=False,
+            )
 
-    dict_coords = {coord: ds_src[coord] for coord in GEOLOCATION_COORDS if coord in ds_src}
+    # Assign geolocation
     dst_dst = dst_dst.assign_coords(dict_coords)
     return dst_dst
 
 
-def load_env_dataset(ds):
+def load_env_dataset(ds=None, logger=None):
     """Load the ENV dataset."""
-    # TODO: Retrieve relative_humidity and temperature from L1-ENV
+    # TODO: Retrieve relative_humidity, lapse_rate and temperature from DISDRODB-ENV product
+
+    # Load default environment dataset
     ds_env = get_default_environment_dataset()
-    # Compute water density
-    # get_water_density(
-    #     temperature=temperature,
-    #     air_pressure=air_pressure,
-    # )
-    # --> (T == 10 --> 999.7, T == 20 --> 998.2
-    ds_env["water_density"] = 1000  # kg / m3  # TODO as function of ENV (temperature, ...) ?
-    ds_env = _assign_geolocation(ds_src=ds, dst_dst=ds_env)
+
+    # Assign geolocation if input dataset provided
+    if ds is not None:
+        ds_env = _assign_geolocation(ds_src=ds, dst_dst=ds_env, logger=logger)
+    # Otherwise add default geolocation
+    else:
+        ds_env = ds_env.assign_coords(DEFAULT_GEOLOCATION)
     return ds_env
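`load_env_dataset` can now be called without an input dataset, in which case the defaults above (including `DEFAULT_GEOLOCATION`) are used directly. A quick sketch:

    from disdrodb.l1_env.routines import load_env_dataset

    # No input dataset: returns the default ENV dataset with DEFAULT_GEOLOCATION coordinates
    ds_env = load_env_dataset()
    print(float(ds_env["temperature"]))  # 293.15 (the 20 °C default, in K)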
disdrodb/l2/empirical_dsd.py CHANGED
@@ -236,6 +236,12 @@ def get_effective_sampling_area(sensor_name, diameter):
     if sensor_name == "RD80":
         sampling_area = 0.005  # m2
         return sampling_area
+    if sensor_name == "SWS250":  # TODO: L * (B - diameter / 2) ?
+        # Table 29 of the manual that the sample volume is 400cm3, path length?
+        # Distance between the end of the hood heaters is 291 mm.
+        # Adding a factor of 1.5 for better representation of the Tx-Rx distance: L= 436 mm.
+        sampling_area = 0.0091  # m2
+        return sampling_area
     raise NotImplementedError(f"Effective sampling area for {sensor_name} must yet to be specified in the software.")
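A quick check of the new SWS250 branch, assuming (as both the RD80 and SWS250 branches shown here suggest) that the `diameter` argument is not consulted for these sensors:

    from disdrodb.l2.empirical_dsd import get_effective_sampling_area

    # SWS250 now maps to a fixed effective sampling area
    area = get_effective_sampling_area("SWS250", diameter=None)
    assert area == 0.0091  # m2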
disdrodb/l2/processing.py CHANGED
@@ -27,7 +27,6 @@ from disdrodb.l2.empirical_dsd import (
     add_bins_metrics,
     compute_integral_parameters,
     compute_spectrum_parameters,
-    get_drop_average_velocity,
     get_drop_number_concentration,
     get_effective_sampling_area,
     get_kinetic_energy_variables_from_drop_number,
@@ -273,6 +272,8 @@ def generate_l2e(
         "Dmin",
         "Dmax",
         "fall_velocity",
+        "qc_resampling",
+        "time_qc",
     ]
 
     variables = [var for var in variables if var in ds]
@@ -282,8 +283,8 @@
     # -------------------------------------------------------------------------------------------
     # Compute and add drop average velocity if an optical disdrometer (i.e OTT Parsivel or ThiesLPM)
     # - We recompute it because if the input dataset is aggregated, it must be updated !
-    if has_velocity_dimension:
-        ds["drop_average_velocity"] = get_drop_average_velocity(ds["drop_number"])
+    # if has_velocity_dimension:
+    #     ds["drop_average_velocity"] = get_drop_average_velocity(ds["drop_number"])
 
     # -------------------------------------------------------------------------------------------
     # Define velocity array with dimension 'velocity_method'
@@ -441,7 +442,7 @@ def generate_l2m(
     diameter_spacing=0.05,
     # Processing options
     ds_env=None,
-    fall_velocity_method="Beard1976",
+    fall_velocity_model="Beard1976",
     # Filtering options
     minimum_ndrops=1,
     minimum_nbins=3,
@@ -548,7 +549,7 @@
         drop_number_concentration = psd(diameter)
 
         # Retrieve fall velocity for each new diameter bin
-        velocity = get_raindrop_fall_velocity(diameter=diameter, method=fall_velocity_method, ds_env=ds_env)  # mm
+        velocity = get_raindrop_fall_velocity(diameter=diameter, model=fall_velocity_model, ds_env=ds_env)  # mm
 
         # Compute integral parameters
         ds_params = compute_integral_parameters(
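As in L1, callers of `generate_l2m` must switch to `fall_velocity_model`, and `get_raindrop_fall_velocity` now takes `model=` instead of `method=`. A hedged sketch, assuming the function is still importable from `disdrodb.l1.fall_velocity` as it was in 0.1.4 (the diameter grid is illustrative):

    import numpy as np

    from disdrodb.l1.fall_velocity import get_raindrop_fall_velocity
    from disdrodb.l1_env.routines import load_env_dataset

    # 0.2.0 keyword is model= (was method= in 0.1.4); diameters in mm
    diameter = np.arange(0.1, 8.0, 0.05)
    velocity = get_raindrop_fall_velocity(diameter=diameter, model="Beard1976", ds_env=load_env_dataset())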
disdrodb/metadata/geolocation.py CHANGED
@@ -60,8 +60,6 @@ def infer_altitude(latitude, longitude, dem="aster30m"):
     ----------
     https://www.opentopodata.org/api/
     """
-    import requests
-
     url = f"https://api.opentopodata.org/v1/{dem}?locations={latitude},{longitude}"
     r = requests.get(url)
 
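The function-local `import requests` is dropped (-2/+0 per the file index), implying `requests` is now imported at module scope. Usage is unchanged; a sketch with illustrative coordinates:

    from disdrodb.metadata.geolocation import infer_altitude

    # Queries the Open Topo Data API shown above (network access required)
    altitude = infer_altitude(latitude=46.159346, longitude=8.774586, dem="aster30m")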
disdrodb/metadata/search.py CHANGED
@@ -102,10 +102,9 @@
         Path to the root of the DISDRODB Metadata Archive. Format: ``<...>/DISDRODB``
         If None, the``metadata_archive_dir`` path specified in the DISDRODB active configuratio. The default is None.
     **product_kwargs : dict, optional
-        Additional arguments required for some products.
-        For example, for the "L2E" product, you need to specify ``rolling`` and
-        ``sample_interval``. For the "L2M" product, you need to specify also
-        the ``model_name``.
+        Additional arguments required for DISDRODB products L1, L2E and L2M.
+        For the L1, L2E and L2M products, ``temporal_resolution`` is required.
+        FOr the L2M product, ``model_name`` is required.
 
     Returns
     -------
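The product kwargs therefore change for callers of this metadata search API. A hedged usage sketch, assuming the surrounding function is `get_list_metadata` and that products are selected with a `product=` keyword (the full signature is not part of this hunk; station filters are omitted):

    from disdrodb.metadata.search import get_list_metadata

    # 0.2.0: L1/L2E/L2M require temporal_resolution; L2M additionally requires model_name
    l2e_paths = get_list_metadata(product="L2E", temporal_resolution="ROLL5MIN")
    l2m_paths = get_list_metadata(product="L2M", temporal_resolution="5MIN", model_name="GAMMA_ML")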
disdrodb/psd/fitting.py CHANGED
@@ -23,7 +23,7 @@ from scipy.optimize import minimize
 from scipy.special import gamma, gammaln  # Regularized lower incomplete gamma function
 
 from disdrodb.constants import DIAMETER_DIMENSION
-from disdrodb.l1.fall_velocity import get_dataset_fall_velocity
+from disdrodb.l1.fall_velocity import get_raindrop_fall_velocity_from_ds
 from disdrodb.l2.empirical_dsd import (
     get_median_volume_drop_diameter,
     get_moment,
@@ -607,7 +607,7 @@ def estimate_gamma_parameters(
 
     """
     # Define initial guess for parameters
-    a = mu + 1  # (mu = a-1, a = mu+1)
+    a = mu + 1  # (mu = a-1, a = mu+1) (a > 0 --> mu=-1)
     scale = 1 / Lambda
     initial_params = [a, scale]
 
@@ -1208,13 +1208,13 @@
 ):
     """Estimate GammaPSD model parameters using Grid Search."""
     # Define parameters bounds
-    mu_bounds = (0.01, 20)
-    lambda_bounds = (0.01, 60)
+    mu_bounds = (-1, 40)
+    lambda_bounds = (0, 60)
 
     # Define initial set of parameters
-    mu_step = 0.5
+    mu_step = 0.25
     lambda_step = 0.5
-    mu_values = np.arange(0.01, 20, step=mu_step)
+    mu_values = np.arange(0, 40, step=mu_step)
     lambda_values = np.arange(0, 60, step=lambda_step)
 
     # First round of GS
@@ -1304,15 +1304,17 @@
     """Estimate LognormalPSD model parameters using Grid Search."""
     # Define parameters bounds
     sigma_bounds = (0, np.inf)  # > 0
-    scale_bounds = (0.1, np.inf)  # > 0
+    scale_bounds = (0, np.inf)  # > 0
     # mu_bounds = (- np.inf, np.inf)  # mu = np.log(scale)
 
     # Define initial set of parameters
+    # --> Typically sigma between 0 and 3
+    # --> Typically mu between -2 and 2
     scale_step = 0.2
     sigma_step = 0.2
-    scale_values = np.arange(0.1, 20, step=scale_step)
-    mu_values = np.log(scale_values)  # TODO: define realistic values
-    sigma_values = np.arange(0, 20, step=sigma_step)  # TODO: define realistic values
+    scale_values = np.arange(scale_step, 20, step=scale_step)
+    mu_values = np.log(scale_values)
+    sigma_values = np.arange(0, 3, step=sigma_step)
 
     # First round of GS
     Nt, mu, sigma = _apply_lognormal_gs(
@@ -1333,7 +1335,8 @@
     # Second round of GS
     sigma_values = define_param_range(sigma, sigma_step, bounds=sigma_bounds)
     scale_values = define_param_range(np.exp(mu), scale_step, bounds=scale_bounds)
-    mu_values = np.log(scale_values)
+    with suppress_warnings():
+        mu_values = np.log(scale_values)
     Nt, mu, sigma = _apply_lognormal_gs(
         mu_values=mu_values,
         sigma_values=sigma_values,
@@ -1365,7 +1368,7 @@
 ):
     """Estimate NormalizedGammaPSD model parameters using Grid Search."""
     # Define set of mu values
-    mu_arr = np.arange(0.01, 20, step=0.01)
+    mu_arr = np.arange(-4, 30, step=0.01)
 
     # Perform grid search
     with suppress_warnings():
@@ -2353,7 +2356,7 @@ def get_gs_parameters(ds, psd_model, target="ND", transformation="log", error_or
 
     # Check fall velocity is available if target R
     if "fall_velocity" not in ds:
-        ds["fall_velocity"] = get_dataset_fall_velocity(ds)
+        ds["fall_velocity"] = get_raindrop_fall_velocity_from_ds(ds)
 
     # Retrieve estimation function
     func = OPTIMIZATION_ROUTINES_DICT["GS"][psd_model]
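The widened grid-search ranges above let mu go below zero; the inline note `(a > 0 --> mu=-1)` marks the hard floor, since scipy's gamma shape parameter a = mu + 1 must stay positive. A small sketch of the parameter mapping used for the initial guess:

    # A gamma DSD N(D) ~ D**mu * exp(-Lambda * D) corresponds to scipy's gamma
    # distribution with shape a and scale: pdf(x) ~ x**(a - 1) * exp(-x / scale)
    mu, Lambda = 2.0, 3.0  # illustrative DSD parameters
    a = mu + 1             # shape: mu = a - 1, so mu > -1 requires a > 0
    scale = 1 / Lambda     # scale is the inverse of the slope parameter
    print(a, scale)        # 3.0 0.3333...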
disdrodb/routines/l0.py CHANGED
@@ -50,7 +50,7 @@ from disdrodb.l0.l0b_nc_processing import sanitize_ds
 from disdrodb.l0.l0b_processing import generate_l0b
 from disdrodb.l0.l0c_processing import TOLERANCE_SECONDS, create_l0c_datasets
 from disdrodb.metadata import read_station_metadata
-from disdrodb.utils.archiving import get_files_per_time_block
+from disdrodb.utils.archiving import group_files_by_time_block
 from disdrodb.utils.dask import execute_tasks_safely
 from disdrodb.utils.decorators import delayed_if_parallel, single_threaded_if_parallel
 
@@ -928,7 +928,7 @@
     # -------------------------------------------------------------------------.
     # Retrieve dictionary with the required files per time block
     # TODO: allow customizing this in config file, but risk of out of memory !
-    list_event_info = get_files_per_time_block(filepaths=filepaths, freq="day", tolerance_seconds=TOLERANCE_SECONDS)
+    list_event_info = group_files_by_time_block(filepaths=filepaths, freq="day", tolerance_seconds=TOLERANCE_SECONDS)
 
     # -----------------------------------------------------------------.
     # Generate L0C files
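Code importing the archiving helper directly needs the matching one-line rename; a sketch using the same keyword arguments as the call site above (the file list is illustrative):

    from disdrodb.l0.l0c_processing import TOLERANCE_SECONDS
    from disdrodb.utils.archiving import group_files_by_time_block  # was get_files_per_time_block in 0.1.4

    filepaths = ["path/to/file_1.nc", "path/to/file_2.nc"]  # illustrative
    list_event_info = group_files_by_time_block(filepaths=filepaths, freq="day", tolerance_seconds=TOLERANCE_SECONDS)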