disdrodb 0.1.5__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125)
  1. disdrodb/__init__.py +1 -5
  2. disdrodb/_version.py +2 -2
  3. disdrodb/accessor/methods.py +22 -4
  4. disdrodb/api/checks.py +10 -0
  5. disdrodb/api/io.py +20 -18
  6. disdrodb/api/path.py +42 -77
  7. disdrodb/api/search.py +89 -23
  8. disdrodb/cli/disdrodb_create_summary.py +1 -1
  9. disdrodb/cli/disdrodb_run_l0.py +1 -1
  10. disdrodb/cli/disdrodb_run_l0a.py +1 -1
  11. disdrodb/cli/disdrodb_run_l0b.py +1 -1
  12. disdrodb/cli/disdrodb_run_l0c.py +1 -1
  13. disdrodb/cli/disdrodb_run_l1.py +1 -1
  14. disdrodb/cli/disdrodb_run_l2e.py +1 -1
  15. disdrodb/cli/disdrodb_run_l2m.py +1 -1
  16. disdrodb/configs.py +30 -83
  17. disdrodb/constants.py +4 -3
  18. disdrodb/data_transfer/download_data.py +4 -2
  19. disdrodb/docs.py +2 -2
  20. disdrodb/etc/products/L1/1MIN.yaml +13 -0
  21. disdrodb/etc/products/L1/LPM/1MIN.yaml +13 -0
  22. disdrodb/etc/products/L1/LPM_V0/1MIN.yaml +13 -0
  23. disdrodb/etc/products/L1/PARSIVEL/1MIN.yaml +13 -0
  24. disdrodb/etc/products/L1/PARSIVEL2/1MIN.yaml +13 -0
  25. disdrodb/etc/products/L1/PWS100/1MIN.yaml +13 -0
  26. disdrodb/etc/products/L1/RD80/1MIN.yaml +13 -0
  27. disdrodb/etc/products/L1/SWS250/1MIN.yaml +13 -0
  28. disdrodb/etc/products/L1/global.yaml +6 -0
  29. disdrodb/etc/products/L2E/10MIN.yaml +1 -12
  30. disdrodb/etc/products/L2E/global.yaml +1 -1
  31. disdrodb/etc/products/L2M/MODELS/NGAMMA_GS_R_MAE.yaml +6 -0
  32. disdrodb/etc/products/L2M/global.yaml +1 -1
  33. disdrodb/issue/checks.py +2 -2
  34. disdrodb/l0/check_configs.py +1 -1
  35. disdrodb/l0/configs/LPM/l0a_encodings.yml +0 -1
  36. disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +0 -4
  37. disdrodb/l0/configs/LPM/l0b_encodings.yml +9 -9
  38. disdrodb/l0/configs/LPM/raw_data_format.yml +11 -11
  39. disdrodb/l0/configs/LPM_V0/bins_diameter.yml +103 -0
  40. disdrodb/l0/configs/LPM_V0/bins_velocity.yml +103 -0
  41. disdrodb/l0/configs/LPM_V0/l0a_encodings.yml +45 -0
  42. disdrodb/l0/configs/LPM_V0/l0b_cf_attrs.yml +180 -0
  43. disdrodb/l0/configs/LPM_V0/l0b_encodings.yml +410 -0
  44. disdrodb/l0/configs/LPM_V0/raw_data_format.yml +474 -0
  45. disdrodb/l0/configs/PARSIVEL/l0b_encodings.yml +1 -1
  46. disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +8 -8
  47. disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +9 -9
  48. disdrodb/l0/l0_reader.py +2 -2
  49. disdrodb/l0/l0a_processing.py +6 -2
  50. disdrodb/l0/l0b_processing.py +26 -19
  51. disdrodb/l0/l0c_processing.py +17 -3
  52. disdrodb/l0/manuals/LPM_V0.pdf +0 -0
  53. disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +15 -7
  54. disdrodb/l0/readers/LPM/ITALY/GID_LPM_PI.py +279 -0
  55. disdrodb/l0/readers/LPM/ITALY/GID_LPM_T.py +276 -0
  56. disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +2 -2
  57. disdrodb/l0/readers/LPM/NETHERLANDS/DELFT_RWANDA_LPM_NC.py +103 -0
  58. disdrodb/l0/readers/LPM/NORWAY/HAUKELISETER_LPM.py +216 -0
  59. disdrodb/l0/readers/LPM/NORWAY/NMBU_LPM.py +208 -0
  60. disdrodb/l0/readers/LPM/UK/WITHWORTH_LPM.py +219 -0
  61. disdrodb/l0/readers/LPM/USA/CHARLESTON.py +229 -0
  62. disdrodb/l0/readers/{LPM → LPM_V0}/BELGIUM/ULIEGE.py +33 -49
  63. disdrodb/l0/readers/LPM_V0/ITALY/GID_LPM_V0.py +240 -0
  64. disdrodb/l0/readers/PARSIVEL/BASQUECOUNTRY/EUSKALMET_OTT.py +227 -0
  65. disdrodb/l0/readers/{PARSIVEL2 → PARSIVEL}/NASA/LPVEX.py +16 -28
  66. disdrodb/l0/readers/PARSIVEL/{GPM → NASA}/MC3E.py +1 -1
  67. disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2010_UF.py +3 -3
  68. disdrodb/l0/readers/PARSIVEL2/BASQUECOUNTRY/EUSKALMET_OTT2.py +232 -0
  69. disdrodb/l0/readers/PARSIVEL2/DENMARK/EROSION_raw.py +1 -1
  70. disdrodb/l0/readers/PARSIVEL2/JAPAN/PRECIP.py +155 -0
  71. disdrodb/l0/readers/PARSIVEL2/MPI/BCO_PARSIVEL2.py +14 -7
  72. disdrodb/l0/readers/PARSIVEL2/MPI/BOWTIE.py +8 -3
  73. disdrodb/l0/readers/PARSIVEL2/NASA/APU.py +28 -5
  74. disdrodb/l0/readers/PARSIVEL2/NCAR/RELAMPAGO_PARSIVEL2.py +1 -1
  75. disdrodb/l0/readers/PARSIVEL2/{GPM/GCPEX.py → NORWAY/UIB.py} +54 -29
  76. disdrodb/l0/readers/PARSIVEL2/PHILIPPINES/{PANGASA.py → PAGASA.py} +6 -3
  77. disdrodb/l0/readers/PARSIVEL2/SPAIN/GRANADA.py +1 -1
  78. disdrodb/l0/readers/PARSIVEL2/SWEDEN/SMHI.py +189 -0
  79. disdrodb/l0/readers/{PARSIVEL/GPM/PIERS.py → PARSIVEL2/USA/CSU.py} +62 -29
  80. disdrodb/l0/readers/PARSIVEL2/USA/{C3WE.py → CW3E.py} +51 -24
  81. disdrodb/l0/readers/{PARSIVEL/GPM/IFLOODS.py → RD80/BRAZIL/ATTO_RD80.py} +50 -34
  82. disdrodb/l0/readers/{SW250 → SWS250}/BELGIUM/KMI.py +1 -1
  83. disdrodb/l1/beard_model.py +45 -1
  84. disdrodb/l1/fall_velocity.py +1 -6
  85. disdrodb/l1/filters.py +2 -0
  86. disdrodb/l1/processing.py +6 -5
  87. disdrodb/l1/resampling.py +101 -38
  88. disdrodb/l2/empirical_dsd.py +12 -8
  89. disdrodb/l2/processing.py +4 -3
  90. disdrodb/metadata/search.py +3 -4
  91. disdrodb/routines/l0.py +4 -4
  92. disdrodb/routines/l1.py +173 -60
  93. disdrodb/routines/l2.py +121 -269
  94. disdrodb/routines/options.py +347 -0
  95. disdrodb/routines/wrappers.py +9 -1
  96. disdrodb/scattering/axis_ratio.py +3 -0
  97. disdrodb/scattering/routines.py +1 -1
  98. disdrodb/summary/routines.py +765 -724
  99. disdrodb/utils/archiving.py +51 -44
  100. disdrodb/utils/attrs.py +1 -1
  101. disdrodb/utils/compression.py +4 -2
  102. disdrodb/utils/dask.py +35 -15
  103. disdrodb/utils/dict.py +33 -0
  104. disdrodb/utils/encoding.py +1 -1
  105. disdrodb/utils/manipulations.py +7 -1
  106. disdrodb/utils/routines.py +9 -8
  107. disdrodb/utils/time.py +9 -1
  108. disdrodb/viz/__init__.py +0 -13
  109. disdrodb/viz/plots.py +209 -0
  110. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/METADATA +1 -1
  111. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/RECORD +124 -95
  112. disdrodb/l0/readers/PARSIVEL/GPM/LPVEX.py +0 -85
  113. /disdrodb/etc/products/L2M/{GAMMA_GS_ND_MAE.yaml → MODELS/GAMMA_GS_ND_MAE.yaml} +0 -0
  114. /disdrodb/etc/products/L2M/{GAMMA_ML.yaml → MODELS/GAMMA_ML.yaml} +0 -0
  115. /disdrodb/etc/products/L2M/{LOGNORMAL_GS_LOG_ND_MAE.yaml → MODELS/LOGNORMAL_GS_LOG_ND_MAE.yaml} +0 -0
  116. /disdrodb/etc/products/L2M/{LOGNORMAL_GS_ND_MAE.yaml → MODELS/LOGNORMAL_GS_ND_MAE.yaml} +0 -0
  117. /disdrodb/etc/products/L2M/{LOGNORMAL_ML.yaml → MODELS/LOGNORMAL_ML.yaml} +0 -0
  118. /disdrodb/etc/products/L2M/{NGAMMA_GS_LOG_ND_MAE.yaml → MODELS/NGAMMA_GS_LOG_ND_MAE.yaml} +0 -0
  119. /disdrodb/etc/products/L2M/{NGAMMA_GS_ND_MAE.yaml → MODELS/NGAMMA_GS_ND_MAE.yaml} +0 -0
  120. /disdrodb/etc/products/L2M/{NGAMMA_GS_Z_MAE.yaml → MODELS/NGAMMA_GS_Z_MAE.yaml} +0 -0
  121. /disdrodb/l0/readers/PARSIVEL2/{GPM → NASA}/NSSTC.py +0 -0
  122. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/WHEEL +0 -0
  123. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/entry_points.txt +0 -0
  124. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/licenses/LICENSE +0 -0
  125. {disdrodb-0.1.5.dist-info → disdrodb-0.2.1.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+
 # -----------------------------------------------------------------------------.
 # Copyright (c) 2021-2023 DISDRODB developers
 #
@@ -29,17 +30,47 @@ def reader(
     """Reader."""
     ##------------------------------------------------------------------------.
     #### Define column names
-    column_names = ["time", "TO_BE_SPLITTED"]
+    column_names = [
+        "date",
+        "time",
+        "sensor_status",
+        "sample_interval",
+        "n1",
+        "n2",
+        "n3",
+        "n4",
+        "n5",
+        "n6",
+        "n7",
+        "n8",
+        "n9",
+        "n10",
+        "n11",
+        "n12",
+        "n13",
+        "n14",
+        "n15",
+        "n16",
+        "n17",
+        "n18",
+        "n19",
+        "n20",
+        "RI",
+        "RA",
+        "RAT",
+    ]
 
     ##------------------------------------------------------------------------.
     #### Define reader options
     reader_kwargs = {}
     # - Define delimiter
-    reader_kwargs["delimiter"] = ";"
-    # - Skip first row as columns names
+    reader_kwargs["delimiter"] = "\\t"
+    # Skip header
     reader_kwargs["header"] = None
-    # - Skip file with encoding errors
-    reader_kwargs["encoding_errors"] = "ignore"
+    # Skip first row as columns names
+    reader_kwargs["skiprows"] = 1
+    # - Define encoding
+    reader_kwargs["encoding"] = "ISO-8859-1"
     # - Avoid first column to become df index !!!
     reader_kwargs["index_col"] = False
     # - Define behaviour when encountering bad lines
@@ -55,7 +86,7 @@ def reader(
     # - Already included: '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN',
     #                     '-NaN', '-nan', '1.#IND', '1.#QNAN', '<NA>', 'N/A',
     #                     'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'
-    reader_kwargs["na_values"] = ["na", "", "error", "NA", "-.-"]
+    reader_kwargs["na_values"] = ["na", "", "error"]
 
     ##------------------------------------------------------------------------.
     #### Read the data
@@ -68,37 +99,22 @@ def reader(
 
     ##------------------------------------------------------------------------.
     #### Adapt the dataframe to adhere to DISDRODB L0 standards
-    # Convert time column to datetime
-    df_time = pd.to_datetime(df["time"], format="%Y%m%d%H%M%S", errors="coerce")
-
-    # Split the 'TO_BE_SPLITTED' column
-    df = df["TO_BE_SPLITTED"].str.split(",", expand=True, n=9)
-
-    # Assign column names
-    columns_names = [
-        "station_name",
-        "sensor_status",
-        "sensor_temperature",
-        "number_particles",
-        "rainfall_rate_32bit",
-        "reflectivity_16bit",
-        "mor_visibility",
-        "weather_code_synop_4680",
-        "weather_code_synop_4677",
-        "raw_drop_number",
-    ]
-    df.columns = columns_names
+    # Replace 'status' NaN with 0
+    df["sensor_status"] = df["sensor_status"].astype(float).fillna(value=0).astype(int)
 
-    # Add the time column
-    df["time"] = df_time
+    # Define 'time' datetime column
+    df["time"] = df["date"].astype(str) + " " + df["time"].astype(str)
+    df["time"] = pd.to_datetime(df["time"], format="%Y-%m-%d %H:%M:%S", errors="coerce")
+    df = df.drop(columns=["date"])
 
-    # Drop columns not agreeing with DISDRODB L0 standards
-    df = df.drop(columns=["station_name"])
+    # Create raw_drop_number column
+    bin_columns = ["n" + str(i) for i in range(1, 21)]
+    df_arr = df[bin_columns]
+    df_raw_drop_number = df_arr.agg(";".join, axis=1)
+    df["raw_drop_number"] = df_raw_drop_number
 
-    # Drop rows with invalid values
-    # --> Ensure that weather_code_synop_4677 has length 2
-    # --> If a previous column is missing it will have 000
-    df = df[df["weather_code_synop_4677"].str.len() == 2]
+    # Remove bins columns
+    df = df.drop(columns=bin_columns)
 
     # Return the dataframe adhering to DISDRODB L0 standards
     return df
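Note: the following standalone sketch reproduces the new time/raw_drop_number assembly above on a hypothetical two-row frame (the column layout is taken from the reader; values are illustrative only):

    import pandas as pd

    # Hypothetical sample mimicking the new tab-delimited layout: date, time,
    # 20 bin-count columns (read as strings) plus derived rain variables.
    df = pd.DataFrame({
        "date": ["2024-01-01", "2024-01-01"],
        "time": ["00:01:00", "00:02:00"],
        **{f"n{i}": ["0", "3"] for i in range(1, 21)},
    })

    # Build the datetime column and drop the now-redundant 'date' column.
    df["time"] = pd.to_datetime(df["date"] + " " + df["time"], format="%Y-%m-%d %H:%M:%S", errors="coerce")
    df = df.drop(columns=["date"])

    # Join the 20 bin-count columns into the ';'-delimited raw_drop_number string.
    bin_columns = ["n" + str(i) for i in range(1, 21)]
    df["raw_drop_number"] = df[bin_columns].agg(";".join, axis=1)
    df = df.drop(columns=bin_columns)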
disdrodb/l0/readers/{SW250 → SWS250}/BELGIUM/KMI.py CHANGED
@@ -16,7 +16,7 @@
 # You should have received a copy of the GNU General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 # -----------------------------------------------------------------------------.
-"""DISDRODB reader for KMI Biral SW250 sensors."""
+"""DISDRODB reader for KMI Biral SWS250 sensors."""
 import pandas as pd
 
 from disdrodb.l0.l0_reader import is_documented_by, reader_generic_docstring
disdrodb/l1/beard_model.py CHANGED
@@ -385,6 +385,49 @@ def get_water_density(temperature, air_pressure, sea_level_air_pressure=101_325)
     return get_pure_water_density(temperature) * np.exp(-1 * water_compressibility * delta_pressure)
 
 
+####---------------------------------------------------------------------------.
+#### Wrappers
+def retrieve_air_pressure(ds_env):
+    """Retrieve air pressure."""
+    if "air_pressure" in ds_env:
+        return ds_env["air_pressure"]
+    air_pressure = get_air_pressure_at_height(
+        altitude=ds_env["altitude"],
+        latitude=ds_env["latitude"],
+        temperature=ds_env["temperature"],
+        sea_level_air_pressure=ds_env["sea_level_air_pressure"],
+        lapse_rate=ds_env["lapse_rate"],
+    )
+    return air_pressure
+
+
+def retrieve_air_dynamic_viscosity(ds_env):
+    """Retrieve air dynamic viscosity."""
+    air_viscosity = get_air_dynamic_viscosity(ds_env["temperature"])
+    return air_viscosity
+
+
+def retrieve_air_density(ds_env):
+    """Retrieve air density."""
+    temperature = ds_env["temperature"]
+    relative_humidity = ds_env["relative_humidity"]
+    air_pressure = retrieve_air_pressure(ds_env)
+    vapor_pressure = get_vapor_actual_pressure(
+        relative_humidity=relative_humidity,
+        temperature=temperature,
+    )
+    air_density = get_air_density(
+        temperature=temperature,
+        air_pressure=air_pressure,
+        vapor_pressure=vapor_pressure,
+    )
+    return air_density
+
+
+####---------------------------------------------------------------------------.
+#### Beard model
+
+
 
 def get_raindrop_reynolds_number(diameter, temperature, air_density, water_density, g):
     """Compute raindrop Reynolds number.
@@ -395,6 +438,7 @@ def get_raindrop_reynolds_number(diameter, temperature, air_density, water_densi
     Coefficients are taken from Table 1 of Beard 1976.
 
     Reference: Beard 1976; Pruppacher & Klett 1978
+    See also Table A1 in Rahman et al., 2020.
 
     Parameters
     ----------
@@ -422,7 +466,7 @@ def get_raindrop_reynolds_number(diameter, temperature, air_density, water_densi
     air_viscosity = get_air_dynamic_viscosity(temperature)  # kg/(m*s) (aka Pa*s).
     delta_density = water_density - air_density
 
-    # Compute Davis number for small droplets
+    # Compute Davies number for small droplets
     davis_number = 4 * air_density * delta_density * g * diameter**3 / (3 * air_viscosity**2)
 
     # Compute the slip correction (is approx 1 and can be discarded)
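Note: a possible use of the new wrappers (module path inferred from the file list; the required ds_env variables are deduced from the wrapper bodies above, and the values below are illustrative only, not documented defaults):

    import xarray as xr
    from disdrodb.l1.beard_model import retrieve_air_density

    # Environment dataset without 'air_pressure': retrieve_air_pressure() then
    # falls back to get_air_pressure_at_height() using the fields below.
    ds_env = xr.Dataset(
        {
            "temperature": 288.15,
            "relative_humidity": 0.7,
            "altitude": 500.0,
            "latitude": 46.0,
            "sea_level_air_pressure": 101_325.0,
            "lapse_rate": 0.0065,
        },
    )
    air_density = retrieve_air_density(ds_env)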
disdrodb/l1/fall_velocity.py CHANGED
@@ -45,11 +45,6 @@ def get_fall_velocity_atlas_1973(diameter):
     Reviews of Geophysics, 11(1), 1-35.
     https://doi.org/10.1029/RG011i001p00001
 
-    Atlas, D., & Ulbrich, C. W. (1977).
-    Path- and area-integrated rainfall measurement by microwave attenuation in the 1-3 cm band.
-    Journal of Applied Meteorology, 16(12), 1322-1331.
-    https://doi.org/10.1175/1520-0450(1977)016<1322:PAAIRM>2.0.CO;2
-
     Gunn, R., & Kinzer, G. D. (1949).
     The terminal velocity of fall for water droplets in stagnant air.
     Journal of Meteorology, 6(4), 243-248.
@@ -111,7 +106,7 @@ def get_fall_velocity_uplinger_1981(diameter):
 
     """
     # Valid between 0.1 and 7 mm
-    fall_velocity = 4.874 * diameter * np.exp(-0.195 * diameter)
+    fall_velocity = 4.874 * diameter * np.exp(-0.195 * diameter)  # 4.854?
     fall_velocity = fall_velocity.clip(min=0, max=None)
     return fall_velocity
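Note: the Uplinger (1981) relation kept above (the new "# 4.854?" comment flags doubt about the leading coefficient) evaluates, for a few diameters, to:

    import numpy as np

    diameter = np.array([0.5, 1.0, 2.0, 4.0])                     # mm
    fall_velocity = 4.874 * diameter * np.exp(-0.195 * diameter)  # m/s
    fall_velocity = fall_velocity.clip(min=0, max=None)
    # -> approx. [2.21, 4.01, 6.60, 8.94] m/s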
 
disdrodb/l1/filters.py CHANGED
@@ -157,6 +157,8 @@ def define_raindrop_spectrum_mask(
         A boolean mask array indicating valid bins according to the specified criteria.
 
     """
+    # TODO: use lower and upper fall_velocity !
+
     # Ensure it creates a 2D mask if the fall_velocity does not vary over time
     if "time" in drop_number.dims and "time" not in fall_velocity.dims:
         drop_number = drop_number.isel(time=0)
disdrodb/l1/processing.py CHANGED
@@ -121,9 +121,11 @@ def generate_l1(
     # Add sample interval as coordinate (in seconds)
     ds_l1 = add_sample_interval(ds_l1, sample_interval=sample_interval)
 
-    # Add L0C coordinates that might got lost
-    if "time_qc" in ds:
-        ds_l1 = ds_l1.assign_coords({"time_qc": ds["time_qc"]})
+    # Add optional variables to L1 dataset
+    optional_variables = ["time_qc", "qc_resampling"]
+    for var in optional_variables:
+        if var in ds:
+            ds_l1[var] = ds[var]
 
     # -------------------------------------------------------------------------------------------
     # Filter dataset by diameter and velocity bins
@@ -160,10 +162,9 @@ def generate_l1(
     # -------------------------------------------------------------------------------------------
     # Retrieve drop number and drop_counts arrays
     if has_velocity_dimension:
-        drop_number = ds_l1["raw_drop_number"].where(mask)  # 2D (diameter, velocity)
+        drop_number = ds_l1["raw_drop_number"].where(mask, 0)  # 2D (diameter, velocity)
         drop_counts = drop_number.sum(dim=VELOCITY_DIMENSION)  # 1D (diameter)
         drop_counts_raw = ds_l1["raw_drop_number"].sum(dim=VELOCITY_DIMENSION)  # 1D (diameter)
-
     else:
         drop_number = ds_l1["raw_drop_number"]  # 1D (diameter)
         drop_counts = ds_l1["raw_drop_number"]  # 1D (diameter)
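Note: the .where(mask) -> .where(mask, 0) change above only swaps the fill value. xarray's DataArray.where fills masked-out entries with NaN by default; passing 0 keeps the filtered 2D spectrum NaN-free. A minimal illustration:

    import numpy as np
    import xarray as xr

    dn = xr.DataArray(np.ones((2, 3)), dims=("diameter", "velocity"))
    mask = dn > 10  # everything masked out, for illustration

    dn.where(mask)     # filtered bins become NaN
    dn.where(mask, 0)  # filtered bins become 0; the spectrum stays NaN-free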
disdrodb/l1/resampling.py CHANGED
@@ -19,9 +19,12 @@ import numpy as np
 import pandas as pd
 import xarray as xr
 
-from disdrodb.utils.time import ensure_sample_interval_in_seconds, regularize_dataset
-
-DEFAULT_ACCUMULATIONS = ["10s", "30s", "1min", "2min", "5min", "10min", "30min", "1hour"]
+from disdrodb.utils.time import (
+    ensure_sample_interval_in_seconds,
+    get_dataset_start_end_time,
+    get_sampling_information,
+    regularize_dataset,
+)
 
 
 def add_sample_interval(ds, sample_interval):
@@ -95,6 +98,27 @@ def define_window_size(sample_interval, accumulation_interval):
     return window_size
 
 
+def _finalize_qc_resampling(ds, sample_interval, accumulation_interval):
+    # Compute qc_resampling
+    # - 0 if not missing timesteps
+    # - 1 if all timesteps missing
+    n_timesteps = accumulation_interval / sample_interval
+    ds["qc_resampling"] = np.round(1 - ds["qc_resampling"] / n_timesteps, 1)
+    ds["qc_resampling"].attrs = {
+        "long_name": "Resampling Quality Control Flag",
+        "standard_name": "quality_flag",
+        "units": "",
+        "valid_min": 0.0,
+        "valid_max": 1.0,
+        "description": (
+            "Fraction of timesteps missing when resampling the data."
+            "0 = No timesteps missing; 1 = All timesteps missing;"
+            "Intermediate values indicate partial data coverage."
+        ),
+    }
+    return ds
+
+
 def _resample(ds, variables, accumulation, op):
     if not variables:
         return {}
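Note: a worked example of the qc_resampling arithmetic above. With 60 s data resampled to 600 s, each output step aggregates n_timesteps = 10 input steps; if only 8 of them carried a valid spectrum, the summed indicator is 8 and the flag becomes round(1 - 8/10, 1) = 0.2, i.e. 20% of the window missing:

    import numpy as np

    sample_interval = 60           # s, input resolution
    accumulation_interval = 600    # s, target resolution
    n_timesteps = accumulation_interval / sample_interval  # 10.0
    summed_indicator = 8.0         # valid-spectrum indicator summed over the window
    qc_resampling = np.round(1 - summed_indicator / n_timesteps, 1)  # 0.2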
@@ -113,23 +137,24 @@ def _rolling(ds, variables, window_size, op):
     return ds_subset
 
 
-def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
+def resample_dataset(ds, sample_interval, temporal_resolution):
     """
     Resample the dataset to a specified accumulation interval.
 
+    The output timesteps correspond to the starts of the periods over which
+    the resampling operation has been performed !
+
     Parameters
     ----------
     ds : xarray.Dataset
         The input dataset to be resampled.
     sample_interval : int
-        The sample interval of the input dataset.
-    accumulation_interval : int
-        The interval in seconds over which to accumulate the data.
-    rolling : bool, optional
-        If True, apply a rolling window before resampling. Default is True.
-        If True, forward rolling is performed.
-        The output timesteps correspond to the starts of the periods over which
-        the resampling operation has been performed !
+        The sample interval (in seconds) of the input dataset.
+    temporal_resolution : str
+        The desired temporal resolution for resampling.
+        It should be a string representing the accumulation interval,
+        e.g., "5MIN" for 5 minutes, "1H" for 1 hour, "30S" for 30 seconds, etc.
+        Prefixed with "ROLL" for rolling resampling, e.g., "ROLL5MIN".
 
     Returns
     -------
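Note: get_sampling_information itself is not shown in this diff; the hypothetical parser below only illustrates the temporal_resolution format documented above (the function name and unit table are assumptions, not the library's implementation):

    import re

    def parse_temporal_resolution(temporal_resolution):
        """Parse e.g. '30S', '5MIN', '1H' or 'ROLL5MIN' into (seconds, rolling)."""
        rolling = temporal_resolution.startswith("ROLL")
        spec = temporal_resolution.removeprefix("ROLL")
        value, unit = re.fullmatch(r"(\d+)([A-Z]+)", spec).groups()
        seconds_per_unit = {"S": 1, "MIN": 60, "H": 3600, "D": 86400}
        return int(value) * seconds_per_unit[unit], rolling

    parse_temporal_resolution("ROLL5MIN")  # -> (300, True)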
@@ -149,6 +174,9 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
     # Ensure sample interval in seconds
     sample_interval = int(ensure_sample_interval_in_seconds(sample_interval))
 
+    # Retrieve accumulation_interval and rolling option
+    accumulation_interval, rolling = get_sampling_information(temporal_resolution)
+
     # --------------------------------------------------------------------------.
     # Raise error if the accumulation_interval is less than the sample interval
     if accumulation_interval < sample_interval:
@@ -157,51 +185,78 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
     if not accumulation_interval % sample_interval == 0:
         raise ValueError("The accumulation_interval is not a multiple of sample interval.")
 
+    # Retrieve input dataset start_time and end_time
+    start_time, end_time = get_dataset_start_end_time(ds, time_dim="time")
+
+    # Initialize qc_resampling
+    ds["qc_resampling"] = xr.ones_like(ds["time"], dtype="float")
+
+    # Retrieve dataset attributes
+    attrs = ds.attrs.copy()
+
+    # If no resampling, return as it is
+    if sample_interval == accumulation_interval:
+        attrs["disdrodb_aggregated_product"] = "False"
+        attrs["disdrodb_rolled_product"] = "False"
+        attrs["disdrodb_temporal_resolution"] = temporal_resolution
+
+        ds = _finalize_qc_resampling(ds, sample_interval=sample_interval, accumulation_interval=accumulation_interval)
+        ds = add_sample_interval(ds, sample_interval=accumulation_interval)
+        ds.attrs = attrs
+        return ds
+
     # --------------------------------------------------------------------------.
     #### Preprocess the dataset
-    # Here we set NaN in the raw_drop_number to 0
-    # - We assume that NaN corresponds to 0
-    # - When we regularize, we infill with NaN
+    # - Set timesteps with NaN in drop_number to zero (and set qc_resampling to 0)
     # - When we aggregate with sum, we don't skip NaN
-    # --> Aggregation with original missing timesteps currently results in NaN !
+    # --> Resampling over missing timesteps will result in NaN drop_number and qc_resampling = 1
+    # --> Resampling over timesteps with NaN in drop_number will result in finite drop_number but qc_resampling > 0
+    # - qc_resampling will inform on the amount of timesteps missing
 
-    # Infill NaN values with zeros for drop_number and raw_drop_number
-    # - This might alter integrated statistics if NaN in spectrum does not actually correspond to 0 !
-    # - TODO: NaN should not be set as 0 !
-    for var in ["drop_number", "raw_drop_number"]:
+    for var in ["drop_number", "raw_drop_number", "drop_counts", "drop_number_concentration"]:
         if var in ds:
-            ds[var] = xr.where(np.isnan(ds[var]), 0, ds[var])
+            dims = set(ds[var].dims) - {"time"}
+            invalid_timesteps = np.isnan(ds[var]).any(dim=dims)
+            ds[var] = ds[var].where(~invalid_timesteps, 0)
+            ds["qc_resampling"] = ds["qc_resampling"].where(~invalid_timesteps, 0)
+
+            if np.all(invalid_timesteps).item():
+                raise ValueError("No timesteps with valid spectrum.")
 
     # Ensure regular dataset without missing timesteps
     # --> This adds NaN values for missing timesteps
-    ds = regularize_dataset(ds, freq=f"{sample_interval}s")
+    ds = regularize_dataset(ds, freq=f"{sample_interval}s", start_time=start_time, end_time=end_time)
+    ds["qc_resampling"] = ds["qc_resampling"].where(~np.isnan(ds["qc_resampling"]), 0)
 
     # --------------------------------------------------------------------------.
     # Define dataset attributes
-    attrs = ds.attrs.copy()
     if rolling:
         attrs["disdrodb_rolled_product"] = "True"
     else:
         attrs["disdrodb_rolled_product"] = "False"
 
-    if sample_interval == accumulation_interval:
-        attrs["disdrodb_aggregated_product"] = "False"
-        ds = add_sample_interval(ds, sample_interval=accumulation_interval)
-        ds.attrs = attrs
-        return ds
-
-    # --------------------------------------------------------------------------.
-    # Resample the dataset
     attrs["disdrodb_aggregated_product"] = "True"
+    attrs["disdrodb_temporal_resolution"] = temporal_resolution
 
+    # --------------------------------------------------------------------------.
     # Initialize resample dataset
     ds_resampled = xr.Dataset()
 
     # Retrieve variables to average/sum
+    # - ATTENTION: it will not resample non-dimensional time coordinates of the dataset !
     var_to_average = ["fall_velocity"]
-    var_to_cumulate = ["raw_drop_number", "drop_number", "drop_counts", "N", "Nraw", "Nremoved"]
+    var_to_cumulate = [
+        "raw_drop_number",
+        "drop_number",
+        "drop_counts",
+        "drop_number_concentration",
+        "N",
+        "Nraw",
+        "Nremoved",
+        "qc_resampling",
+    ]
     var_to_min = ["Dmin"]
-    var_to_max = ["Dmax"]
+    var_to_max = ["Dmax", "time_qc"]
 
     # Retrieve available variables
     var_to_average = [var for var in var_to_average if var in ds]
@@ -209,11 +264,6 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
     var_to_min = [var for var in var_to_min if var in ds]
     var_to_max = [var for var in var_to_max if var in ds]
 
-    # TODO Define custom processing
-    # - quality_flag --> take worst
-    # - skipna if less than fraction (to not waste lot of data when aggregating over i.e. hours)
-    # - Add tolerance on fraction of missing timesteps for large accumulation_intervals
-
     # Resample the dataset
     # - Rolling currently does not allow direct rolling forward.
     # - We currently use center=False which means search for data backward (right-aligned) !
@@ -239,6 +289,19 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
             {"time": ds_resampled["time"].data[: -window_size + 1]},
         )
 
+    # Finalize qc_resampling
+    ds_resampled = _finalize_qc_resampling(
+        ds_resampled,
+        sample_interval=sample_interval,
+        accumulation_interval=accumulation_interval,
+    )
+    # Set to NaN timesteps where qc_resampling == 1
+    # --> This occurs for missing timesteps in input dataset or all NaN drop_number arrays
+    variables = list(set(ds_resampled.data_vars) - {"qc_resampling"})
+    mask_missing_timesteps = ds_resampled["qc_resampling"] != 1
+    for var in variables:
+        ds_resampled[var] = ds_resampled[var].where(mask_missing_timesteps)
+
     # Add attributes
     ds_resampled.attrs = attrs
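Note: usage under the new signature might look as follows (a sketch: the toy dataset only carries a drop_number spectrum with an assumed diameter_bin_center dimension; real L1 datasets include more variables and attributes):

    import numpy as np
    import pandas as pd
    import xarray as xr
    from disdrodb.l1.resampling import resample_dataset

    times = pd.date_range("2024-01-01", periods=30, freq="1min")
    counts = np.random.default_rng(0).poisson(2, size=(30, 5)).astype(float)
    ds = xr.Dataset(
        {"drop_number": (("time", "diameter_bin_center"), counts)},
        coords={"time": times},
    )
    ds_5min = resample_dataset(ds, sample_interval=60, temporal_resolution="5MIN")      # block aggregation
    ds_roll = resample_dataset(ds, sample_interval=60, temporal_resolution="ROLL5MIN")  # forward-rolling windows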
disdrodb/l2/empirical_dsd.py CHANGED
@@ -220,27 +220,31 @@ def get_effective_sampling_area(sensor_name, diameter):
     check_sensor_name(sensor_name)
     if sensor_name in ["PARSIVEL", "PARSIVEL2"]:
         # Calculate sampling area for each diameter bin (S_i)
+        # - Parsivel remove margin fallers !
+        # - The effective sampling area decreases with increasing drop diameter
+        # sampling_area = 0.0054  # m2
         L = 180 / 1000  # Length of the Parsivel beam in m (180 mm)
         B = 30 / 1000  # Width of the Parsivel beam in m (30mm)
-        sampling_area = L * (B - diameter / 2)
+        sampling_area = L * (B - diameter / 2)  # d_eq
         return sampling_area
-    if sensor_name == "LPM":
+    if sensor_name in ["LPM", "LPM_V0"]:
         # Calculate sampling area for each diameter bin (S_i)
-        L = 228 / 1000  # Length of the Parsivel beam in m (228 mm)
-        B = 20 / 1000  # Width of the Parsivel beam in m (20 mm)
-        sampling_area = L * (B - diameter / 2)
+        # L = 228 / 1000  # Length of the beam in m (228 mm)
+        # B = 20 / 1000  # Width of the beam in m (20 mm)
+        # sampling_area = L * (B - diameter / 2)
+        sampling_area = 0.0045  # m2
         return sampling_area
     if sensor_name == "PWS100":
-        sampling_area = 0.004  # m2  # TODO: L * (B - diameter / 2) ?
+        sampling_area = 0.004  # m2
         return sampling_area
     if sensor_name == "RD80":
         sampling_area = 0.005  # m2
         return sampling_area
-    if sensor_name == "SWS250":  # TODO: L * (B - diameter / 2) ?
+    if sensor_name == "SWS250":
         # Table 29 of the manual that the sample volume is 400cm3, path length?
         # Distance between the end of the hood heaters is 291 mm.
         # Adding a factor of 1.5 for better representation of the Tx-Rx distance: L= 436 mm.
-        sampling_area = 0.0091  # m2
+        sampling_area = 0.0091  # m2  # 0.006504 m2 maybe?
         return sampling_area
     raise NotImplementedError(f"Effective sampling area for {sensor_name} must yet to be specified in the software.")
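Note: the Parsivel branch keeps the diameter-dependent formula A(D) = L * (B - D / 2), which shrinks as margin fallers are excluded for larger drops. For example (diameter assumed in metres, consistent with L and B above):

    import numpy as np

    L = 180 / 1000                               # beam length (m)
    B = 30 / 1000                                # beam width (m)
    diameter = np.array([0.5, 2.0, 5.0]) / 1000  # drop diameter (m)
    sampling_area = L * (B - diameter / 2)       # -> [0.005355, 0.00522, 0.00495] m2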
disdrodb/l2/processing.py CHANGED
@@ -27,7 +27,6 @@ from disdrodb.l2.empirical_dsd import (
     add_bins_metrics,
     compute_integral_parameters,
     compute_spectrum_parameters,
-    get_drop_average_velocity,
     get_drop_number_concentration,
     get_effective_sampling_area,
     get_kinetic_energy_variables_from_drop_number,
@@ -273,6 +272,8 @@ def generate_l2e(
         "Dmin",
         "Dmax",
         "fall_velocity",
+        "qc_resampling",
+        "time_qc",
     ]
 
     variables = [var for var in variables if var in ds]
@@ -282,8 +283,8 @@ def generate_l2e(
     # -------------------------------------------------------------------------------------------
     # Compute and add drop average velocity if an optical disdrometer (i.e OTT Parsivel or ThiesLPM)
     # - We recompute it because if the input dataset is aggregated, it must be updated !
-    if has_velocity_dimension:
-        ds["drop_average_velocity"] = get_drop_average_velocity(ds["drop_number"])
+    # if has_velocity_dimension:
+    #     ds["drop_average_velocity"] = get_drop_average_velocity(ds["drop_number"])
 
     # -------------------------------------------------------------------------------------------
     # Define velocity array with dimension 'velocity_method'
disdrodb/metadata/search.py CHANGED
@@ -102,10 +102,9 @@ def get_list_metadata(
         Path to the root of the DISDRODB Metadata Archive. Format: ``<...>/DISDRODB``
         If None, the``metadata_archive_dir`` path specified in the DISDRODB active configuratio. The default is None.
     **product_kwargs : dict, optional
-        Additional arguments required for some products.
-        For example, for the "L2E" product, you need to specify ``rolling`` and
-        ``sample_interval``. For the "L2M" product, you need to specify also
-        the ``model_name``.
+        Additional arguments required for DISDRODB products L1, L2E and L2M.
+        For the L1, L2E and L2M products, ``temporal_resolution`` is required.
+        FOr the L2M product, ``model_name`` is required.
 
     Returns
     -------
disdrodb/routines/l0.py CHANGED
@@ -50,7 +50,7 @@ from disdrodb.l0.l0b_nc_processing import sanitize_ds
 from disdrodb.l0.l0b_processing import generate_l0b
 from disdrodb.l0.l0c_processing import TOLERANCE_SECONDS, create_l0c_datasets
 from disdrodb.metadata import read_station_metadata
-from disdrodb.utils.archiving import get_files_per_time_block
+from disdrodb.utils.archiving import group_files_by_time_block
 from disdrodb.utils.dask import execute_tasks_safely
 from disdrodb.utils.decorators import delayed_if_parallel, single_threaded_if_parallel
 
@@ -696,7 +696,7 @@ def run_l0b_station(
     # -----------------------------------------------------------------.
     # Start L0B processing
     t_i = time.time()
-    msg = f"{product} processing of station_name {station_name} has started."
+    msg = f"{product} processing of station {station_name} has started."
     log_info(logger=logger, msg=msg, verbose=verbose)
 
     # -----------------------------------------------------------------.
@@ -774,7 +774,7 @@ def run_l0b_station(
     # -----------------------------------------------------------------.
     # End L0B processing
     timedelta_str = str(datetime.timedelta(seconds=round(time.time() - t_i)))
-    msg = f"{product} processing of station_name {station_name} completed in {timedelta_str}"
+    msg = f"{product} processing of station {station_name} completed in {timedelta_str}"
     log_info(logger=logger, msg=msg, verbose=verbose)
 
     # -----------------------------------------------------------------.
@@ -928,7 +928,7 @@ def run_l0c_station(
     # -------------------------------------------------------------------------.
     # Retrieve dictionary with the required files per time block
     # TODO: allow customizing this in config file, but risk of out of memory !
-    list_event_info = get_files_per_time_block(filepaths=filepaths, freq="day", tolerance_seconds=TOLERANCE_SECONDS)
+    list_event_info = group_files_by_time_block(filepaths=filepaths, freq="day", tolerance_seconds=TOLERANCE_SECONDS)
 
     # -----------------------------------------------------------------.
     # Generate L0C files