PyPI - disdrodb - Versions diffs - 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl - Mend

disdrodb 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63)

disdrodb/__init__.py +1 -1
disdrodb/_version.py +2 -2
disdrodb/api/io.py +12 -2
disdrodb/l0/check_standards.py +15 -10
disdrodb/l0/configs/LPM/l0a_encodings.yml +4 -4
disdrodb/l0/configs/LPM/l0b_cf_attrs.yml +22 -6
disdrodb/l0/configs/LPM/l0b_encodings.yml +41 -0
disdrodb/l0/configs/LPM/raw_data_format.yml +40 -0
disdrodb/l0/configs/PARSIVEL/l0b_cf_attrs.yml +1 -1
disdrodb/l0/configs/PARSIVEL/raw_data_format.yml +1 -1
disdrodb/l0/configs/PARSIVEL2/l0b_cf_attrs.yml +4 -4
disdrodb/l0/configs/PARSIVEL2/raw_data_format.yml +10 -10
disdrodb/l0/configs/PWS100/bins_diameter.yml +173 -0
disdrodb/l0/configs/PWS100/bins_velocity.yml +173 -0
disdrodb/l0/configs/PWS100/l0a_encodings.yml +19 -0
disdrodb/l0/configs/PWS100/l0b_cf_attrs.yml +76 -0
disdrodb/l0/configs/PWS100/l0b_encodings.yml +176 -0
disdrodb/l0/configs/PWS100/raw_data_format.yml +182 -0
disdrodb/l0/configs/RD80/raw_data_format.yml +2 -6
disdrodb/l0/l0b_nc_processing.py +1 -1
disdrodb/l0/l0b_processing.py +12 -10
disdrodb/l0/readers/LPM/AUSTRALIA/MELBOURNE_2007_LPM.py +23 -13
disdrodb/l0/readers/LPM/BRAZIL/CHUVA_LPM.py +3 -3
disdrodb/l0/readers/LPM/BRAZIL/GOAMAZON_LPM.py +5 -3
disdrodb/l0/readers/LPM/ITALY/GID_LPM.py +36 -20
disdrodb/l0/readers/LPM/ITALY/GID_LPM_W.py +210 -0
disdrodb/l0/readers/LPM/KIT/CHWALA.py +225 -0
disdrodb/l0/readers/LPM/SLOVENIA/ARSO.py +197 -0
disdrodb/l0/readers/LPM/SLOVENIA/CRNI_VRH.py +197 -0
disdrodb/l0/readers/PARSIVEL/KIT/BURKINA_FASO.py +133 -0
disdrodb/l0/readers/PARSIVEL/NCAR/PECAN_MOBILE.py +1 -1
disdrodb/l0/readers/PARSIVEL/NCAR/VORTEX2_2009.py +1 -1
disdrodb/l0/readers/PARSIVEL/SLOVENIA/UL_FGG.py +121 -0
disdrodb/l0/readers/PARSIVEL2/FRANCE/ENPC_PARSIVEL2.py +189 -0
disdrodb/l0/readers/PARSIVEL2/NCAR/PECAN_FP3.py +1 -1
disdrodb/l0/readers/PARSIVEL2/NCAR/VORTEX_SE_2016_P2.py +1 -1
disdrodb/l0/readers/PWS100/FRANCE/ENPC_PWS100.py +150 -0
disdrodb/l0/readers/RD80/NOAA/PSL_RD80.py +274 -0
disdrodb/l0/readers/template_reader_raw_netcdf_data.py +1 -1
disdrodb/l0/standards.py +7 -4
disdrodb/l0/template_tools.py +2 -2
disdrodb/l1/encoding_attrs.py +21 -6
disdrodb/l1/processing.py +6 -4
disdrodb/l1/resampling.py +1 -1
disdrodb/l1/routines.py +2 -1
disdrodb/l2/empirical_dsd.py +100 -2
disdrodb/l2/event.py +3 -3
disdrodb/l2/processing.py +21 -12
disdrodb/l2/processing_options.py +7 -7
disdrodb/l2/routines.py +3 -3
disdrodb/metadata/checks.py +15 -6
disdrodb/metadata/manipulation.py +2 -2
disdrodb/metadata/standards.py +83 -79
disdrodb/metadata/writer.py +2 -2
disdrodb/routines.py +246 -10
disdrodb/scattering/routines.py +1 -1
disdrodb/utils/dataframe.py +342 -0
{disdrodb-0.1.0.dist-info → disdrodb-0.1.1.dist-info}/METADATA +34 -61
{disdrodb-0.1.0.dist-info → disdrodb-0.1.1.dist-info}/RECORD +63 -47
{disdrodb-0.1.0.dist-info → disdrodb-0.1.1.dist-info}/WHEEL +1 -1
{disdrodb-0.1.0.dist-info → disdrodb-0.1.1.dist-info}/entry_points.txt +3 -3
{disdrodb-0.1.0.dist-info → disdrodb-0.1.1.dist-info}/licenses/LICENSE +0 -0
{disdrodb-0.1.0.dist-info → disdrodb-0.1.1.dist-info}/top_level.txt +0 -0

disdrodb/l0/template_tools.py CHANGED Viewed

@@ -500,7 +500,7 @@ def _search_possible_columns(string: str, sensor_name: str) -> list:
 #### Infer column names and checks validity
-def infer_column_names(df: pd.DataFrame, sensor_name: str, row_idx: int = 1):
+def infer_column_names(df: pd.DataFrame, sensor_name: str, row_idx: int = 0):
     """Try to guess the dataframe columns names based on string characteristics.
     Parameters
@@ -511,7 +511,7 @@ def infer_column_names(df: pd.DataFrame, sensor_name: str, row_idx: int = 1):
         name of the sensor.
     row_idx : int, optional
         The row index of the dataframe to use to infer the column names.
-        The default row index is 1.
+        The default row index is 0.
     Returns
     -------

disdrodb/l1/encoding_attrs.py CHANGED Viewed

@@ -51,21 +51,36 @@ def get_attrs_dict():
             "long_name": "Measured average drop fall velocity",
             "units": "m s-1",
         },
-        "n_drops_selected": {
+        "N": {
             "description": "Total number of selected drops",
             "long_name": "Total number of selected drops",
             "units": "",
         },
-        "n_drops_discarded": {
+        "Nremoved": {
             "description": "Total number of discarded drops",
             "long_name": "Total number of discarded drops",
             "units": "",
         },
-        "n_bins_with_drops": {
+        "Nbins": {
             "description": "Number of diameter bins with drops",
             "long_name": "Number of diameter bins with drops",
             "units": "",
         },
+        "Nbins_missing": {
+            "description": "Number of diameter bins with no drops",
+            "long_name": "Number of diameter bins with no drops",
+            "units": "",
+        },
+        "Nbins_missing_fraction": {
+            "description": "Fraction of diameter bins with no drops",
+            "long_name": "Fraction of diameter bins with no drops",
+            "units": "",
+        },
+        "Nbins_missing_consecutive": {
+            "description": "Maximum number of consecutive diameter bins with no drops",
+            "long_name": "Maximum number of consecutive diameter bins with no drops",
+            "units": "",
+        },
         #### L2
         "drop_number_concentration": {
             "description": "Number concentration of drops per diameter class per unit volume",
@@ -436,7 +451,7 @@ def get_encoding_dict():
             "contiguous": False,
             "_FillValue": 4294967295,
         },
-        "n_drops_selected": {
+        "N": {
             "dtype": "uint32",
             "zlib": True,
             "complevel": 3,
@@ -445,7 +460,7 @@ def get_encoding_dict():
             "contiguous": False,
             "_FillValue": 4294967295,
         },
-        "n_drops_discarded": {
+        "Nremoved": {
             "dtype": "uint32",
             "zlib": True,
             "complevel": 3,
@@ -454,7 +469,7 @@ def get_encoding_dict():
             "contiguous": False,
             "_FillValue": 4294967295,
         },
-        "n_bins_with_drops": {
+        "Nbins": {
             "dtype": "uint8",
             "_FillValue": 255,
             "zlib": True,

disdrodb/l1/processing.py CHANGED Viewed

@@ -26,7 +26,7 @@ from disdrodb.l1.filters import define_spectrum_mask, filter_diameter_bins, filt
 from disdrodb.l1.resampling import add_sample_interval
 from disdrodb.l1_env.routines import load_env_dataset
 from disdrodb.l2.empirical_dsd import (  # TODO: maybe move out of L2
-    count_bins_with_drops,
+    compute_qc_bins_metrics,
     get_min_max_diameter,
 )
 from disdrodb.utils.attrs import set_attrs
@@ -172,9 +172,11 @@ def generate_l1(
     # Add drop statistics
     ds_l1["Dmin"] = min_drop_diameter
     ds_l1["Dmax"] = max_drop_diameter
-    ds_l1["n_drops_selected"] = drop_counts.sum(dim=DIAMETER_DIMENSION)
-    ds_l1["n_drops_discarded"] = drop_counts_raw.sum(dim=DIAMETER_DIMENSION) - ds_l1["n_drops_selected"]
-    ds_l1["n_bins_with_drops"] = count_bins_with_drops(ds_l1)
+    ds_l1["N"] = drop_counts.sum(dim=DIAMETER_DIMENSION)
+    ds_l1["Nremoved"] = drop_counts_raw.sum(dim=DIAMETER_DIMENSION) - ds_l1["N"]
+    # Add bins statistics
+    ds_l1.update(compute_qc_bins_metrics(ds_l1))
     # -------------------------------------------------------------------------------------------
     # Add quality flags

disdrodb/l1/resampling.py CHANGED Viewed

@@ -141,7 +141,7 @@ def resample_dataset(ds, sample_interval, accumulation_interval, rolling=True):
     # Retrieve variables to average/sum
     var_to_average = ["fall_velocity"]
-    var_to_cumulate = ["raw_drop_number", "drop_number", "drop_counts", "n_drops_selected", "n_drops_discarded"]
+    var_to_cumulate = ["raw_drop_number", "drop_number", "drop_counts", "N", "Nremoved"]
     var_to_min = ["Dmin"]
     var_to_max = ["Dmax"]

disdrodb/l1/routines.py CHANGED Viewed

@@ -61,6 +61,7 @@ def get_l1_options():
     # - TODO: as function of sensor name
     # minimum_diameter
+    # --> PWS100: 0.05
     # --> PARSIVEL: 0.2495
     # --> RD80: 0.313
     # --> LPM: 0.125 (we currently discard first bin with this setting)
@@ -75,7 +76,7 @@ def get_l1_options():
         "fall_velocity_method": "Beard1976",
         # Diameter-Velocity Filtering Options
         "minimum_diameter": 0.2495,  # OTT PARSIVEL first two bin no data !
-        "maximum_diameter": 8,
+        "maximum_diameter": 10,
         "minimum_velocity": 0,
         "maximum_velocity": 12,
         "above_velocity_fraction": 0.5,

disdrodb/l2/empirical_dsd.py CHANGED Viewed

@@ -101,6 +101,101 @@ def count_bins_with_drops(ds):
     return da
+def _compute_qc_bins_metrics(arr):
+    # Find indices of non-zero elements
+    arr = arr.copy()
+    arr[np.isnan(arr)] = 0
+    non_zero_indices = np.nonzero(arr)[0]
+    if non_zero_indices.size == 0:
+        return np.array([0, len(arr), 1, len(arr)])
+    # Define bins interval with drops
+    start_idx, end_idx = non_zero_indices[0], non_zero_indices[-1]
+    segment = arr[start_idx : end_idx + 1]
+    # Compute number of bins with drops
+    total_bins = segment.size
+    # Compute number of missing bins (zeros)
+    n_missing_bins = int(np.sum(segment == 0))
+    # Compute fraction of bins with missing drops
+    fraction_missing = n_missing_bins / total_bins
+    # Identify longest with with consecutive zeros
+    zero_mask = (segment == 0).astype(int)
+    # - Pad with zeros at both ends to detect edges
+    padded = np.pad(zero_mask, (1, 1), "constant", constant_values=0)
+    diffs = np.diff(padded)
+    # - Start and end indices of runs
+    run_starts = np.where(diffs == 1)[0]
+    run_ends = np.where(diffs == -1)[0]
+    run_lengths = run_ends - run_starts
+    max_consecutive_missing = run_lengths.max() if run_lengths.size > 0 else 0
+    # Define output
+    output = np.array([total_bins, n_missing_bins, fraction_missing, max_consecutive_missing])
+    return output
+def compute_qc_bins_metrics(ds):
+    """
+    Compute quality-control metrics for drop-count bins along the diameter dimension.
+    This function selects the first available drop-related variable from the dataset,
+    optionally collapses over velocity methods and the velocity dimension, then
+    computes four metrics per time step:
+      1. Nbins: total number of diameter bins between the first and last non-zero count
+      2. Nbins_missing: number of bins with zero or NaN counts in that interval
+      3. Nbins_missing_fraction: fraction of missing bins (zeros) in the interval
+      4. Nbins_missing_consecutive: maximum length of consecutive missing bins
+    Parameters
+    ----------
+    ds : xr.Dataset
+        Input dataset containing one of the following variables:
+        'drop_counts', 'drop_number_concentration', or 'drop_number'.
+        If a 'velocity_method' dimension exists, only the first method is used.
+        If a velocity dimension (specified by VELOCITY_DIMENSION) exists, it is summed over.
+    Returns
+    -------
+    xr.Dataset
+        Dataset with a new 'metric' dimension of size 4 and coordinates:
+        ['Nbins', 'Nbins_missing', 'Nbins_missing_fraction', 'Nbins_missing_consecutive'],
+        indexed by 'time'.
+    """
+    # Select useful variable
+    candidate_variables = ["drop_counts", "drop_number_concentration", "drop_number"]
+    available_variables = [var for var in candidate_variables if var in ds]
+    if len(available_variables) == 0:
+        raise ValueError(f"One of these variables is required: {candidate_variables}")
+    da = ds[available_variables[0]]
+    if "velocity_method" in da.dims:
+        da = da.isel(velocity_method=0)
+        da = da.drop_vars("velocity_method")
+    if VELOCITY_DIMENSION in da.dims:
+        da = da.sum(dim=VELOCITY_DIMENSION)
+    # Compute QC metrics
+    da_qc_bins = xr.apply_ufunc(
+        _compute_qc_bins_metrics,
+        da,
+        input_core_dims=[[DIAMETER_DIMENSION]],
+        output_core_dims=[["metric"]],
+        vectorize=True,
+        dask="parallelized",
+        output_dtypes=[float],
+        dask_gufunc_kwargs={"output_sizes": {"metric": 4}},
+    )
+    # Assign meaningful labels to the qc 'metric' dimension
+    variables = ["Nbins", "Nbins_missing", "Nbins_missing_fraction", "Nbins_missing_consecutive"]
+    ds_qc_bins = da_qc_bins.assign_coords(metric=variables).to_dataset(dim="metric")
+    return ds_qc_bins
 ####-------------------------------------------------------------------------------------------------------------------.
 #### DSD Spectrum, Concentration, Moments
@@ -117,13 +212,16 @@ def get_effective_sampling_area(sensor_name, diameter):
         B = 30 / 1000  # Width of the Parsivel beam in m (30mm)
         sampling_area = L * (B - diameter / 2)
         return sampling_area
-    if sensor_name in "LPM":
+    if sensor_name == "LPM":
         # Calculate sampling area for each diameter bin (S_i)
         L = 228 / 1000  # Length of the Parsivel beam in m (228 mm)
         B = 20 / 1000  # Width of the Parsivel beam in m (20 mm)
         sampling_area = L * (B - diameter / 2)
         return sampling_area
-    if sensor_name in "RD80":
+    if sensor_name == "PWS100":
+        sampling_area = 0.004  # m2  # TODO: L * (B - diameter / 2) ?
+        return sampling_area
+    if sensor_name == "RD80":
         sampling_area = 0.005  # m2
         return sampling_area
     raise NotImplementedError(f"Effective sampling area for {sensor_name} must yet to be specified in the software.")

disdrodb/l2/event.py CHANGED Viewed

@@ -43,7 +43,7 @@ def identify_events(
 ):
     """Return a list of rainy events.
-    Rainy timesteps are defined when n_drops_selected > min_n_drops.
+    Rainy timesteps are defined when N > min_n_drops.
     Any rainy isolated timesteps (based on neighborhood criteria) is removed.
     Then, consecutive rainy timesteps are grouped into the same event if the time gap between them does not
     exceed `intra_event_max_time_gap`. Finally, events that do not meet minimum size or duration
@@ -90,7 +90,7 @@ def identify_events(
     else:
         list_ds = [xr.open_dataset(filepath, chunks={}, cache=False, decode_timedelta=False) for filepath in filepaths]
     # Filter dataset for requested variables
-    variables = ["time", "n_drops_selected"]
+    variables = ["time", "N"]
     list_ds = [ds[variables] for ds in list_ds]
     # Concat datasets
     ds = xr.concat(list_ds, dim="time", compat="no_conflicts", combine_attrs="override")
@@ -102,7 +102,7 @@ def identify_events(
     # Sort dataset by time
     ds = ensure_sorted_by_time(ds)
     # Define candidate timesteps to group into events
-    idx_valid = ds["n_drops_selected"].data > min_n_drops
+    idx_valid = ds["N"].data > min_n_drops
     timesteps = ds["time"].data[idx_valid]
     # Define event list
     event_list = group_timesteps_into_event(

disdrodb/l2/processing.py CHANGED Viewed

@@ -24,8 +24,8 @@ from disdrodb.l1.fall_velocity import get_raindrop_fall_velocity
 from disdrodb.l1_env.routines import load_env_dataset
 from disdrodb.l2.empirical_dsd import (
     compute_integral_parameters,
+    compute_qc_bins_metrics,
     compute_spectrum_parameters,
-    count_bins_with_drops,
     get_drop_average_velocity,
     get_drop_number_concentration,
     get_effective_sampling_area,
@@ -140,11 +140,12 @@ def generate_l2_empirical(ds, ds_env=None, compute_spectra=False):
     # Discard all timesteps without measured drops
     # - This allow to speed up processing
     # - Regularization can be done at the end
-    ds = ds.isel(time=ds["n_drops_selected"] > 0)
+    ds = ds.isel(time=ds["N"] > 0)
     # Count number of diameter bins with data
-    if "n_bins_with_drops" not in ds:
-        ds["n_bins_with_drops"] = count_bins_with_drops(ds)
+    if "Nbins" not in ds:
+        # Add bins statistics
+        ds.update(compute_qc_bins_metrics(ds))
     # Retrieve ENV dataset or take defaults
     # --> Used for fall velocity and water density estimates
@@ -174,8 +175,8 @@ def generate_l2_empirical(ds, ds_env=None, compute_spectra=False):
         "drop_number",  # 2D V x D
         "drop_counts",  # 1D D
         "sample_interval",
-        "n_drops_selected",
-        "n_drops_discarded",
+        "N",
+        "Nremoved",
         "Dmin",
         "Dmax",
         "fall_velocity",
@@ -291,14 +292,14 @@ def generate_l2_model(
     fall_velocity_method="Beard1976",
     # PSD discretization
     diameter_min=0,
-    diameter_max=8,
+    diameter_max=10,
     diameter_spacing=0.05,
     # Fitting options
     psd_model=None,
     optimization=None,
     optimization_kwargs=None,
     # Filtering options
-    min_bins_with_drops=4,
+    min_nbins=4,
     remove_timesteps_with_few_bins=False,
     mask_timesteps_with_few_bins=False,
     # GOF metrics options
@@ -357,11 +358,12 @@ def generate_l2_model(
     ####------------------------------------------------------.
     #### Preprocessing
     # Count number of diameter bins with data
-    if "n_bins_with_drops" not in ds:
-        ds["n_bins_with_drops"] = count_bins_with_drops(ds)
+    if "Nbins" not in ds:
+        # Add bins statistics
+        ds.update(compute_qc_bins_metrics(ds))
     # Identify timesteps with enough diameter bins with counted trops
-    valid_timesteps = ds["n_bins_with_drops"] >= min_bins_with_drops
+    valid_timesteps = ds["Nbins"] >= min_nbins
     # Drop such timesteps if asked
     if remove_timesteps_with_few_bins:
@@ -466,7 +468,14 @@ def generate_l2_model(
 @check_pytmatrix_availability
-def generate_l2_radar(ds, radar_band=None, canting_angle_std=7, diameter_max=8, axis_ratio="Thurai2007", parallel=True):
+def generate_l2_radar(
+    ds,
+    radar_band=None,
+    canting_angle_std=7,
+    diameter_max=10,
+    axis_ratio="Thurai2007",
+    parallel=True,
+):
     """Simulate polarimetric radar variables from empirical drop number concentration or the estimated PSD.
     Parameters

disdrodb/l2/processing_options.py CHANGED Viewed

@@ -7,16 +7,16 @@ DEFAULT_CONFIG = {
     "global_settings": {
         "time_integration": [
             "1MIN",
+            "5MIN",
             "10MIN",
             "ROLL1MIN",
-            "ROLL10MIN",
         ],  # ["10S", "30S", "1MIN",  "5MIN", "10MIN", "15MIN", "30MIN", "1H", "ROLL5MIN", "ROLL10MIN"],
         # Radar options
         "radar_simulation_enabled": False,
         "radar_simulation_options": {
             "radar_band": ["S", "C", "X", "Ku", "Ka", "W"],
             "canting_angle_std": 7,
-            "diameter_max": 8,
+            "diameter_max": 10,
             "axis_ratio": "Thurai2007",
         },
         # L2E options
@@ -25,10 +25,10 @@ DEFAULT_CONFIG = {
         "l2m_options": {
             "fall_velocity_method": "Beard1976",
             "diameter_min": 0,
-            "diameter_max": 8,
+            "diameter_max": 10,
             "diameter_spacing": 0.05,
             "gof_metrics": True,
-            "min_bins_with_drops": 4,
+            "min_nbins": 4,
             "remove_timesteps_with_few_bins": False,
             "mask_timesteps_with_few_bins": False,
             "models": {
@@ -112,7 +112,7 @@ TEST_CONFIG = {
         "radar_simulation_options": {
             "radar_band": ["S", "C", "X", "Ku", "Ka", "W"],
             "canting_angle_std": 7,
-            "diameter_max": 8,
+            "diameter_max": 10,
             "axis_ratio": "Thurai2007",
         },
         # L2E options
@@ -121,10 +121,10 @@ TEST_CONFIG = {
         "l2m_options": {
             "fall_velocity_method": "Beard1976",
             "diameter_min": 0,
-            "diameter_max": 8,
+            "diameter_max": 10,
             "diameter_spacing": 0.05,
             "gof_metrics": True,
-            "min_bins_with_drops": 4,
+            "min_nbins": 4,
             "remove_timesteps_with_few_bins": False,
             "mask_timesteps_with_few_bins": False,
             "models": {

disdrodb/l2/routines.py CHANGED Viewed

@@ -156,11 +156,11 @@ def _generate_l2e(
         ##------------------------------------------------------------------------.
         # Remove timesteps with no drops or NaN (from L2E computations)
-        # timestep_zero_drops = ds["time"].data[ds["n_drops_selected"].data == 0]
-        # timestep_nan = ds["time"].data[np.isnan(ds["n_drops_selected"].data)]
+        # timestep_zero_drops = ds["time"].data[ds["N"].data == 0]
+        # timestep_nan = ds["time"].data[np.isnan(ds["N"].data)]
         # TODO: Make it a choice !
         indices_valid_timesteps = np.where(
-            ~np.logical_or(ds["n_drops_selected"].data == 0, np.isnan(ds["n_drops_selected"].data)),
+            ~np.logical_or(ds["N"].data == 0, np.isnan(ds["N"].data)),
         )[0]
         ds = ds.isel(time=indices_valid_timesteps)

disdrodb/metadata/checks.py CHANGED Viewed

@@ -30,7 +30,7 @@ from disdrodb.api.info import (
 from disdrodb.configs import get_metadata_archive_dir
 from disdrodb.metadata.reader import read_station_metadata
 from disdrodb.metadata.search import get_list_metadata
-from disdrodb.metadata.standards import get_valid_metadata_keys
+from disdrodb.metadata.standards import METADATA_KEYS, METADATA_VALUES
 from disdrodb.utils.yaml import read_yaml
 #### --------------------------------------------------------------------------.
@@ -40,19 +40,17 @@ from disdrodb.utils.yaml import read_yaml
 def get_metadata_missing_keys(metadata):
     """Return the DISDRODB metadata keys which are missing."""
     keys = list(metadata.keys())
-    valid_keys = get_valid_metadata_keys()
     # Identify missing keys
-    idx_missing_keys = np.where(np.isin(valid_keys, keys, invert=True))[0]
-    missing_keys = np.array(valid_keys)[idx_missing_keys].tolist()
+    idx_missing_keys = np.where(np.isin(METADATA_KEYS, keys, invert=True))[0]
+    missing_keys = np.array(METADATA_KEYS)[idx_missing_keys].tolist()
     return missing_keys
 def get_metadata_invalid_keys(metadata):
     """Return the DISDRODB metadata keys which are not valid."""
     keys = list(metadata.keys())
-    valid_keys = get_valid_metadata_keys()
     # Identify invalid keys
-    idx_invalid_keys = np.where(np.isin(keys, valid_keys, invert=True))[0]
+    idx_invalid_keys = np.where(np.isin(keys, METADATA_KEYS, invert=True))[0]
     invalid_keys = np.array(keys)[idx_invalid_keys].tolist()
     return invalid_keys
@@ -73,11 +71,22 @@ def _check_metadata_values(metadata):
     """Check validity of metadata values.
     If null is specified in the YAML files (or None in the dict) raise error.
+    For specific keys, check that values match one of the allowed options in METADATA_VALUES.
     """
     for key, value in metadata.items():
+        # Check for None/null values
         if isinstance(value, type(None)):
             raise ValueError(f"The metadata key {key} has None or null value. Use '' instead.")
+        # Check that values match allowed options for specific keys
+        if key in METADATA_VALUES:
+            allowed_values = METADATA_VALUES[key]
+            if value not in allowed_values:
+                allowed_str = ", ".join([f"'{v}'" for v in allowed_values])
+                raise ValueError(
+                    f"Invalid value '{value}' for metadata key '{key}'. " f"Allowed values are: {allowed_str}.",
+                )
 def _check_metadata_campaign_name(metadata, expected_name):
     """Check metadata ``campaign_name``."""

disdrodb/metadata/manipulation.py CHANGED Viewed

@@ -41,8 +41,8 @@ def add_missing_metadata_keys(metadata):
 def sort_metadata_dictionary(metadata):
     """Sort the keys of the metadata dictionary by ``valid_metadata_keys`` list order."""
-    from disdrodb.metadata.standards import get_valid_metadata_keys
+    from disdrodb.metadata.standards import METADATA_KEYS
-    list_metadata_keys = get_valid_metadata_keys()
+    list_metadata_keys = METADATA_KEYS
     metadata = {k: metadata[k] for k in list_metadata_keys}
     return metadata

disdrodb 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

disdrodb 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl